In [2]:
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch
import os

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load your dataset from Excel
data = pd.read_excel(r'C:\Users\admin\Desktop\traini\newsdata.xlsx')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Remove classes with fewer than 2 samples
min_samples = 2
value_counts = data['target'].value_counts()
data = data[data['target'].isin(value_counts[value_counts >= min_samples].index)]

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)

In [11]:

# Split the dataset
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['target'], random_state=42)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_data.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_data.reset_index(drop=True))

# Load tokenizer
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Tokenize the texts
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

# Rename the target column to labels
train_dataset = train_dataset.rename_column("target", "labels")
test_dataset = test_dataset.rename_column("target", "labels")

# Set format for PyTorch
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
print(train_data)
print(train_dataset)


Map: 100%|██████████| 138/138 [00:00<00:00, 4795.00 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 2697.60 examples/s]

                                                  text  target
92   with ukraine losing ground, allies debate how ...       6
83   hedge flow hedge funds buy chinese stocks for ...      17
136  japan's factory activity expands for first tim...       5
31   european commission official sees $100 bln in ...       8
96   colorado the first state to move forward with ...      16
..                                                 ...     ...
134  steady, widening service price rises may spur ...      16
156  meta’s new ai council is composed entirely of ...      13
93   weak yuan boosts the appeal of hong kong stock...       2
29   shell sees emerging asian markets taking more ...       6
19   explainer: what new caledonia riots mean for t...       6

[138 rows x 2 columns]
Dataset({
    features: ['text', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 138
})





In [12]:

# Load the model
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=num_labels)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./bert',
    evaluation_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=50,
    weight_decay=0.01,
    no_cuda=not torch.cuda.is_available(),  # Use CUDA if available
    report_to=[]  
)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate()
print(f"Evaluation results: {results}")

# Save the model
trainer.save_model('./bert')

# Optionally, save the tokenizer as well
tokenizer.save_pretrained('./bert')

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

[A
                                                

[A[A                                       
  6%|▌         | 31/540 [1:02:22<04:15,  1.99it/s]
[A
[A

{'eval_loss': 2.666346549987793, 'eval_runtime': 0.2214, 'eval_samples_per_second': 158.053, 'eval_steps_per_second': 22.579, 'epoch': 1.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:02:30<04:15,  1.99it/s]
[A
[A

{'eval_loss': 2.4905195236206055, 'eval_runtime': 0.3057, 'eval_samples_per_second': 114.485, 'eval_steps_per_second': 16.355, 'epoch': 2.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:02:39<04:15,  1.99it/s]
[A
[A

{'eval_loss': 2.373185873031616, 'eval_runtime': 0.3005, 'eval_samples_per_second': 116.488, 'eval_steps_per_second': 16.641, 'epoch': 3.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:02:48<04:15,  1.99it/s]
[A
[A

{'eval_loss': 2.225414991378784, 'eval_runtime': 0.3027, 'eval_samples_per_second': 115.615, 'eval_steps_per_second': 16.516, 'epoch': 4.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:02:57<04:15,  1.99it/s]
[A
[A

{'eval_loss': 2.1004581451416016, 'eval_runtime': 0.2978, 'eval_samples_per_second': 117.545, 'eval_steps_per_second': 16.792, 'epoch': 5.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:06<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.947031021118164, 'eval_runtime': 0.3155, 'eval_samples_per_second': 110.925, 'eval_steps_per_second': 15.846, 'epoch': 6.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:15<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.8649410009384155, 'eval_runtime': 0.2988, 'eval_samples_per_second': 117.153, 'eval_steps_per_second': 16.736, 'epoch': 7.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:24<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.8271692991256714, 'eval_runtime': 0.2971, 'eval_samples_per_second': 117.801, 'eval_steps_per_second': 16.829, 'epoch': 8.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:33<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7408959865570068, 'eval_runtime': 0.3045, 'eval_samples_per_second': 114.934, 'eval_steps_per_second': 16.419, 'epoch': 9.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:43<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7231405973434448, 'eval_runtime': 0.3078, 'eval_samples_per_second': 113.709, 'eval_steps_per_second': 16.244, 'epoch': 10.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:03:52<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.6610407829284668, 'eval_runtime': 0.3003, 'eval_samples_per_second': 116.536, 'eval_steps_per_second': 16.648, 'epoch': 11.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:01<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7291139364242554, 'eval_runtime': 0.2969, 'eval_samples_per_second': 117.868, 'eval_steps_per_second': 16.838, 'epoch': 12.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:10<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.681612491607666, 'eval_runtime': 0.2969, 'eval_samples_per_second': 117.903, 'eval_steps_per_second': 16.843, 'epoch': 13.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:19<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.6539177894592285, 'eval_runtime': 0.2971, 'eval_samples_per_second': 117.823, 'eval_steps_per_second': 16.832, 'epoch': 14.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:28<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7293065786361694, 'eval_runtime': 0.3022, 'eval_samples_per_second': 115.826, 'eval_steps_per_second': 16.547, 'epoch': 15.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:37<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.6991902589797974, 'eval_runtime': 0.289, 'eval_samples_per_second': 121.089, 'eval_steps_per_second': 17.298, 'epoch': 16.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:46<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7011017799377441, 'eval_runtime': 0.3049, 'eval_samples_per_second': 114.8, 'eval_steps_per_second': 16.4, 'epoch': 17.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:04:56<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.717252254486084, 'eval_runtime': 0.3054, 'eval_samples_per_second': 114.604, 'eval_steps_per_second': 16.372, 'epoch': 18.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:05:05<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7530707120895386, 'eval_runtime': 0.3072, 'eval_samples_per_second': 113.928, 'eval_steps_per_second': 16.275, 'epoch': 19.0}



[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:05:15<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.759432315826416, 'eval_runtime': 0.3027, 'eval_samples_per_second': 115.615, 'eval_steps_per_second': 16.516, 'epoch': 20.0}



[A
[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:05:25<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.744513988494873, 'eval_runtime': 0.4017, 'eval_samples_per_second': 87.12, 'eval_steps_per_second': 12.446, 'epoch': 21.0}



[A
[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:05:38<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7515541315078735, 'eval_runtime': 0.562, 'eval_samples_per_second': 62.283, 'eval_steps_per_second': 8.898, 'epoch': 22.0}



[A
[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:05:54<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7490975856781006, 'eval_runtime': 0.5786, 'eval_samples_per_second': 60.49, 'eval_steps_per_second': 8.641, 'epoch': 23.0}



[A
[A
                                                  

[A[A                                       
  6%|▌         | 31/540 [1:06:07<04:15,  1.99it/s]
[A
[A

{'eval_loss': 1.7966868877410889, 'eval_runtime': 0.4266, 'eval_samples_per_second': 82.036, 'eval_steps_per_second': 11.719, 'epoch': 24.0}




KeyboardInterrupt: 

In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
# Load the saved model and tokenizer
model_path = './bert'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Function to classify new titles
def classify_titles(titles):
    results = []
    for title in titles:
        # Tokenize the title
        inputs = tokenizer(title, return_tensors="pt", truncation=True, padding=True)
        
        # Get model predictions
        with torch.no_grad():
            outputs = model(**inputs)
        
        # Get the predicted class and confidence score
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]
        
        # Get the corresponding category
        predicted_category = label_encoder.inverse_transform([predicted_class_id])[0]
        
        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Title: '{title}'")
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        
        results.append((title, predicted_category, confidence_score))
    
    return results


# Example titles to classify
titles_to_classify = ["Turkish Rates Likely Already at Peak Even If Inflation Isn’t Yet"]
classified_titles = classify_titles(titles_to_classify)

# Print the results
for title, category, confidence in classified_titles:
    print(f"Title: '{title}' is classified as '{category}'" )


Title: 'Turkish Rates Likely Already at Peak Even If Inflation Isn’t Yet'
Predicted class ID: 5
Confidence score: 0.5957
Title: 'Turkish Rates Likely Already at Peak Even If Inflation Isn’t Yet' is classified as 'Industry & Economic Updates'


In [10]:
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
import os

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load your dataset from Excel
data = pd.read_excel(r'C:\Users\admin\Desktop\traini\newsdata.xlsx')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Remove classes with fewer than 2 samples
min_samples = 2
value_counts = data['target'].value_counts()
data = data[data['target'].isin(value_counts[value_counts >= min_samples].index)]

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)

print(f"Number of classes after filtering: {num_labels}")
print(data['target'].value_counts())

# Split the dataset
train_data, test_data = train_test_split(data, test_size=0.2, stratify=data['target'], random_state=42)

# Tokenizer
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Tokenize function
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

# Convert to Hugging Face Dataset
test_dataset = Dataset.from_pandas(test_data.reset_index(drop=True))

# Tokenize the test dataset
test_dataset = test_dataset.map(tokenize, batched=True)

# Rename the target column to labels
test_dataset = test_dataset.rename_column("target", "labels")

# Set format for PyTorch
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Load the saved model and tokenizer
model_path = './results_fold_0'  # Replace with the correct path to your saved model
tokenizer = AutoTokenizer.from_pretrained(r"C:\Users\admin\Desktop\traini\bert")
model = AutoModelForSequenceClassification.from_pretrained(r"C:\Users\admin\Desktop\traini\bert")

# Function to classify new titles and calculate accuracy
def classify_and_evaluate(test_dataset):
    all_predictions = []
    all_labels = []

    for i in range(len(test_dataset)):
        inputs = {
            'input_ids': test_dataset[i]['input_ids'].unsqueeze(0),
            'attention_mask': test_dataset[i]['attention_mask'].unsqueeze(0)
        }
        labels = test_dataset[i]['labels'].unsqueeze(0)

        with torch.no_grad():
            outputs = model(**inputs)
        
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]

        # Append predictions and true labels
        all_predictions.append(predicted_class_id)
        all_labels.append(labels.numpy()[0])

        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Sample {i}")
      
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        print(f"True label: {labels.numpy()[0]}")
        print()

    # Calculate accuracy
    accuracy = np.mean(np.array(all_predictions) == np.array(all_labels))
    return accuracy

# Evaluate the model on the test dataset
accuracy = classify_and_evaluate(test_dataset)
print(f"Accuracy on the test dataset: {accuracy:.4f}")


Number of classes after filtering: 19
target
16    34
17    32
10    18
18    13
5     12
6     11
8      9
1      8
2      7
7      6
3      3
0      3
14     3
13     3
11     3
12     2
9      2
4      2
15     2
Name: count, dtype: int64


Map: 100%|██████████| 35/35 [00:00<00:00, 1896.94 examples/s]


Sample 0
Predicted class ID: 5
Confidence score: 0.5957
True label: 16

Sample 1
Predicted class ID: 17
Confidence score: 0.9867
True label: 17

Sample 2
Predicted class ID: 17
Confidence score: 0.9438
True label: 18

Sample 3
Predicted class ID: 10
Confidence score: 0.9556
True label: 10

Sample 4
Predicted class ID: 18
Confidence score: 0.9241
True label: 3

Sample 5
Predicted class ID: 18
Confidence score: 0.7310
True label: 18

Sample 6
Predicted class ID: 17
Confidence score: 0.9838
True label: 17

Sample 7
Predicted class ID: 5
Confidence score: 0.3041
True label: 7

Sample 8
Predicted class ID: 10
Confidence score: 0.5122
True label: 11

Sample 9
Predicted class ID: 5
Confidence score: 0.6100
True label: 5

Sample 10
Predicted class ID: 10
Confidence score: 0.6871
True label: 14

Sample 11
Predicted class ID: 10
Confidence score: 0.6826
True label: 8

Sample 12
Predicted class ID: 10
Confidence score: 0.8716
True label: 8

Sample 13
Predicted class ID: 10
Confidence score: 0.479

In [1]:
import pandas as pd
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
from transformers import EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import numpy as np
import os

# Ensure GPU is visible
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load your dataset from Excel
data = pd.read_excel(r'C:\Users\admin\Desktop\traini\newsdata.xlsx')

# Preprocess function
def preprocess_text(text):
    return text.lower().replace('\n', ' ').replace('\r', '')

# Apply preprocessing
data['text'] = data['text'].apply(preprocess_text)

# Remove classes with fewer than 2 samples
min_samples = 2
value_counts = data['target'].value_counts()
data = data[data['target'].isin(value_counts[value_counts >= min_samples].index)]

# Encode labels
label_encoder = LabelEncoder()
data['target'] = label_encoder.fit_transform(data['target'])
num_labels = len(label_encoder.classes_)

print(f"Number of classes after filtering: {num_labels}")
print(data['target'].value_counts())

# Tokenizer
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

# Tokenize function
def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True)

# Initialize cross-validation
kf = StratifiedKFold(n_splits=5)

# Cross-validation training and evaluation
accuracies = []
precisions = []
recalls = []
f1_scores = []

for fold, (train_index, val_index) in enumerate(kf.split(data, data['target'])):
    print(f"Training fold {fold + 1}")
    
    train_data = data.iloc[train_index].reset_index(drop=True)
    val_data = data.iloc[val_index].reset_index(drop=True)

    # Convert to Hugging Face Dataset
    train_dataset = Dataset.from_pandas(train_data)
    val_dataset = Dataset.from_pandas(val_data)
    
    # Tokenize the datasets
    train_dataset = train_dataset.map(tokenize, batched=True)
    val_dataset = val_dataset.map(tokenize, batched=True)

    # Rename the target column to labels
    train_dataset = train_dataset.rename_column("target", "labels")
    val_dataset = val_dataset.rename_column("target", "labels")

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Load the model
    model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results_fold_{fold}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=10,
        weight_decay=0.01,
        use_cpu=not torch.cuda.is_available(),
        report_to=[],
        save_total_limit=2,
        load_best_model_at_end=True,
    )

    # Create a Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    results = trainer.evaluate()
    print(f"Evaluation results for fold {fold + 1}: {results}")

    # Save the model and tokenizer
    model.save_pretrained(f'./results_fold_{fold}')
    tokenizer.save_pretrained(f'./results_fold_{fold}')
    
    # Calculate predictions and true labels
    predictions = np.argmax(trainer.predict(val_dataset).predictions, axis=1)
    true_labels = val_dataset['labels'].numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')
    
    # Append metrics for this fold
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)

# Calculate average metrics across all folds
average_accuracy = np.mean(accuracies)
average_precision = np.mean(precisions)
average_recall = np.mean(recalls)
average_f1_score = np.mean(f1_scores)

print(f"Average accuracy: {average_accuracy:.4f}")
print(f"Average precision: {average_precision:.4f}")
print(f"Average recall: {average_recall:.4f}")
print(f"Average F1-score: {average_f1_score:.4f}")

# Determine the best model based on average F1-score (or another metric)
best_fold_index = np.argmax(f1_scores)
print(f"The best model is from fold {best_fold_index + 1}")

# Load the best model for final evaluation on the test set
model_path = f'./results_fold_{best_fold_index}'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Function to classify new titles and calculate accuracy on the test set
def classify_and_evaluate(test_dataset):
    all_predictions = []
    all_labels = []

    for i in range(len(test_dataset)):
        inputs = {
            'input_ids': test_dataset[i]['input_ids'].unsqueeze(0),
            'attention_mask': test_dataset[i]['attention_mask'].unsqueeze(0)
        }
        labels = test_dataset[i]['labels'].unsqueeze(0)

        with torch.no_grad():
            outputs = model(**inputs)
        
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1).numpy()[0]
        predicted_class_id = np.argmax(probabilities)
        confidence_score = probabilities[predicted_class_id]

        # Append predictions and true labels
        all_predictions.append(predicted_class_id)
        all_labels.append(labels.numpy()[0])

        # Debugging: print logits, predicted class ID, and confidence score
        print(f"Sample {i}")
        print(f"Logits: {logits}")
        print(f"Predicted class ID: {predicted_class_id}")
        print(f"Confidence score: {confidence_score:.4f}")
        print(f"True label: {labels.numpy()[0]}")
        print()

    # Calculate accuracy
    accuracy = np.mean(np.array(all_predictions) == np.array(all_labels))
    return accuracy

# Convert test data to Hugging Face Dataset and tokenize
test_data = data.iloc[val_index].reset_index(drop=True)
test_dataset = Dataset.from_pandas(test_data)
test_dataset = test_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.rename_column("target", "labels")
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Evaluate the model on the test dataset
accuracy = classify_and_evaluate(test_dataset)
print(f"Accuracy on the test dataset: {accuracy:.4f}")




  from .autonotebook import tqdm as notebook_tqdm


Number of classes after filtering: 19
target
16    34
17    32
10    18
18    13
5     12
6     11
8      9
1      8
2      7
7      6
3      3
0      3
14     3
13     3
11     3
12     2
9      2
4      2
15     2
Name: count, dtype: int64




Training fold 1


Map: 100%|██████████| 138/138 [00:00<00:00, 5417.37 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 2706.55 examples/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                
 10%|█         | 18/180 [00:08<01:17,  2.09it/s]

{'eval_loss': 2.6629793643951416, 'eval_runtime': 0.3383, 'eval_samples_per_second': 103.473, 'eval_steps_per_second': 14.782, 'epoch': 1.0}


                                                
 20%|██        | 36/180 [00:18<01:06,  2.18it/s]

{'eval_loss': 2.5388598442077637, 'eval_runtime': 0.3265, 'eval_samples_per_second': 107.213, 'eval_steps_per_second': 15.316, 'epoch': 2.0}


                                                
 30%|███       | 54/180 [00:34<00:57,  2.18it/s]

{'eval_loss': 2.432974338531494, 'eval_runtime': 0.3272, 'eval_samples_per_second': 106.965, 'eval_steps_per_second': 15.281, 'epoch': 3.0}


                                                
 40%|████      | 72/180 [00:44<00:49,  2.18it/s]

{'eval_loss': 2.32399845123291, 'eval_runtime': 0.3415, 'eval_samples_per_second': 102.477, 'eval_steps_per_second': 14.64, 'epoch': 4.0}


                                                
 50%|█████     | 90/180 [00:54<00:40,  2.20it/s]

{'eval_loss': 2.224867820739746, 'eval_runtime': 0.3277, 'eval_samples_per_second': 106.819, 'eval_steps_per_second': 15.26, 'epoch': 5.0}


                                                 
 60%|██████    | 108/180 [01:05<00:32,  2.19it/s]

{'eval_loss': 2.141688585281372, 'eval_runtime': 0.3332, 'eval_samples_per_second': 105.058, 'eval_steps_per_second': 15.008, 'epoch': 6.0}


                                                 
 70%|███████   | 126/180 [01:16<00:24,  2.20it/s]

{'eval_loss': 2.0713295936584473, 'eval_runtime': 0.33, 'eval_samples_per_second': 106.045, 'eval_steps_per_second': 15.149, 'epoch': 7.0}


                                                 
 80%|████████  | 144/180 [01:30<00:16,  2.12it/s]

{'eval_loss': 2.0306930541992188, 'eval_runtime': 0.3308, 'eval_samples_per_second': 105.817, 'eval_steps_per_second': 15.117, 'epoch': 8.0}


                                                 
 90%|█████████ | 162/180 [01:41<00:08,  2.20it/s]

{'eval_loss': 2.0003602504730225, 'eval_runtime': 0.3282, 'eval_samples_per_second': 106.653, 'eval_steps_per_second': 15.236, 'epoch': 9.0}


                                                 
100%|██████████| 180/180 [01:51<00:00,  2.21it/s]

{'eval_loss': 1.9905593395233154, 'eval_runtime': 0.3297, 'eval_samples_per_second': 106.172, 'eval_steps_per_second': 15.167, 'epoch': 10.0}


100%|██████████| 180/180 [01:54<00:00,  1.58it/s]


{'train_runtime': 114.1219, 'train_samples_per_second': 12.092, 'train_steps_per_second': 1.577, 'train_loss': 1.9973490397135416, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 29.23it/s]


Evaluation results for fold 1: {'eval_loss': 1.9905593395233154, 'eval_runtime': 0.2265, 'eval_samples_per_second': 154.54, 'eval_steps_per_second': 22.077, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 28.62it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training fold 2


Map: 100%|██████████| 138/138 [00:00<00:00, 5422.86 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 2371.58 examples/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 10%|█         | 18/180 [00:08<01:13,  2.21it/s]
 10%|█         | 18/180 [00:08<01:13,  2.21it/s]

{'eval_loss': 2.737196683883667, 'eval_runtime': 0.2883, 'eval_samples_per_second': 121.422, 'eval_steps_per_second': 17.346, 'epoch': 1.0}


 20%|██        | 36/180 [00:18<01:06,  2.18it/s]
 20%|██        | 36/180 [00:19<01:06,  2.18it/s]

{'eval_loss': 2.5771827697753906, 'eval_runtime': 0.2943, 'eval_samples_per_second': 118.945, 'eval_steps_per_second': 16.992, 'epoch': 2.0}


 30%|███       | 54/180 [00:32<00:57,  2.20it/s]
 30%|███       | 54/180 [00:32<00:57,  2.20it/s]

{'eval_loss': 2.5126283168792725, 'eval_runtime': 0.2848, 'eval_samples_per_second': 122.904, 'eval_steps_per_second': 17.558, 'epoch': 3.0}


 40%|████      | 72/180 [00:42<00:48,  2.22it/s]
 40%|████      | 72/180 [00:42<00:48,  2.22it/s]

{'eval_loss': 2.4291553497314453, 'eval_runtime': 0.2868, 'eval_samples_per_second': 122.025, 'eval_steps_per_second': 17.432, 'epoch': 4.0}


 50%|█████     | 90/180 [00:53<00:40,  2.20it/s]
 50%|█████     | 90/180 [00:53<00:40,  2.20it/s]

{'eval_loss': 2.3571975231170654, 'eval_runtime': 0.2958, 'eval_samples_per_second': 118.319, 'eval_steps_per_second': 16.903, 'epoch': 5.0}


 60%|██████    | 108/180 [01:03<00:33,  2.18it/s]
 60%|██████    | 108/180 [01:04<00:33,  2.18it/s]

{'eval_loss': 2.2936275005340576, 'eval_runtime': 0.2957, 'eval_samples_per_second': 118.347, 'eval_steps_per_second': 16.907, 'epoch': 6.0}


 70%|███████   | 126/180 [01:13<00:24,  2.20it/s]
 70%|███████   | 126/180 [01:14<00:24,  2.20it/s]

{'eval_loss': 2.2478575706481934, 'eval_runtime': 0.2808, 'eval_samples_per_second': 124.644, 'eval_steps_per_second': 17.806, 'epoch': 7.0}


 80%|████████  | 144/180 [01:24<00:16,  2.19it/s]
 80%|████████  | 144/180 [01:24<00:16,  2.19it/s]

{'eval_loss': 2.209359645843506, 'eval_runtime': 0.2898, 'eval_samples_per_second': 120.789, 'eval_steps_per_second': 17.256, 'epoch': 8.0}


 90%|█████████ | 162/180 [01:38<00:08,  2.18it/s]
 90%|█████████ | 162/180 [01:38<00:08,  2.18it/s]

{'eval_loss': 2.192894697189331, 'eval_runtime': 0.2878, 'eval_samples_per_second': 121.632, 'eval_steps_per_second': 17.376, 'epoch': 9.0}


100%|██████████| 180/180 [01:49<00:00,  2.16it/s]
100%|██████████| 180/180 [01:49<00:00,  2.16it/s]

{'eval_loss': 2.187197208404541, 'eval_runtime': 0.289, 'eval_samples_per_second': 121.112, 'eval_steps_per_second': 17.302, 'epoch': 10.0}


100%|██████████| 180/180 [01:54<00:00,  1.57it/s]


{'train_runtime': 114.5036, 'train_samples_per_second': 12.052, 'train_steps_per_second': 1.572, 'train_loss': 2.051239522298177, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 33.30it/s]


Evaluation results for fold 2: {'eval_loss': 2.187197208404541, 'eval_runtime': 0.1947, 'eval_samples_per_second': 179.74, 'eval_steps_per_second': 25.677, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 33.46it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training fold 3


Map: 100%|██████████| 138/138 [00:00<00:00, 5981.21 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 4383.29 examples/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 10%|█         | 18/180 [00:07<01:12,  2.23it/s]
 10%|█         | 18/180 [00:08<01:12,  2.23it/s]

{'eval_loss': 2.7104122638702393, 'eval_runtime': 0.3735, 'eval_samples_per_second': 93.715, 'eval_steps_per_second': 13.388, 'epoch': 1.0}


 20%|██        | 36/180 [00:18<01:05,  2.19it/s]
 20%|██        | 36/180 [00:18<01:05,  2.19it/s]

{'eval_loss': 2.5723876953125, 'eval_runtime': 0.3467, 'eval_samples_per_second': 100.951, 'eval_steps_per_second': 14.422, 'epoch': 2.0}


 30%|███       | 54/180 [00:31<00:57,  2.19it/s]
 30%|███       | 54/180 [00:31<00:57,  2.19it/s]

{'eval_loss': 2.499913215637207, 'eval_runtime': 0.345, 'eval_samples_per_second': 101.446, 'eval_steps_per_second': 14.492, 'epoch': 3.0}


 40%|████      | 72/180 [00:41<00:48,  2.21it/s]
 40%|████      | 72/180 [00:41<00:48,  2.21it/s]

{'eval_loss': 2.4162464141845703, 'eval_runtime': 0.3476, 'eval_samples_per_second': 100.679, 'eval_steps_per_second': 14.383, 'epoch': 4.0}


 50%|█████     | 90/180 [00:52<00:40,  2.20it/s]
 50%|█████     | 90/180 [00:52<00:40,  2.20it/s]

{'eval_loss': 2.3385276794433594, 'eval_runtime': 0.3338, 'eval_samples_per_second': 104.867, 'eval_steps_per_second': 14.981, 'epoch': 5.0}


 60%|██████    | 108/180 [01:09<00:32,  2.20it/s]
 60%|██████    | 108/180 [01:09<00:32,  2.20it/s]

{'eval_loss': 2.270643949508667, 'eval_runtime': 0.3427, 'eval_samples_per_second': 102.128, 'eval_steps_per_second': 14.59, 'epoch': 6.0}


 70%|███████   | 126/180 [01:19<00:24,  2.22it/s]
 70%|███████   | 126/180 [01:19<00:24,  2.22it/s]

{'eval_loss': 2.2232697010040283, 'eval_runtime': 0.3346, 'eval_samples_per_second': 104.591, 'eval_steps_per_second': 14.942, 'epoch': 7.0}


 80%|████████  | 144/180 [01:33<00:16,  2.23it/s]
 80%|████████  | 144/180 [01:33<00:16,  2.23it/s]

{'eval_loss': 2.1843016147613525, 'eval_runtime': 0.3289, 'eval_samples_per_second': 106.403, 'eval_steps_per_second': 15.2, 'epoch': 8.0}


 90%|█████████ | 162/180 [01:43<00:08,  2.21it/s]
 90%|█████████ | 162/180 [01:43<00:08,  2.21it/s]

{'eval_loss': 2.1633288860321045, 'eval_runtime': 0.3446, 'eval_samples_per_second': 101.557, 'eval_steps_per_second': 14.508, 'epoch': 9.0}


100%|██████████| 180/180 [01:56<00:00,  2.21it/s]
100%|██████████| 180/180 [01:57<00:00,  2.21it/s]

{'eval_loss': 2.1595945358276367, 'eval_runtime': 0.3394, 'eval_samples_per_second': 103.134, 'eval_steps_per_second': 14.733, 'epoch': 10.0}


100%|██████████| 180/180 [01:58<00:00,  1.52it/s]


{'train_runtime': 118.4227, 'train_samples_per_second': 11.653, 'train_steps_per_second': 1.52, 'train_loss': 2.062643771701389, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 28.20it/s]


Evaluation results for fold 3: {'eval_loss': 2.1595945358276367, 'eval_runtime': 0.2297, 'eval_samples_per_second': 152.401, 'eval_steps_per_second': 21.772, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 27.93it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training fold 4


Map: 100%|██████████| 139/139 [00:00<00:00, 5312.87 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 2447.72 examples/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 10%|█         | 18/180 [00:08<01:14,  2.18it/s]
 10%|█         | 18/180 [00:08<01:14,  2.18it/s]

{'eval_loss': 2.7274956703186035, 'eval_runtime': 0.2868, 'eval_samples_per_second': 118.551, 'eval_steps_per_second': 17.434, 'epoch': 1.0}


 20%|██        | 36/180 [00:18<01:06,  2.18it/s]
 20%|██        | 36/180 [00:18<01:06,  2.18it/s]

{'eval_loss': 2.559138774871826, 'eval_runtime': 0.2939, 'eval_samples_per_second': 115.676, 'eval_steps_per_second': 17.011, 'epoch': 2.0}


 30%|███       | 54/180 [00:29<00:58,  2.16it/s]
 30%|███       | 54/180 [00:29<00:58,  2.16it/s]

{'eval_loss': 2.4832592010498047, 'eval_runtime': 0.289, 'eval_samples_per_second': 117.645, 'eval_steps_per_second': 17.301, 'epoch': 3.0}


 40%|████      | 72/180 [00:43<00:50,  2.14it/s]
 40%|████      | 72/180 [00:43<00:50,  2.14it/s]

{'eval_loss': 2.446662664413452, 'eval_runtime': 0.2898, 'eval_samples_per_second': 117.333, 'eval_steps_per_second': 17.255, 'epoch': 4.0}


 50%|█████     | 90/180 [00:53<00:41,  2.17it/s]
 50%|█████     | 90/180 [00:53<00:41,  2.17it/s]

{'eval_loss': 2.3675167560577393, 'eval_runtime': 0.2863, 'eval_samples_per_second': 118.77, 'eval_steps_per_second': 17.466, 'epoch': 5.0}


 60%|██████    | 108/180 [01:09<00:33,  2.15it/s]
 60%|██████    | 108/180 [01:10<00:33,  2.15it/s]

{'eval_loss': 2.3071846961975098, 'eval_runtime': 0.2931, 'eval_samples_per_second': 116.003, 'eval_steps_per_second': 17.059, 'epoch': 6.0}


 70%|███████   | 126/180 [01:20<00:24,  2.16it/s]
 70%|███████   | 126/180 [01:20<00:24,  2.16it/s]

{'eval_loss': 2.3071341514587402, 'eval_runtime': 0.2922, 'eval_samples_per_second': 116.375, 'eval_steps_per_second': 17.114, 'epoch': 7.0}


 80%|████████  | 144/180 [01:37<00:16,  2.12it/s]
 80%|████████  | 144/180 [01:37<00:16,  2.12it/s]

{'eval_loss': 2.2528059482574463, 'eval_runtime': 0.2948, 'eval_samples_per_second': 115.328, 'eval_steps_per_second': 16.96, 'epoch': 8.0}


 90%|█████████ | 162/180 [01:47<00:08,  2.12it/s]
 90%|█████████ | 162/180 [01:47<00:08,  2.12it/s]

{'eval_loss': 2.253525495529175, 'eval_runtime': 0.2937, 'eval_samples_per_second': 115.748, 'eval_steps_per_second': 17.022, 'epoch': 9.0}


100%|██████████| 180/180 [02:05<00:00,  2.12it/s]
100%|██████████| 180/180 [02:05<00:00,  2.12it/s]

{'eval_loss': 2.248444080352783, 'eval_runtime': 0.2928, 'eval_samples_per_second': 116.102, 'eval_steps_per_second': 17.074, 'epoch': 10.0}


100%|██████████| 180/180 [02:11<00:00,  1.37it/s]


{'train_runtime': 131.6639, 'train_samples_per_second': 10.557, 'train_steps_per_second': 1.367, 'train_loss': 2.0204199896918404, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 31.72it/s]


Evaluation results for fold 4: {'eval_loss': 2.248444080352783, 'eval_runtime': 0.2046, 'eval_samples_per_second': 166.211, 'eval_steps_per_second': 24.443, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 32.86it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training fold 5


Map: 100%|██████████| 139/139 [00:00<00:00, 5802.17 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 3071.23 examples/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 10%|█         | 18/180 [00:07<01:14,  2.17it/s]
 10%|█         | 18/180 [00:08<01:14,  2.17it/s]

{'eval_loss': 2.7480034828186035, 'eval_runtime': 0.3223, 'eval_samples_per_second': 105.499, 'eval_steps_per_second': 15.515, 'epoch': 1.0}


 20%|██        | 36/180 [00:20<01:07,  2.14it/s]
 20%|██        | 36/180 [00:20<01:07,  2.14it/s]

{'eval_loss': 2.607926368713379, 'eval_runtime': 0.3219, 'eval_samples_per_second': 105.629, 'eval_steps_per_second': 15.534, 'epoch': 2.0}


 30%|███       | 54/180 [00:38<00:59,  2.13it/s]
 30%|███       | 54/180 [00:39<00:59,  2.13it/s]

{'eval_loss': 2.5312492847442627, 'eval_runtime': 0.3152, 'eval_samples_per_second': 107.871, 'eval_steps_per_second': 15.863, 'epoch': 3.0}


 40%|████      | 72/180 [00:49<00:51,  2.10it/s]
 40%|████      | 72/180 [00:49<00:51,  2.10it/s]

{'eval_loss': 2.457970142364502, 'eval_runtime': 0.3178, 'eval_samples_per_second': 106.973, 'eval_steps_per_second': 15.731, 'epoch': 4.0}


 50%|█████     | 90/180 [01:06<00:42,  2.11it/s]
 50%|█████     | 90/180 [01:07<00:42,  2.11it/s]

{'eval_loss': 2.4033074378967285, 'eval_runtime': 0.3248, 'eval_samples_per_second': 104.677, 'eval_steps_per_second': 15.394, 'epoch': 5.0}


 60%|██████    | 108/180 [01:23<00:34,  2.11it/s]
 60%|██████    | 108/180 [01:23<00:34,  2.11it/s]

{'eval_loss': 2.358975887298584, 'eval_runtime': 0.3317, 'eval_samples_per_second': 102.499, 'eval_steps_per_second': 15.073, 'epoch': 6.0}


 70%|███████   | 126/180 [01:33<00:26,  2.03it/s]
 70%|███████   | 126/180 [01:34<00:26,  2.03it/s]

{'eval_loss': 2.3189873695373535, 'eval_runtime': 0.3203, 'eval_samples_per_second': 106.151, 'eval_steps_per_second': 15.61, 'epoch': 7.0}


 80%|████████  | 144/180 [01:51<00:16,  2.15it/s]
 80%|████████  | 144/180 [01:52<00:16,  2.15it/s]

{'eval_loss': 2.2961490154266357, 'eval_runtime': 0.3242, 'eval_samples_per_second': 104.875, 'eval_steps_per_second': 15.423, 'epoch': 8.0}


 90%|█████████ | 162/180 [02:06<00:08,  2.13it/s]
 90%|█████████ | 162/180 [02:06<00:08,  2.13it/s]

{'eval_loss': 2.2917842864990234, 'eval_runtime': 0.3248, 'eval_samples_per_second': 104.688, 'eval_steps_per_second': 15.395, 'epoch': 9.0}


100%|██████████| 180/180 [02:17<00:00,  2.17it/s]
100%|██████████| 180/180 [02:17<00:00,  2.17it/s]

{'eval_loss': 2.285047769546509, 'eval_runtime': 0.3473, 'eval_samples_per_second': 97.889, 'eval_steps_per_second': 14.395, 'epoch': 10.0}


100%|██████████| 180/180 [02:20<00:00,  1.29it/s]


{'train_runtime': 140.0481, 'train_samples_per_second': 9.925, 'train_steps_per_second': 1.285, 'train_loss': 2.0575547960069445, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 30.05it/s]


Evaluation results for fold 5: {'eval_loss': 2.285047769546509, 'eval_runtime': 0.218, 'eval_samples_per_second': 155.93, 'eval_steps_per_second': 22.931, 'epoch': 10.0}


100%|██████████| 5/5 [00:00<00:00, 29.93it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Average accuracy: 0.4099
Average precision: 0.3163
Average recall: 0.4099
Average F1-score: 0.3231
The best model is from fold 1


Map: 100%|██████████| 34/34 [00:00<00:00, 5681.30 examples/s]


Sample 0
Logits: tensor([[-0.5147,  0.1452, -0.1827,  0.3754, -0.4275,  0.1474,  0.1920, -0.2378,
         -0.0086, -0.8068,  0.6469, -0.7972, -0.3134, -0.6750, -0.5901, -0.6189,
          1.0171,  0.4323,  0.5441]])
Predicted class ID: 16
Confidence score: 0.1382
True label: 3

Sample 1
Logits: tensor([[-0.2758, -0.1615, -0.1410, -0.4631, -0.6451,  0.5631,  1.4634,  0.5597,
          0.0450, -0.6723,  0.4159, -0.5304, -0.3976, -0.7369, -0.6446, -0.5435,
          1.1181,  0.6323,  0.0744]])
Predicted class ID: 6
Confidence score: 0.1847
True label: 6

Sample 2
Logits: tensor([[-0.1676, -0.1231, -0.1395, -0.3573, -0.5591,  0.5704,  1.4357,  0.4759,
         -0.0123, -0.7218,  0.4429, -0.6238, -0.3513, -0.7556, -0.6621, -0.4805,
          0.9892,  0.5345,  0.0711]])
Predicted class ID: 6
Confidence score: 0.1844
True label: 6

Sample 3
Logits: tensor([[-0.1351, -0.2074, -0.0946, -0.4974, -0.6130,  0.6034,  1.0048,  0.8198,
         -0.0852, -0.6555,  0.2955, -0.3564, -0.3423, -0.5509, -