In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
! pip3 install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
from sklearn.model_selection import train_test_split

# Twitter 2015

#### Twitter fake news dataset

In [8]:
train = pd.read_csv('/content/drive/MyDrive/NLP Project/Datasets/Twitter2015/tweetstrain2015.csv')
train.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,tweetId,tweetText,label
0,0,0,263046056240115712,se acuerdan de la pel cula el despu de ana rec...,fake
1,1,1,262995061304852481,milenagimon miren sandi en ny tremenda imagen ...,fake
2,2,2,262979898002534400,buena la foto del hurac n sandi recuerda la pe...,fake
3,3,3,262996108400271360,scari shit hurrican ny http co e jlbufh,fake
4,4,4,263018881839411200,fave place world nyc hurrican sandi statueofli...,fake


In [9]:
test = pd.read_csv('/content/drive/MyDrive/NLP Project/Datasets/Twitter2015/tweetstest2015.csv')
test.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,tweetId,tweetText,label
0,0,0,578854927457349632,kereeen rt shyman eclips iss http co je hcfpvfn,fake
1,1,1,578874632670953472,absolut beauti rt shyman eclips iss http co oq...,fake
2,2,2,578891261353984000,shyman eclips iss http co c vfboscrj wow amaz,fake
3,3,3,578846612312748032,eclips iss http co en otvsu,fake
4,4,4,578975333841551360,ebonfigli clips vue de l iss autr chose http c...,fake


In [10]:
#encode output
label_mapping = {'fake': 1, 'real': 0}
train.label = train.label.map(label_mapping)
test.label = test.label.map(label_mapping)

### Train val split

In [11]:
train = train.dropna()
X_train, X_val, y_train, y_val = train_test_split(train['tweetText'], train['label'], test_size=0.2, random_state=42)
X_test, y_test = test['tweetText'], test.label

#### Preprocessing the dataset

In [12]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('roberta-base')

Downloading (‚Ä¶)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading (‚Ä¶)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (‚Ä¶)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (‚Ä¶)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [13]:
X_train_encoded = tokenizer(
    list(X_train.values),
    padding=True,
    truncation=True,
    return_tensors='pt'
)
X_val_encoded = tokenizer(
    list(X_val.values),
    padding=True,
    truncation=True,
    return_tensors='pt'
)
X_test_encoded = tokenizer(
    list(X_test),
    padding=True,
    truncation=True,
    return_tensors='pt'
)

In [14]:
class FakeNewsDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [15]:
train_dataset = FakeNewsDataset(X_train_encoded, y_train.values)
val_dataset = FakeNewsDataset(X_val_encoded, y_val.values)
test_dataset = FakeNewsDataset(X_test_encoded, y_test.values)
train_loader = DataLoader(train_dataset, batch_size=32)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

## Training

In [16]:
pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m81.4/81.4 KB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m110.5/110.5 KB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting datasets>=2.0.0
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m468.7/468.7 KB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess
  Downloading multiprocess-0.70.14-py39-none-

In [17]:
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification
import evaluate
import numpy as np
import os
os.environ['HF_MLFLOW_LOG_ARTIFACTS'] = "1" # save models as artifact for the expirment

In [18]:
def compute_metrics(eval_preds):
    metric = evaluate.combine(["accuracy", "f1", "precision", "recall"])
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels) 

In [19]:
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/NLP Project/saved_models/roberta/',          # output directory
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=400,
    evaluation_strategy='steps',
    eval_steps=400,
    load_best_model_at_end=True,
    save_total_limit=3,
    save_steps=400

)

model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

trainer = Trainer(
    model=model,                         # the instantiated ü§ó Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,             # evaluation dataset
    compute_metrics=compute_metrics
)


Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [20]:
trainer.train()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
400,0.5984,0.400362,0.835434,0.880102,0.834947,0.930421
800,0.5135,0.447994,0.844538,0.879675,0.883987,0.875405
1200,0.4626,0.405099,0.860294,0.898757,0.848586,0.955232
1600,0.4057,0.406613,0.852941,0.885246,0.89701,0.873786
2000,0.421,0.431127,0.842087,0.888779,0.818719,0.971953
2400,0.4336,0.362283,0.885154,0.914136,0.888098,0.941748
2800,0.4039,0.440026,0.87535,0.906069,0.88688,0.926106
3200,0.4463,0.408838,0.883754,0.912723,0.890256,0.936354
3600,0.353,0.458809,0.876751,0.905882,0.898197,0.9137
4000,0.3677,0.324697,0.89881,0.925149,0.889885,0.963323


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encod

TrainOutput(global_step=7140, training_loss=0.38307512961849777, metrics={'train_runtime': 6781.8977, 'train_samples_per_second': 8.42, 'train_steps_per_second': 1.053, 'total_flos': 1.50249568163328e+16, 'train_loss': 0.38307512961849777, 'epoch': 5.0})

### Calculate performance

In [23]:
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/NLP Project/saved_models/roberta/checkpoint-6800/')

In [24]:
from tqdm import tqdm
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

tp = 0.0  # true positives
fp = 0.0  # false positives
tn = 0.0  # true negatives
fn = 0.0  # false negatives

with torch.no_grad():
    for data in tqdm(test_loader):
        input_ids, labels = data['input_ids'].to(device), data['labels'].to(device)
        out = torch.softmax(model(input_ids).logits, dim=1)
        preds = torch.argmax(out, dim=1)
        
        # update confusion matrix
        tp += ((preds == 1) & (labels == 1)).sum().item()
        fp += ((preds == 1) & (labels == 0)).sum().item()
        tn += ((preds == 0) & (labels == 0)).sum().item()
        fn += ((preds == 0) & (labels == 1)).sum().item()
        
        acc = (tp + tn) / (tp + fp + tn + fn)
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1_score = 2 * precision * recall / (precision + recall)

print(f"Accuracy: {acc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 118/118 [01:47<00:00,  1.10it/s]

Accuracy: 0.6772
Precision: 0.6798
Recall: 0.9906
F1 Score: 0.8063





In [25]:
eval_result = trainer.evaluate(eval_dataset=val_dataset)

# Print the evaluation results
print("Evaluation results:")
for key, value in eval_result.items():
    print(f"{key}: {value:.4f}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation results:
eval_loss: 0.3247
eval_accuracy: 0.8988
eval_f1: 0.9251
eval_precision: 0.8899
eval_recall: 0.9633
eval_runtime: 74.4725
eval_samples_per_second: 38.3500
eval_steps_per_second: 4.7940
epoch: 5.0000


In [26]:
eval_result = trainer.evaluate(eval_dataset=test_dataset)

# Print the evaluation results
print("Evaluation results:")
for key, value in eval_result.items():
    print(f"{key}: {value:.4f}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation results:
eval_loss: 1.1954
eval_accuracy: 0.6096
eval_f1: 0.6362
eval_precision: 0.8639
eval_recall: 0.5035
eval_runtime: 110.1998
eval_samples_per_second: 34.0740
eval_steps_per_second: 4.2650
epoch: 5.0000


# Twitter 2016

#### Twitter fake news dataset

In [27]:
train = pd.read_csv('/content/drive/MyDrive/NLP Project/Datasets/Twitter2016/tweetstrain2016.csv')
train.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,tweetId,tweetText,label
0,0,0,324597532548276224,need fed solv bostonbomb chan http co exqtpzqqbg,fake
1,1,1,325145334739267584,pic comparison boston suspect sunil tripathi f...,fake
2,2,2,325152091423248385,complet convinc sunil tripathi fellow http co ...,fake
3,3,3,324554646976868352,brutal lo que se pued conseguir en colaboraci ...,fake
4,4,4,324315545572896768,chan bomb throw http co diyso lxqm http co nxb...,fake


In [28]:
test = pd.read_csv('/content/drive/MyDrive/NLP Project/Datasets/Twitter2016/tweetstest2016.csv')
test.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,tweetId,tweetText,label
0,0,0,651118294447951872,antiterror arabianblood russianfeder syria dai...,fake
1,1,1,651115824065830912,n http co ws jxmgj,fake
2,2,2,651095856662360065,http co lz awn nhttp co gvsduzmaxa,fake
3,3,3,651086828234104832,http co kg quidotf http co dkfmxppje,fake
4,4,4,651034616007106560,nthe airstrik isi ammunit depot near talbiseh ...,fake


In [29]:
#encode output
label_mapping = {'fake': 1, 'real': 0}
train.label = train.label.map(label_mapping)
test.label = test.label.map(label_mapping)

### Train val split

In [30]:
train = train.dropna()
X_train, X_val, y_train, y_val = train_test_split(train['tweetText'], train['label'], test_size=0.2, random_state=42)
X_test, y_test = test['tweetText'], test.label

#### Preprocessing the dataset

In [31]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('roberta-base')

In [32]:
X_train_encoded = tokenizer(
    list(X_train.values),
    padding=True,
    truncation=True,
    return_tensors='pt'
)
X_val_encoded = tokenizer(
    list(X_val.values),
    padding=True,
    truncation=True,
    return_tensors='pt'
)
X_test_encoded = tokenizer(
    list(X_test),
    padding=True,
    truncation=True,
    return_tensors='pt'
)

In [33]:
class FakeNewsDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [34]:
train_dataset = FakeNewsDataset(X_train_encoded, y_train.values)
val_dataset = FakeNewsDataset(X_val_encoded, y_val.values)
test_dataset = FakeNewsDataset(X_test_encoded, y_test.values)
train_loader = DataLoader(train_dataset, batch_size=32)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

## Training

In [35]:
pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [36]:
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification
import evaluate
import numpy as np
import os
os.environ['HF_MLFLOW_LOG_ARTIFACTS'] = "1" # save models as artifact for the expirment

In [37]:
def compute_metrics(eval_preds):
    metric = evaluate.combine(["accuracy", "f1", "precision", "recall"])
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels) 

In [38]:
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/NLP Project/saved_models/roberta/',          # output directory
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=400,
    evaluation_strategy='steps',
    eval_steps=400,
    load_best_model_at_end=True,
    save_total_limit=3,
    save_steps=400

)

model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

trainer = Trainer(
    model=model,                         # the instantiated ü§ó Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,             # evaluation dataset
    compute_metrics=compute_metrics
)


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [39]:
trainer.train()

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
400,0.5449,0.394717,0.829495,0.859255,0.861758,0.856767
800,0.4469,0.610436,0.823417,0.870605,0.784537,0.977883
1200,0.4615,0.548898,0.851887,0.885083,0.837089,0.938915
1600,0.4507,0.442144,0.857326,0.888332,0.846778,0.934176
2000,0.5221,0.485041,0.859245,0.887927,0.859891,0.917852
2400,0.4285,0.428459,0.856046,0.890777,0.826204,0.966298
2800,0.4063,0.353217,0.861484,0.893166,0.840297,0.953133
3200,0.361,0.533062,0.803903,0.860396,0.758026,0.994734
3600,0.4839,0.455833,0.861484,0.891069,0.853083,0.932596
4000,0.3957,0.484908,0.862444,0.886842,0.886376,0.887309


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encod

Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
400,0.5449,0.394717,0.829495,0.859255,0.861758,0.856767
800,0.4469,0.610436,0.823417,0.870605,0.784537,0.977883
1200,0.4615,0.548898,0.851887,0.885083,0.837089,0.938915
1600,0.4507,0.442144,0.857326,0.888332,0.846778,0.934176
2000,0.5221,0.485041,0.859245,0.887927,0.859891,0.917852
2400,0.4285,0.428459,0.856046,0.890777,0.826204,0.966298
2800,0.4063,0.353217,0.861484,0.893166,0.840297,0.953133
3200,0.361,0.533062,0.803903,0.860396,0.758026,0.994734
3600,0.4839,0.455833,0.861484,0.891069,0.853083,0.932596
4000,0.3957,0.484908,0.862444,0.886842,0.886376,0.887309


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


TrainOutput(global_step=7815, training_loss=0.3811940579405215, metrics={'train_runtime': 1446.9053, 'train_samples_per_second': 43.206, 'train_steps_per_second': 5.401, 'total_flos': 1702665125330100.0, 'train_loss': 0.3811940579405215, 'epoch': 5.0})

### Calculate performance

In [40]:
model = RobertaForSequenceClassification.from_pretrained('/content/drive/MyDrive/NLP Project/saved_models/roberta/checkpoint-7600/')

In [42]:
eval_result = trainer.evaluate(eval_dataset=val_dataset)

# Print the evaluation results
print("Evaluation results:")
for key, value in eval_result.items():
    print(f"{key}: {value:.4f}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation results:
eval_loss: 0.3471
eval_accuracy: 0.8874
eval_f1: 0.9083
eval_precision: 0.8989
eval_recall: 0.9179
eval_runtime: 16.8339
eval_samples_per_second: 185.6970
eval_steps_per_second: 23.2270
epoch: 5.0000


In [43]:
eval_result = trainer.evaluate(eval_dataset=test_dataset)

# Print the evaluation results
print("Evaluation results:")
for key, value in eval_result.items():
    print(f"{key}: {value:.4f}")


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Evaluation results:
eval_loss: 1.2628
eval_accuracy: 0.5599
eval_f1: 0.6137
eval_precision: 0.5881
eval_recall: 0.6417
eval_runtime: 72.1376
eval_samples_per_second: 30.1780
eval_steps_per_second: 3.7840
epoch: 5.0000
