In [3]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

import bert_functions as bf
import pandas as pd
import os

# Model with best results so far
model_dir = os.path.join('models', 'size_16000_00:56:40')

model = BertForSequenceClassification.from_pretrained(model_dir, device_map='cpu')
tokenizer = BertTokenizer.from_pretrained(model_dir)

# Different datasets
encoded_dir = 'encoded'
loaded_datasets = bf.load_encoded_datasets(encoded_dir, bf.all_datasets)

# List with test datasets
names = ['dirty_DBLP-ACM_test', 'dirty_DBLP-GoogleScholar_test', 'dirty_iTunes-Amazon_test', \
            'structured_Amazon-Google_test', 'structured_Beer_test', 'structured_DBLP-ACM_test', \
            'structured_DBLP-ACM_test', 'structured_DBLP-GoogleScholar_test', 'structured_Fodors-Zagats_test', \
            'structured_iTunes-Amazon_test', 'structured_Walmart-Amazon_test', 'textual_Abt-Buy_test', \
]

# Create a DataFrame to store the results
results_df = pd.DataFrame(columns=['Type', 'Dataset', 'Accuracy', 'Precision', 'Recall', 'F1'])

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

trainer = Trainer(
    model=model, 
    args=training_args,
    compute_metrics=bf.compute_metrics,
)

# Evaluate the model on each dataset
for dataset in names:
    combined_test_dataset = loaded_datasets[dataset]
    test_dataset = bf.CustomDataset(combined_test_dataset)

    # Evaluate the model
    test_result = trainer.evaluate(eval_dataset=test_dataset)
    
    results_df.loc[len(results_df)] = [
        dataset.split('_')[0], 
        dataset.split('_')[1], 
        test_result['eval_accuracy'], 
        test_result['eval_precision'], 
        test_result['eval_recall'], 
        test_result['eval_f1']
    ]

# Save the results to a csv in the results folder in parent directory
parent_dir = os.path.dirname(os.getcwd())
results_path = os.path.join(parent_dir, 'results', 'bert_results.csv')
results_df.to_csv(results_path)

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/14 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/34 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/1 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/8 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/1 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/14 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/14 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/34 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/1 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/1 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/7 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.data.items()}


  0%|          | 0/7 [00:00<?, ?it/s]