# Testing Code

In [None]:
!pip install --upgrade pandas

In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import BertForTokenClassification
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
from config import Config
from cleaning import DataReader
from utils import compute_metrics, read_labels, get_label_map, get_inv_label_map
from preprocess import NERDataset
from sklearn.metrics import classification_report

class Tester:
    def __init__(self, test_dataset_path):
        self.cfg = Config()
        
        self.device = self.cfg.device 

        self.data_reader = DataReader("TrainingG_Data.txt")

        self.data, _, _ = self.data_reader.read_data_bert()

        self.label_list = read_labels('NewEntities.txt')

        self.label_map = get_label_map(self.label_list)
        self.inv_label_map = get_inv_label_map(self.label_list)

        self.test_data_reader = DataReader(test_dataset_path)
        self.test_data, _, _ = self.test_data_reader.read_data_bert()

        self.test_dataset = NERDataset(
            texts=[x[0] for x in self.test_data],  # Extract texts from the test data.
            tags=[x[1] for x in self.test_data],  # Extract tags (labels) from the test data.
            label_list=self.label_list,  # Provide the list of labels.
            model_name=self.cfg.MODEL_NAME,  # Model name from the configuration, used for tokenizer initialization.
            max_length=self.cfg.MAX_LEN  # Maximum sequence length from the configuration.
        )

    def model_test(self, test_dl, model, device):
        with torch.no_grad():  # Disable gradient calculations for inference.
            model.to(device)  # Move the model to the specified device (CPU or GPU).
            model.eval()  # Set the model to evaluation mode.
            final_loss = 0  # Initialize total loss for testing.
            all_predictions = []  # List to store all predictions.
            all_labels = []  # List to store all true labels.

            for data in tqdm(test_dl, total=len(test_dl)):
                input_ids = data['input_ids'].to(device)
                attention_mask = data['attention_mask'].to(device)
                token_type_ids = data['token_type_ids'].to(device)
                labels = data['labels'].to(device)

                outputs = model(input_ids=input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask,
                                labels=labels)

                loss = outputs.loss
                final_loss += loss.item()
                all_labels.extend(labels.to('cpu').numpy())
                all_predictions.extend(outputs.logits.to('cpu').numpy())

            metrics = compute_metrics(predictions=np.asarray(all_predictions), labels=np.asarray(all_labels), inv_label_map=self.inv_label_map)

            accuracy_score = metrics['accuracy_score']
            precision = metrics['precision']
            recall = metrics['recall']
            f1 = metrics['f1']

            print(f' Accuracy: {accuracy_score}')
            print(f' Precision: {precision}')
            print(f' Recall: {recall}')
            print(f' F1: {f1}')
            

    def run(self):
        model = BertForTokenClassification.from_pretrained(self.cfg.MODEL_NAME,
                                                           return_dict=True,
                                                           num_labels=len(self.label_map),
                                                           output_attentions=False,
                                                           output_hidden_states=False)

        test_data_loader = DataLoader(dataset=self.test_dataset, batch_size=self.cfg.TEST_BATCH_SIZE, shuffle=True)

        model.load_state_dict(torch.load('JuneModel_G.pt', map_location=self.device))

        self.model_test(test_dl=test_data_loader, model=model, device=self.device)

if __name__ == '__main__':
    test_dataset_path = "TestingData.txt"
    tester = Tester(test_dataset_path)
    tester.run()


  from pandas.core import (


Data: 27268 , Sentences: 27268 , Tags: 27268
Data: 856 , Sentences: 856 , Tags: 856


Some weights of BertForTokenClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 54/54 [00:07<00:00,  6.95it/s]


 Accuracy: 0.8921698113207547
 Precision: 0.6338924233661076
 Recall: 0.4756944444444444
 F1: 0.5435159930572775
