In [None]:
import os
import pickle

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score,recall_score,precision_score
from transformers import Trainer, TrainingArguments, BertForTokenClassification

In [None]:
def load_from_pickle(pickle_name):
    """Return the object stored in the pickle file at ``pickle_name``.

    The file is opened in binary mode and closed again before returning.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(pickle_name, mode='rb') as source:
        return pickle.load(source)


def dump_to_pickle(data_set, file_name, class_name=None):
    """Pickle ``data_set`` to ``Datasets[/<class_name>]/<file_name>.pkl``.

    Parameters
    ----------
    data_set : object
        Any picklable object.
    file_name : str
        Base name of the output file; ``'.pkl'`` is appended to it.
    class_name : str, optional
        Sub-folder of ``Datasets`` to write into. When ``None`` the file is
        written directly into ``Datasets``.

    The target folder is resolved relative to the current working directory
    and is created if it does not exist yet. Nothing is returned.
    """
    if class_name is None:
        folders = 'Datasets'
    else:
        folders = os.path.join('Datasets', class_name)
    os.makedirs(folders, exist_ok=True)

    file_path = os.path.join(folders, file_name + '.pkl')
    # The context manager guarantees the handle is flushed and closed even if
    # pickling raises part-way through.
    with open(file_path, 'wb') as outfile:
        pickle.dump(data_set, outfile, protocol=4)

    
class Preoblikuj_u_Dataset(torch.utils.data.Dataset):
    """Expose a mapping of encodings plus a label sequence as a torch ``Dataset``.

    ``encodings`` must provide ``.items()`` yielding ``(name, values)`` pairs
    where ``values[idx]`` is the entry for sample ``idx``; ``labels[idx]`` is
    the label entry for the same sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the labels, not the encodings.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict of tensors for sample ``idx``, with a ``'labels'`` entry added last."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
def _word_level_labels(token_word_ids, token_preds, threshold):
    """Collapse per-token predictions into one 0/1 label per word.

    ``token_word_ids`` holds, for every kept token, the index of the word it
    belongs to (``-1`` for tokens without a word); ``token_preds`` holds the
    predicted class of the same tokens. Word ``w`` is labelled 1 when the mean
    prediction over its tokens is strictly greater than ``threshold``.
    """
    # Words are numbered from 0, so the largest id fixes how many there are.
    # An empty selection yields zero words instead of failing in ``max()``.
    n_words = int(token_word_ids.max()) + 1 if token_word_ids.size else 0
    word_labels = np.zeros(n_words, dtype=int)
    for word_idx in range(n_words):
        # If no token carries this word id the mean is NaN (NumPy warns) and
        # ``NaN > threshold`` is False, so the word keeps label 0.
        if token_preds[token_word_ids == word_idx].mean() > threshold:
            word_labels[word_idx] = 1
    return word_labels


def testing1(test_set_path, model_path, df_test_path, threshold=0.01, eval_batch_size=16):
    """Evaluate a saved token-classification model at word level.

    Parameters
    ----------
    test_set_path : str
        Pickle file holding the test set passed to ``Trainer.predict``. The
        object must expose ``.dataset.encodings.encodings[i]`` with
        ``word_ids`` and ``type_ids`` attributes.
        NOTE(review): this looks like a ``torch.utils.data.Subset`` over a
        fast-tokenizer ``BatchEncoding`` -- confirm.
    model_path : str
        Directory handed to ``BertForTokenClassification.from_pretrained``
        (loaded with ``num_labels=2``).
    df_test_path : str
        Pickled DataFrame. Its index values select the encodings and its
        ``'labels'`` column holds the true per-word labels of each sample.
    threshold : float, optional
        A word is predicted positive when the mean of its token predictions
        is strictly greater than this value (default ``0.01``).
    eval_batch_size : int, optional
        Per-device batch size used for prediction (default ``16``).

    Returns
    -------
    list
        ``[accuracy, precision, recall, f1, confusion_matrix]`` computed over
        all words of the samples whose predicted and true label sequences
        have the same length. ``precision``, ``recall`` and ``f1`` are
        per-class arrays (``average=None``).

    Samples whose predicted and true sequences differ in length are left out
    of the metrics; their position and both lengths are printed.

    NOTE: both input files are unpickled, which can execute arbitrary code;
    only use trusted files.
    """
    testset = load_from_pickle(test_set_path)
    df_test = pd.read_pickle(df_test_path)

    model = BertForTokenClassification.from_pretrained(model_path, num_labels=2)
    args = TrainingArguments(output_dir='./evaldir', per_device_eval_batch_size=eval_batch_size)
    evaler = Trainer(args=args, model=model)

    pred = evaler.predict(testset)
    token_preds = pred.predictions.argmax(axis=-1)

    # Row ``row`` of the predictions is paired with the encoding selected by
    # the ``row``-th index value of df_test.
    # NOTE(review): this assumes the test set iterates in that same order --
    # confirm against the code that built the pickles.
    fast_encodings = testset.dataset.encodings.encodings
    labels = []
    for row, sample_idx in enumerate(df_test.index.to_list()):
        encoding = fast_encodings[sample_idx]
        # ``word_ids`` contains None for tokens that map to no word; encode
        # those as -1 so the ids fit in an integer array.
        word_ids = np.array(
            [-1 if wid is None else wid for wid in encoding.word_ids], dtype=int
        )
        # Only tokens with a non-zero type id are scored
        # (presumably the second segment of a sentence pair -- confirm).
        scored = np.array(encoding.type_ids, dtype=bool)
        labels.append(
            _word_level_labels(word_ids[scored], token_preds[row][scored], threshold)
        )

    labels_original = list(df_test['labels'])

    kept_pred = []   # predicted labels of the samples used for the metrics
    kept_true = []   # true labels of the same samples
    for ii, (pred_labels, true_labels) in enumerate(zip(labels, labels_original)):
        if len(pred_labels) == len(true_labels):
            kept_pred.append(pred_labels)
            kept_true.append(true_labels)
        else:
            print(ii, len(pred_labels), len(true_labels))

    # Concatenate once instead of growing the arrays inside the loop. The
    # leading empty float array keeps the float dtype and the behaviour when
    # no sample is kept.
    lab = np.concatenate([np.array([])] + kept_pred)
    labor = np.concatenate([np.array([])] + kept_true)

    accuracy = accuracy_score(labor, lab, normalize=True)
    precision = precision_score(labor, lab, average=None)
    recall = recall_score(labor, lab, average=None)
    f1 = f1_score(labor, lab, average=None)
    matrix = confusion_matrix(labor, lab)

    return [accuracy, precision, recall, f1, matrix]

In [None]:
# Evaluate the model stored under Results/Drug/0Shot on the Drug test files
# and keep the resulting metrics next to the model.
model_path = './Results/Drug/0Shot/Model'
test_set_path = './Datasets/Drug/dataset_withOne_test_NULA.pkl'
df_test_path = './Datasets/Drug/df_test_UnseenClass.pkl'
res = testing1(
    test_set_path=test_set_path,
    model_path=model_path,
    df_test_path=df_test_path,
)

# Persist the list returned by testing1.
with open('./Results/Drug/0Shot/metrike_linguisticNEW.pkl', mode='wb') as outfile:
    pickle.dump(res, outfile)

# Read the file back so the displayed value is what was actually written.
metrike = load_from_pickle('./Results/Drug/0Shot/metrike_linguisticNEW.pkl')
metrike