In [12]:
import torch
import pandas as pd
from sklearn.metrics import f1_score
from transformers import pipeline
from datasets import load_dataset

In [8]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [9]:
def _eval_model(merged_path: str, dataset: str):
    
    print(f'Avaliando modelo {merged_path}')
    
    pipe = pipeline(
        "text-classification", 
        model=merged_path,
        tokenizer=merged_path,
        device='cuda',
        truncation=True
    )
    tokenizer_kwargs = {
        'padding':True,
        'truncation':True,
        'max_length':512
    }

    data_val = load_dataset('csv', data_files=dataset)
    vals = data_val['train'].map(
        lambda x: pipe(x['text'], **tokenizer_kwargs)[0]
    )
    df = pd.DataFrame(vals)
    df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)
    res = f1_score(
        df[df['label']!='Neutro']['true_label'], 
        df[df['label']!='Neutro']['model_label'], 
        average='binary'
    )
    results = {
        'dataset': {
            'acc,none': res,
        }
    }
    return {"score": res, "results": results}

In [10]:
datasets = ['../data/maritaca-ai_sst2_pt.csv', '../data/maritaca-ai_imdb_pt.csv']

In [14]:
root = f'/home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit'
opt = 'experimentos_v1/cmaes_merged'

metrics = []

for ds in datasets[:1]:
    outputs = []
    for i in range(1, 6):
        set_seed(1994)
        outputs.append(_eval_model(f'{root}/{opt}/merge_{i}/final_model', ds))
    print(outputs)
    metrics.append(outputs)



Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_1/final_model


  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_2/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_2/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_3/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_3/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_4/final_model


  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/cmaes_merged/merge_5/final_model
[{'score': 0.9003322259136213, 'results': {'dataset': {'acc,none': 0.9003322259136213}}}, {'score': 0.8354430379746836, 'results': {'dataset': {'acc,none': 0.8354430379746836}}}, {'score': 0.0045662100456621, 'results': {'dataset': {'acc,none': 0.0045662100456621}}}, {'score': 0.9011725293132329, 'results': {'dataset': {'acc,none': 0.9011725293132329}}}, {'score': 0.7065868263473054, 'results': {'dataset': {'acc,none': 0.7065868263473054}}}]


  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


In [6]:
metrics[0]

[{'score': 0.9003322259136213,
  'results': {'dataset': {'acc,none': 0.9003322259136213}}},
 {'score': 0.8354430379746836,
  'results': {'dataset': {'acc,none': 0.8354430379746836}}},
 {'score': 0.0045662100456621,
  'results': {'dataset': {'acc,none': 0.0045662100456621}}},
 {'score': 0.9011725293132329,
  'results': {'dataset': {'acc,none': 0.9011725293132329}}},
 {'score': 0.7065868263473054,
  'results': {'dataset': {'acc,none': 0.7065868263473054}}}]

In [7]:
metrics_sst2 = [x['score'] for x in metrics[0]]

In [8]:
metrics_sst2

[0.9003322259136213,
 0.8354430379746836,
 0.0045662100456621,
 0.9011725293132329,
 0.7065868263473054]

In [9]:
metrics_imdb = [x['score'] for x in metrics[1]]

In [10]:
metrics_imdb

[0.8237885462555066,
 0.8809523809523809,
 0.0,
 0.8251659816629782,
 0.3344947735191638]

In [11]:
df = pd.DataFrame({'metrics_sst2': metrics_sst2, 'metrics_imdb': metrics_imdb})

In [12]:
df.to_csv('meticas_cmaes_2.csv', index=False)