In [1]:
import torch
import os
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from transformers import pipeline
from datasets import load_dataset
# from transformers import set_seed

import random

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [32]:
def _eval_model(merged_path: str, dataset: str):
    
    print(f'Avaliando modelo {merged_path}')
    
    pipe = pipeline(
        "text-classification", 
        model=merged_path,
        tokenizer=merged_path,
        device='cuda',
        truncation=True
    )

    tokenizer_kwargs = {
        'padding':True,
        'truncation':True,
        'max_length':512
    }

    data_val = load_dataset('csv', data_files=dataset)
    vals = data_val['train'].map(
        lambda x: pipe(x['text'], **tokenizer_kwargs)[0]
    )
    df = pd.DataFrame(vals)
    df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)
    res = f1_score(
        df[df['label']!='Neutro']['true_label'], 
        df[df['label']!='Neutro']['model_label'], 
        average='binary'
    )
    results = {
        'dataset': {
            'acc,none': res,
        }
    }
    return {"score": res, "results": results}

In [33]:
datasets = ['../data/maritaca-ai_sst2_pt.csv', '../data/maritaca-ai_imdb_pt.csv']

In [37]:
root = f'/home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit'
opt = 'experimentos_v1/sade_merged'

metrics = []

for ds in datasets[:1]:
    outputs = []
    for i in range(1, 2):
        set_seed(1994)
        outputs.append(_eval_model(f'{root}/{opt}/merge_{i}/final_model', ds)['score'])
    print(outputs)
    metrics.append(outputs)



Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/sade_merged/merge_1/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/experimentos_v1/sade_merged/merge_1/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[0.8472222222222222]


  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


In [28]:
metrics[0]

[0.665083135391924]

In [33]:
df = pd.DataFrame({'metrics_sst2': metrics[0], 'metrics_imdb': metrics[1]})
df.to_csv('meticas_sade_2.csv', index=False)