In [1]:
import pandas as pd
from sklearn.metrics import f1_score
from transformers import pipeline
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _eval_model(merged_path: str, dataset: str):
    """Evaluate a merged text-classification model on a CSV dataset with binary F1.

    The model at ``merged_path`` is loaded into a ``text-classification``
    pipeline and run on every row's ``text`` field. The predicted string
    labels are converted to integers (``Positivo`` -> 1, ``Negativo`` -> 0,
    ``Neutro`` -> -1), rows predicted as ``Neutro`` are dropped, and the
    remaining predictions are compared with the ``true_label`` column.

    Args:
        merged_path: Path (or hub id) passed as both model and tokenizer.
        dataset: CSV file passed to ``load_dataset('csv', data_files=...)``;
            it must provide ``text`` and ``true_label`` columns.

    Returns:
        A dict ``{"score": f1, "results": {"dataset": {"acc,none": f1}}}``.

    Raises:
        ValueError: If the pipeline emits a label other than ``Positivo``,
            ``Negativo`` or ``Neutro``.
    """
    print(f'Avaliando modelo {merged_path}')

    pipe = pipeline(
        "text-classification",
        model=merged_path,
        tokenizer=merged_path,
        device='cuda',
        truncation=True
    )
    tokenizer_kwargs = {
        'padding': True,
        'truncation': True,
        'max_length': 512
    }

    data_val = load_dataset('csv', data_files=dataset)
    # Each example is classified on its own; the keys of the pipeline's result
    # dict (read below as 'label') are merged into the row by `map`.
    # NOTE(review): any pre-existing 'label' column in the CSV would be
    # overwritten by the prediction here -- confirm the CSV has none.
    vals = data_val['train'].map(
        lambda x: pipe(x['text'], **tokenizer_kwargs)[0]
    )
    df = pd.DataFrame(vals)

    # An explicit lookup table replaces the chained `.replace(...)` calls, which
    # emit a pandas FutureWarning about silent downcasting of object columns.
    label_to_id = {'Positivo': 1, 'Negativo': 0, 'Neutro': -1}
    unknown_labels = set(df['label'].unique()) - set(label_to_id)
    if unknown_labels:
        raise ValueError(
            f'Unexpected labels from model {merged_path}: '
            f'{sorted(map(str, unknown_labels))}'
        )
    df['model_label'] = df['label'].map(label_to_id)

    # Rows the model predicted as neutral are left out of the binary score.
    # The mask is built once and applied with .loc instead of chained indexing.
    scored = df.loc[df['label'] != 'Neutro']
    res = f1_score(
        scored['true_label'],
        scored['model_label'],
        average='binary'
    )
    results = {
        'dataset': {
            # The key says "acc" but the value is the F1 score; the key is kept
            # unchanged because consumers of this dict may look it up by name.
            'acc,none': res,
        }
    }
    return {"score": res, "results": results}

In [3]:
# Evaluation sets, in the order their scores are stored: SST-2 (pt) first, IMDB (pt) second.
datasets = [
    '../data/maritaca-ai_sst2_pt.csv',
    '../data/maritaca-ai_imdb_pt.csv',
]

In [4]:
# Directory layout of the merged checkpoints: <root>/<opt>/merge_<i>/final_model
root = 'mergekit'
opt = 'de_merged'

# metrics[k] holds the scores of merge_1..merge_5 on datasets[k].
metrics = []

for ds in datasets:
    outputs = [
        _eval_model(f'{root}/{opt}/merge_{i}/final_model', ds)['score']
        for i in range(1, 6)
    ]
    metrics.append(outputs)



Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_1/final_model


Map:   1%|          | 9/872 [00:00<00:51, 16.60 examples/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Map: 100%|██████████| 872/872 [00:30<00:00, 28.88 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_2/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_2/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 872/872 [00:27<00:00, 31.55 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_3/final_model


Map: 100%|██████████| 872/872 [00:26<00:00, 32.83 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_4/final_model


Map: 100%|██████████| 872/872 [00:26<00:00, 32.98 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_5/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_5/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 872/872 [00:27<00:00, 31.86 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_1/final_model


Map: 100%|██████████| 5000/5000 [11:01<00:00,  7.55 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_2/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_2/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 5000/5000 [11:19<00:00,  7.36 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_3/final_model


Map: 100%|██████████| 5000/5000 [10:50<00:00,  7.68 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_4/final_model


Map: 100%|██████████| 5000/5000 [09:42<00:00,  8.58 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


Avaliando modelo /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_5/final_model


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /home/viviane/Documents/Computação Evolutiva/Trabalho_2/mergekit/de_merged/merge_5/final_model and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 5000/5000 [09:42<00:00,  8.58 examples/s]
  df['model_label'] = df['label'].replace('Positivo', 1).replace('Negativo', 0).replace('Neutro', -1)


In [6]:
# Scores of merge_1..merge_5 on the second entry of `datasets` (the IMDB-pt CSV).
metrics[1]

[0.8149807938540333,
 0.00487012987012987,
 0.8255481410867492,
 0.8218992903007772,
 0.4959481361426256]

In [7]:
# One column per dataset, one row per merged model; written to CSV without the index.
df = pd.DataFrame(dict(metrics_sst2=metrics[0], metrics_imdb=metrics[1]))
df.to_csv(path_or_buf='meticas_de.csv', index=False)