In [1]:
from transformers import pipeline
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from pathlib import Path
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_data_and_labels(tsv_path):
    """Load tweets and their normalised sentiment labels from a tab-separated file.

    Args:
        tsv_path: Path (or anything ``pandas.read_csv`` accepts) of a
            tab-separated file containing a ``tweet`` and a ``label`` column.

    Returns:
        A ``(data, labels)`` tuple of two lists in file order: the raw values
        of the ``tweet`` column, and the ``label`` values with surrounding
        whitespace removed and converted to upper case.

    Raises:
        KeyError: if the ``tweet`` or ``label`` column is missing.
        AttributeError: if a label is not a string (e.g. a missing value).
    """
    frame = pd.read_csv(tsv_path, sep='\t')
    tweets = frame['tweet'].to_list()

    # Normalise element by element so that non-string labels still fail loudly.
    normalised_labels = []
    for raw_label in frame['label'].to_list():
        normalised_labels.append(raw_label.strip().upper())

    return tweets, normalised_labels

In [3]:
# Pipelines already built by get_predicted_labels, keyed by the `model` argument.
# Building a pipeline loads the model, so it is done once per model rather
# than once per call.
_SENTIMENT_PIPELINES = {}


def _get_sentiment_pipeline(model):
    """Return the 'sentiment-analysis' pipeline for `model`, building it on first use."""
    try:
        cached = _SENTIMENT_PIPELINES.get(model)
    except TypeError:
        # Unhashable model argument: cannot be used as a cache key, so build
        # a fresh pipeline exactly as the uncached code path would.
        return pipeline('sentiment-analysis', model=model)
    if cached is None:
        cached = pipeline('sentiment-analysis', model=model)
        _SENTIMENT_PIPELINES[model] = cached
    return cached


def get_predicted_labels(data, model, label_map):
    """Classify `data` with a sentiment pipeline and translate its labels.

    The pipeline for a given `model` is created on the first call and reused
    afterwards, so evaluating several data sets with the same model loads
    that model only once.

    Args:
        data: The texts to classify, passed straight to the pipeline.
        model: Model identifier (or object) forwarded to
            ``pipeline('sentiment-analysis', model=...)``.
        label_map: Mapping from the pipeline's ``label`` values to the label
            names used by the caller.

    Returns:
        A list with one mapped label per prediction, in prediction order.

    Raises:
        KeyError: if the pipeline emits a label that is not in `label_map`.
    """
    sentiment_pipeline = _get_sentiment_pipeline(model)
    # Inputs longer than 128 tokens are truncated rather than rejected.
    predictions = sentiment_pipeline(data, max_length=128, truncation=True)
    return [label_map[prediction['label']] for prediction in predictions]


In [4]:
from numpy import average


def evaluate_model_and_dataset(data_set_path, model=None, label_map=None):
    """Evaluate a sentiment model on every ``*.csv`` file in a directory.

    Files are processed in sorted path order. For each file the tweets and
    true labels are loaded with ``get_data_and_labels``, predictions are
    obtained with ``get_predicted_labels``, and the metrics are computed
    against the labels ``NEGATIVE``, ``POSITIVE`` and ``NEUTRAL``.

    Args:
        data_set_path: Directory that is searched (non-recursively) for
            ``*.csv`` files.
        model: Model identifier forwarded to ``get_predicted_labels``.
            Defaults to ``"finiteautomata/bertweet-base-sentiment-analysis"``.
        label_map: Mapping from the model's output labels to ``NEGATIVE`` /
            ``POSITIVE`` / ``NEUTRAL``. Defaults to the ``POS`` / ``NEG`` /
            ``NEU`` mapping for the default model.

    Returns:
        A list with one dict per file, containing ``model``, ``language``
        (the part of the file stem before the first ``_``), ``num_examples``,
        per-label ``precision`` and ``recall`` dicts, ``f1_score`` and
        ``accuracy``. The list is empty when no file matches.
    """
    # Loop-invariant configuration: resolved once instead of once per file.
    if model is None:
        model = "finiteautomata/bertweet-base-sentiment-analysis"
    if label_map is None:
        label_map = {'POS': 'POSITIVE', 'NEG': 'NEGATIVE', 'NEU': 'NEUTRAL'}
    label_set = ['NEGATIVE', 'POSITIVE', 'NEUTRAL']

    # NOTE(review): files are matched by the '.csv' extension but
    # get_data_and_labels parses them as tab-separated - confirm that the
    # files in this directory really are TSV content with a .csv suffix.
    data_files = sorted(Path(data_set_path).glob('*.csv'))

    results = []
    for csv_file in tqdm(data_files):
        language = csv_file.stem.split('_')[0]
        data, true_labels = get_data_and_labels(csv_file)
        predicted_labels = get_predicted_labels(data, model, label_map)

        accuracy = accuracy_score(true_labels, predicted_labels)
        # average=None yields one score per entry of label_set, in that order.
        precision = precision_score(true_labels, predicted_labels, labels=label_set, average=None)
        recall = recall_score(true_labels, predicted_labels, labels=label_set, average=None)
        # NOTE(review): micro-averaged F1 over all classes of a single-label
        # problem coincides with accuracy; 'macro' may be what is wanted here.
        # Kept as 'micro' so previously reported numbers stay comparable.
        f1 = f1_score(true_labels, predicted_labels, average='micro')

        results.append({
            'model': model,
            'language': language,
            'num_examples': len(data),
            'precision': dict(zip(label_set, precision)),
            'recall': dict(zip(label_set, recall)),
            'f1_score': f1,
            'accuracy': accuracy,
        })
    return results
    

In [5]:
# Directory handed to evaluate_model_and_dataset, which evaluates every
# '*.csv' file found directly inside it. The path is relative, so it is
# resolved against the kernel's current working directory.
data_set_path = '../data/processed/translated/train/'

In [6]:
# Run the evaluation over every file in data_set_path; `results` holds one
# metrics dict per file. The output recorded for this cell shows the run was
# interrupted (KeyboardInterrupt) before the first of 9 files finished.
results = evaluate_model_and_dataset(data_set_path)

  0%|          | 0/9 [00:00<?, ?it/s]emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  0%|          | 0/9 [00:25<?, ?it/s]


KeyboardInterrupt: 