In [1]:
import pandas as pd
import os
from enum import Enum

# Location of the pickled catalogue describing every saved result file.
metadata_file = "./results/metadata.pkl"
# Dataset splits a result can belong to; members are train, development, test.
dataset_types = Enum("dataset_types", ["train", "development", "test"])

def save_results(y_pred, index, name, task, lenguage, dataset_type, group=None, description=None, truth=False, filename=None):
    """Pickle one set of predictions and register it in the metadata catalogue.

    The result file is written to
    ``./results/<task>/<lenguage>/<dataset_type.name>[/<group>]/<name or filename>.pkl``
    and a row keyed by that path is added to the catalogue stored at
    ``metadata_file``. If the catalogue already lists the path, the previous
    entry is removed through ``remove_results`` first.

    Args:
        y_pred: Values stored in the ``y_pred`` column of the result frame.
        index: Values used as the ``id`` index of the result frame.
        name: Display name recorded in the catalogue; also the file stem
            when ``filename`` is None.
        task: Task identifier; used as a path component and recorded.
        lenguage: Language identifier; used as a path component and recorded.
        dataset_type: Member of ``dataset_types``; its ``.name`` is a path
            component and the member itself is recorded.
        group: Optional extra sub-directory, also recorded in the catalogue.
        description: Optional free text recorded in the catalogue.
        truth: Value recorded in the "Groud Truth" column.
        filename: Optional file stem overriding ``name``.
    """
    group_part = "" if group is None else "/" + group
    stem = name if filename is None else filename
    path = f"./results/{task}/{lenguage}/{dataset_type.name}{group_part}/{stem}.pkl"

    # Everything before the last "/" is the target directory.
    directory = path.rsplit("/", 1)[0]
    if not os.path.exists(directory):
        os.makedirs(directory)

    if not os.path.exists(metadata_file):
        # No catalogue yet: start from an empty, typed frame indexed by path.
        empty_columns = {
            "Path": pd.Series([], dtype=str),
            "Name": pd.Series([], dtype=str),
            "Description": pd.Series([], dtype=str),
            "Dataset type": pd.Categorical([], categories=dataset_types, ordered=False),
            "Groud Truth": pd.Series([], dtype=bool),
            "Group": pd.Series([], dtype=str),
            "Task": pd.Series([], dtype=str),
            "Lenguage": pd.Series([], dtype=str),
        }
        metadata = pd.DataFrame(empty_columns).set_index("Path")
    else:
        metadata = pd.read_pickle(metadata_file)

    # Replace, rather than duplicate, an entry already stored under this path.
    if path in metadata.index:
        metadata = remove_results(path)

    row = {
        "Name": name,
        "Description": description,
        "Dataset type": dataset_type,
        "Groud Truth": truth,
        "Group": group,
        "Task": task,
        "Lenguage": lenguage,
    }
    metadata.loc[path] = row

    results = pd.DataFrame({"id": index, "y_pred": y_pred})
    results = results.set_index("id")

    results.to_pickle(path)
    metadata.to_pickle(metadata_file)

    print(f"Results saved on: {path}")

def remove_results(path=None):
    """Delete stored result files and keep the metadata catalogue in sync.

    Args:
        path: Catalogue key / file path of a single result to delete. When
            None, a clean-up pass runs instead: every file found under
            ``./results`` that the catalogue does not reference is deleted.
            The catalogue file itself is always kept.

    Returns:
        The catalogue DataFrame after the operation, or None when the
        catalogue file does not exist (nothing is deleted in that case).
    """
    if not os.path.exists(metadata_file):
        return None

    # NOTE(review): read_pickle executes arbitrary code from the file; only
    # use this with catalogues produced by this notebook.
    metadata = pd.read_pickle(metadata_file)

    if path is not None:
        # Handle the catalogue row and the file independently so that a
        # missing file or a missing row does not block removing the other.
        was_listed = path in metadata.index
        if was_listed:
            metadata = metadata.drop(path)
        if os.path.exists(path):
            os.remove(path)
        if was_listed:
            metadata.to_pickle(metadata_file)
        return metadata

    # Clean-up pass: anything on disk that the catalogue does not reference
    # is an orphan. The catalogue file is protected explicitly instead of
    # relying on its position in the directory listing.
    referenced = {os.path.normpath(f) for f in metadata.index}
    referenced.add(os.path.normpath(metadata_file))

    for dirpath, _dirnames, filenames in os.walk('./results'):
        for filename in filenames:
            candidate = os.path.normpath(os.path.join(dirpath, filename))
            if candidate not in referenced:
                os.remove(candidate)

    return metadata

    
def load_results():
    """Load the metadata catalogue together with every stored result frame.

    Returns:
        The catalogue DataFrame with an extra "Results" column holding the
        object unpickled from each row's path (the catalogue index), or None
        when the catalogue file does not exist.
    """
    if not os.path.exists(metadata_file):
        return None

    catalogue = pd.read_pickle(metadata_file)

    # The catalogue index holds the path of each pickled result file.
    loaded = []
    for stored_path in catalogue.index:
        loaded.append(pd.read_pickle(stored_path))

    catalogue["Results"] = loaded
    return catalogue

In [129]:
from sklearn.metrics import accuracy_score, classification_report, f1_score

def print_score(y_true, y_pred, name):
    """Print accuracy, macro F1 and the classification report for one model.

    Args:
        y_true: Reference labels, passed to the scikit-learn metric functions.
        y_pred: Predicted labels, passed to the same functions.
        name: Heading printed before the scores.

    Returns:
        dict: ``{"F1 macro": <macro F1>, "Accuracy": <accuracy>}``.
    """
    report_text = classification_report(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average='macro')

    scores = {"F1 macro": macro_f1, "Accuracy": accuracy}

    print(name)
    print('\nAccuracy: ', accuracy)
    print('F1 macro: ', macro_f1)
    print('\nClassification Report')
    print('======================================================')
    print('\n', report_text)

    return scores

In [38]:
# Load the catalogue together with every stored result frame.
df_results = load_results()

# Rows flagged as ground truth hold reference labels; the rest are predictions.
mask = df_results["Groud Truth"].eq(True)

df_truth = df_results.loc[mask]
df_pred = df_results.loc[~mask]

df_truth.head()

Unnamed: 0_level_0,Name,Description,Dataset type,Groud Truth,Group,Task,Lenguage,Results
Path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
./results/hateval2019/task1/english/train/train_truth_task1.pkl,English Train,,dataset_types.train,True,,hateval2019/task1,english,y_pred id 201 1 202 ...
./results/hateval2019/task1/english/development/dev_truth_task1.pkl,English Development,,dataset_types.development,True,,hateval2019/task1,english,y_pred id 18201 0 1820...
./results/hateval2019/task1/english/test/test_truth_task1.pkl,English Test,,dataset_types.test,True,,hateval2019/task1,english,y_pred id 34243 0 3059...


In [141]:
# Score every stored prediction set against the ground truth that shares its
# dataset split, task and language.
# `results` groups the score dicts per split; `all_results` is a flat list of
# the same scores tagged with split and model name.
results = {dataset_type: [] for dataset_type in dataset_types}

all_results = []

for path, (name, desc, dataset_type, truth, group, task, lenguage, y_pred) in df_pred.iterrows():
    truth_match = df_truth.loc[
        (df_truth["Dataset type"] == dataset_type)
        & (df_truth["Task"] == task)
        & (df_truth["Lenguage"] == lenguage),
        "Results",
    ]
    if truth_match.empty:
        raise LookupError(
            f"No ground truth stored for {dataset_type} / {task} / {lenguage} (needed by {path})"
        )
    # The Series is labelled by file path, so take the first match by
    # position explicitly; plain [0] relies on a deprecated integer fallback.
    y_true = truth_match.iloc[0]

    result = print_score(y_true, y_pred, name)
    result_copy = result.copy()
    result_copy.update({"Dataset type": dataset_type, "Name": name})

    all_results.append(result_copy)
    results[dataset_type].append(result)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dummy Classifier

Accuracy:  0.5796666666666667
F1 macro:  0.3669550538088204

Classification Report

               precision    recall  f1-score   support

           0       0.58      1.00      0.73      5217
           1       0.00      0.00      0.00      3783

    accuracy                           0.58      9000
   macro avg       0.29      0.50      0.37      9000
weighted avg       0.34      0.58      0.43      9000

Multinomial Naive Bayes classifier

Accuracy:  0.8204444444444444
F1 macro:  0.8126298508371801

Classification Report

               precision    recall  f1-score   support

           0       0.82      0.88      0.85      5217
           1       0.82      0.73      0.77      3783

    accuracy                           0.82      9000
   macro avg       0.82      0.81      0.81      9000
weighted avg       0.82      0.82      0.82      9000

Best Multinomial Naive Bayes classifier

Accuracy:  0.8162222222222222
F1 macro:  0.8071309065220538

Classification Repor

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Dummy Classifier

Accuracy:  0.573
F1 macro:  0.3642720915448188

Classification Report

               precision    recall  f1-score   support

           0       0.57      1.00      0.73       573
           1       0.00      0.00      0.00       427

    accuracy                           0.57      1000
   macro avg       0.29      0.50      0.36      1000
weighted avg       0.33      0.57      0.42      1000

Multinomial Naive Bayes classifier

Accuracy:  0.738
F1 macro:  0.7302028627329833

Classification Report

               precision    recall  f1-score   support

           0       0.76      0.79      0.78       573
           1       0.70      0.67      0.68       427

    accuracy                           0.74      1000
   macro avg       0.73      0.73      0.73      1000
weighted avg       0.74      0.74      0.74      1000

Best Multinomial Naive Bayes classifier

Accuracy:  0.742
F1 macro:  0.7329590620031796

Classification Report

               precision    recall  

Best AdaBoost classifier

Accuracy:  0.48533333333333334
F1 macro:  0.4428953278257908

Classification Report

               precision    recall  f1-score   support

           0       0.73      0.18      0.29      1740
           1       0.44      0.91      0.60      1260

    accuracy                           0.49      3000
   macro avg       0.59      0.54      0.44      3000
weighted avg       0.61      0.49      0.42      3000

Multi-layer Perceptron classifier

Accuracy:  0.5113333333333333
F1 macro:  0.4837036783119388

Classification Report

               precision    recall  f1-score   support

           0       0.74      0.24      0.36      1740
           1       0.46      0.88      0.60      1260

    accuracy                           0.51      3000
   macro avg       0.60      0.56      0.48      3000
weighted avg       0.62      0.51      0.46      3000

Best Multi-layer Perceptron classifier

Accuracy:  0.518
F1 macro:  0.4883659271050003

Classification Report

   

In [137]:
# Print one score table per dataset split, with rows labelled by model name.
for dataset_type, scores in results.items():
    print(dataset_type.name)
    split_names = df_pred.loc[df_pred["Dataset type"] == dataset_type, "Name"]
    print(pd.DataFrame(scores, index=split_names), "\n")

train
                                         F1 macro  Accuracy
Name                                                       
Dummy Classifier                         0.366955  0.579667
Multinomial Naive Bayes classifier       0.812630  0.820444
Best Multinomial Naive Bayes classifier  0.807131  0.816222
Bernoulli Naive Bayes classifier         0.821029  0.826000
Best Bernoulli Naive Bayes classifier    0.815134  0.820889
Ridge Classifier                         0.877431  0.881667
Best Ridge Classifier                    0.827157  0.835667
Random Forest classifier                 0.998974  0.999000
Best Random Forest classifier            0.996576  0.996667
Support Vector Classification            0.956309  0.957556
Best Support Vector Classification       0.807855  0.820667
AdaBoost classifier                      0.767939  0.781667
Best AdaBoost classifier                 0.814896  0.824000
Multi-layer Perceptron classifier        0.888015  0.891444
Best Multi-layer Perceptron classi

In [142]:
# Collapse the flat score list into one row per (dataset split, model name),
# then rank by macro F1 with accuracy as the tie-breaker.
tmp = (
    pd.DataFrame(all_results)
    .set_index(["Name", "Dataset type"])
    .groupby(level=["Dataset type", "Name"])
    .sum()
)
tmp.sort_values(by=["F1 macro", "Accuracy"], ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,F1 macro,Accuracy
Dataset type,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
dataset_types.train,Random Forest classifier,0.998974,0.999
dataset_types.train,Best Random Forest classifier,0.996576,0.996667
dataset_types.train,Support Vector Classification,0.956309,0.957556
dataset_types.train,Bert base,0.929994,0.931556
dataset_types.train,Multi-layer Perceptron classifier,0.888015,0.891444
dataset_types.train,Ridge Classifier,0.877431,0.881667
dataset_types.train,Best Ridge Classifier,0.827157,0.835667
dataset_types.train,Bernoulli Naive Bayes classifier,0.821029,0.826
dataset_types.train,Best Multi-layer Perceptron classifier,0.817371,0.821778
dataset_types.train,Best Bernoulli Naive Bayes classifier,0.815134,0.820889
