In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import pandas as pd
import os
from enum import Enum

pd.set_option("display.precision", 5)
pd.set_option('display.max_rows', None)

In [2]:
metadata_file = "./results/metadata.pkl"

class dataset_types(Enum):
    train = 1
    development = 2
    test = 3
    
    def title(self):
        return self._shorten_names[self.value - 1]

    def __lt__(self, other):
        return self.value < other.value
dataset_types._shorten_names = ["Train", "Dev", "Test"]

def save_results(y_pred, index, name, task, lenguage, dataset_type, group=None, description=None, truth=False, filename=None):
    
    path = f"./results/{task}/{lenguage}/{dataset_type.name}{'/' + group if group is not None else ''}/{name if filename is None else filename}.pkl"
    
    directory = "/".join(path.split("/")[:-1])
    if not os.path.exists(directory):
        os.makedirs(directory)
    
    if os.path.exists(metadata_file):
        metadata = pd.read_pickle(metadata_file)
    else:
        metadata = pd.DataFrame({
            "Path": pd.Series([], dtype=str),
            "Name": pd.Series([], dtype=str),
            "Description": pd.Series([], dtype=str),
            "Dataset type": pd.Categorical([], categories=dataset_types, ordered=False),
            "Groud Truth": pd.Series([], dtype=bool),
            "Group": pd.Series([], dtype=str),
            "Task": pd.Series([], dtype=str),
            "Lenguage": pd.Series([], dtype=str),
        }).set_index("Path")
    
    if path in metadata.index:
        metadata = remove_results(path)

    metadata.loc[path] = {"Name": name, "Description": description, "Dataset type": dataset_type, "Groud Truth": truth, "Group": group, "Task": task, "Lenguage": lenguage}
    results = pd.DataFrame({"id": index, "y_pred": y_pred}).set_index("id") 
    
    results.to_pickle(path)
    metadata.to_pickle(metadata_file)
    
    print("Results saved on: " + path)

def remove_results(path=None):
    if os.path.exists(metadata_file):
        metadata = pd.read_pickle(metadata_file)
        if path is not None:
            if os.path.exists(path):
                metadata = metadata.drop(path)
                os.remove(path)
                metadata.to_pickle(metadata_file)
        else:
            if os.path.exists(metadata_file):
                used_files = [os.path.normpath(f) for f in metadata.index]
                all_files = set([os.path.normpath(os.path.join(dp, f)) for dp, dn, filenames in os.walk('./results') for f in filenames][1:])
                for f in used_files:
                    if f not in all_files:
                        os.remove(f)
        return metadata

    
def load_results():
    if os.path.exists(metadata_file):
        metadata = pd.read_pickle(metadata_file)
    else:
        return None
    
    metadata["Results"] = [pd.read_pickle(path) for path in metadata.index]
    return metadata

In [3]:
from sklearn.metrics import accuracy_score, classification_report, f1_score

def print_score(y_true, y_pred, name, f1_average):
    classification_report_results = acc = f1 = None
    if y_true is not None:
        classification_report_results = classification_report(y_true, y_pred)

        acc, f1 = accuracy_score(y_true, y_pred), f1_score(y_true, y_pred, average=f1_average)   

    print(name)
    print('F1 macro: ', f1)
    print('Accuracy: ', acc)

    print('\nClassification Report')
    print('======================================================')
    print('\n', classification_report_results)
    
    return {"F1": f1, "Accuracy": acc}

def print_score_hateval_task1(y_true, y_pred, name):
    return print_score(y_true, y_pred, name, 'macro')
    
def print_score_detoxis_task1(y_true, y_pred, name):
    return print_score(y_true, y_pred, name, 'binary')

In [4]:
df_results = load_results()

mask = df_results["Groud Truth"] == True

df_truth = df_results[mask]
df_pred = df_results[~mask]

df_truth.sort_values(by=["Lenguage", "Dataset type"])

Unnamed: 0_level_0,Name,Description,Dataset type,Groud Truth,Group,Task,Lenguage,Results
Path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
./results/hateval2019/task1/english/train/train_truth_task1.pkl,English Train,,dataset_types.train,True,,hateval2019/task1,english,y_pred id 201 1 202 ...
./results/hateval2019/task1/english/development/dev_truth_task1.pkl,English Development,,dataset_types.development,True,,hateval2019/task1,english,y_pred id 18201 0 1820...
./results/hateval2019/task1/english/test/test_truth_task1.pkl,English Test,,dataset_types.test,True,,hateval2019/task1,english,y_pred id 34243 0 3059...
./results/hateval2019/task1/spanish/train/train_truth_task1.pkl,Spanish Train,,dataset_types.train,True,,hateval2019/task1,spanish,y_pred id 20001 1 2000...
./results/detoxis/task1/spanish/train/train_truth_task1.pkl,Detoxis Train,,dataset_types.train,True,,detoxis/task1,spanish,y_pred id 0 0 1 ...
./results/hateval2019/task1/spanish/development/dev_truth_task1.pkl,Spanish Development,,dataset_types.development,True,,hateval2019/task1,spanish,y_pred id 20005 0 2000...
./results/detoxis/task1/spanish/development/dev_truth_task1.pkl,Detoxis Development,,dataset_types.development,True,,detoxis/task1,spanish,y_pred id 0 0 1 ...
./results/hateval2019/task1/spanish/test/test_truth_task1.pkl,Spanish Test,,dataset_types.test,True,,hateval2019/task1,spanish,y_pred id 31494 0 3246...


In [5]:
results = []

for path, (name, desc, dataset_type, truth, group, task, lenguage, y_pred) in df_pred.iterrows():
    y_true = df_truth[(df_truth["Dataset type"] == dataset_type) & (df_truth["Task"] == task) & (df_truth["Lenguage"] == lenguage)]["Results"]
    y_true = [None] if y_true.empty else y_true
    
    result = {}
    if task == "hateval2019/task1":
        result = print_score_hateval_task1(y_true[0], y_pred, name)
    if task == "detoxis/task1":
        result = print_score_detoxis_task1(y_true[0], y_pred, name)
        
    result.update({"Dataset type": dataset_type.title(),
                   "Task": ' '.join(task.split('/')).title(),
                   "Name": name,
                   "Group": ' '.join(group.split('_')).title(),
                   "Lenguage": lenguage.title(),
                   "Description": desc})
    
    results.append(result)

Bert base
F1 macro:  0.9299943995519642
Accuracy:  0.9315555555555556

Classification Report

               precision    recall  f1-score   support

           0       0.95      0.93      0.94      5217
           1       0.91      0.93      0.92      3783

    accuracy                           0.93      9000
   macro avg       0.93      0.93      0.93      9000
weighted avg       0.93      0.93      0.93      9000

Bert base
F1 macro:  0.7469041129594169
Accuracy:  0.749

Classification Report

               precision    recall  f1-score   support

           0       0.81      0.73      0.77       573
           1       0.68      0.77      0.72       427

    accuracy                           0.75      1000
   macro avg       0.75      0.75      0.75      1000
weighted avg       0.76      0.75      0.75      1000

Bert base
F1 macro:  0.5941497231031642
Accuracy:  0.6053333333333333

Classification Report

               precision    recall  f1-score   support

           0       

GPT2 base
F1 macro:  0.7356241852210881
Accuracy:  0.739

Classification Report

               precision    recall  f1-score   support

           0       0.79      0.74      0.77       573
           1       0.68      0.73      0.71       427

    accuracy                           0.74      1000
   macro avg       0.73      0.74      0.74      1000
weighted avg       0.74      0.74      0.74      1000

GPT2 base
F1 macro:  0.4359989192440023
Accuracy:  0.49066666666666664

Classification Report

               precision    recall  f1-score   support

           0       0.83      0.15      0.26      1740
           1       0.45      0.95      0.61      1260

    accuracy                           0.49      3000
   macro avg       0.64      0.55      0.44      3000
weighted avg       0.67      0.49      0.41      3000

Atalaya
F1 macro:  0.9015782362817413
Accuracy:  0.9038888888888889

Classification Report

               precision    recall  f1-score   support

           0       0

Multi-layer Perceptron classifier (best)
F1 macro:  0.5484764542936288
Accuracy:  0.7646209386281588

Classification Report

               precision    recall  f1-score   support

           0       0.77      0.92      0.84      1869
           1       0.73      0.44      0.55       901

    accuracy                           0.76      2770
   macro avg       0.75      0.68      0.69      2770
weighted avg       0.76      0.76      0.75      2770

Ridge Classifier
F1 macro:  0.5798525798525799
Accuracy:  0.7532467532467533

Classification Report

               precision    recall  f1-score   support

           0       0.76      0.90      0.83       447
           1       0.73      0.48      0.58       246

    accuracy                           0.75       693
   macro avg       0.75      0.69      0.70       693
weighted avg       0.75      0.75      0.74       693

Ridge Classifier (best)
F1 macro:  0.5795724465558194
Accuracy:  0.7445887445887446

Classification Report

          

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Random Forest classifier
F1 macro:  0.9989737648560049
Accuracy:  0.999

Classification Report

               precision    recall  f1-score   support

           0       1.00      1.00      1.00      5217
           1       1.00      1.00      1.00      3783

    accuracy                           1.00      9000
   macro avg       1.00      1.00      1.00      9000
weighted avg       1.00      1.00      1.00      9000

Support Vector Classification (best)
F1 macro:  0.9540676206890405
Accuracy:  0.9553333333333334

Classification Report

               precision    recall  f1-score   support

           0       0.96      0.97      0.96      5217
           1       0.95      0.94      0.95      3783

    accuracy                           0.96      9000
   macro avg       0.96      0.95      0.95      9000
weighted avg       0.96      0.96      0.96      9000

Support Vector Classification
F1 macro:  0.9555173113702441
Accuracy:  0.9567777777777777

Classification Report

             

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



               precision    recall  f1-score   support

           0       0.73      0.83      0.78       573
           1       0.72      0.58      0.64       427

    accuracy                           0.73      1000
   macro avg       0.72      0.71      0.71      1000
weighted avg       0.72      0.72      0.72      1000

Dummy Classifier
F1 macro:  0.36708860759493667
Accuracy:  0.58

Classification Report

               precision    recall  f1-score   support

           0       0.58      1.00      0.73      1740
           1       0.00      0.00      0.00      1260

    accuracy                           0.58      3000
   macro avg       0.29      0.50      0.37      3000
weighted avg       0.34      0.58      0.43      3000

Multinomial Naive Bayes classifier (best)
F1 macro:  0.4790122127839357
Accuracy:  0.5063333333333333

Classification Report

               precision    recall  f1-score   support

           0       0.73      0.24      0.36      1740
           1       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Ridge Classifier
F1 macro:  0.9417928090882361
Accuracy:  0.9437777777777778

Classification Report

               precision    recall  f1-score   support

           0       0.94      0.96      0.95      2643
           1       0.94      0.92      0.93      1857

    accuracy                           0.94      4500
   macro avg       0.94      0.94      0.94      4500
weighted avg       0.94      0.94      0.94      4500

Random Forest classifier (best)
F1 macro:  0.9956459547023461
Accuracy:  0.9957777777777778

Classification Report

               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2643
           1       0.99      1.00      0.99      1857

    accuracy                           1.00      4500
   macro avg       1.00      1.00      1.00      4500
weighted avg       1.00      1.00      1.00      4500

Random Forest classifier
F1 macro:  0.9977085798307364
Accuracy:  0.9977777777777778

Classification Report

               pre

Classification Report

               precision    recall  f1-score   support

           0       0.75      0.82      0.78       278
           1       0.74      0.65      0.69       222

    accuracy                           0.74       500
   macro avg       0.74      0.73      0.74       500
weighted avg       0.74      0.74      0.74       500

AdaBoost classifier
F1 macro:  0.7515898251192369
Accuracy:  0.76

Classification Report

               precision    recall  f1-score   support

           0       0.75      0.85      0.80       278
           1       0.77      0.65      0.71       222

    accuracy                           0.76       500
   macro avg       0.76      0.75      0.75       500
weighted avg       0.76      0.76      0.76       500

Multi-layer Perceptron classifier (best)
F1 macro:  0.7319693094629156
Accuracy:  0.738

Classification Report

               precision    recall  f1-score   support

           0       0.75      0.80      0.77       278
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Ridge Classifier (best)
F1 macro:  0.9572162007986309
Accuracy:  0.9729241877256317

Classification Report

               precision    recall  f1-score   support

           0       0.97      0.99      0.98      1869
           1       0.98      0.93      0.96       901

    accuracy                           0.97      2770
   macro avg       0.98      0.96      0.97      2770
weighted avg       0.97      0.97      0.97      2770

Random Forest classifier
F1 macro:  0.9779785431959345
Accuracy:  0.9859205776173285

Classification Report

               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1869
           1       1.00      0.96      0.98       901

    accuracy                           0.99      2770
   macro avg       0.99      0.98      0.98      2770
weighted avg       0.99      0.99      0.99      2770

Random Forest classifier (best)
F1 macro:  0.7264094955489615
Accuracy:  0.8335740072202166

Classification Report

           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Classification Report

 None
Multinomial Naive Bayes classifier (best)
F1 macro:  None
Accuracy:  None

Classification Report

 None
Bernoulli Naive Bayes classifier
F1 macro:  None
Accuracy:  None

Classification Report

 None
Bernoulli Naive Bayes classifier (best)
F1 macro:  None
Accuracy:  None

Classification Report

 None
Ridge Classifier
F1 macro:  None
Accuracy:  None

Classification Report

 None
Ridge Classifier (best)
F1 macro:  None
Accuracy:  None

Classification Report

 None
Random Forest classifier
F1 macro:  None
Accuracy:  None

Classification Report

 None
Random Forest classifier (best)
F1 macro:  None
Accuracy:  None

Classification Report

 None
Support Vector Classification
F1 macro:  None
Accuracy:  None

Classification Report

 None
Support Vector Classification (best)
F1 macro:  None
Accuracy:  None

Classification Report

 None
AdaBoost classifier
F1 macro:  None
Accuracy:  None

Classification Report

 None
AdaBoost classifier (best)
F1 macro:  None
Accurac

MLP SMOTE_TomekLinks
F1 macro:  0.6601941747572816
Accuracy:  0.7474747474747475

Classification Report

               precision    recall  f1-score   support

           0       0.82      0.78      0.80       447
           1       0.63      0.69      0.66       246

    accuracy                           0.75       693
   macro avg       0.73      0.73      0.73       693
weighted avg       0.75      0.75      0.75       693

MLP polynom_fit_SMOTE
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP ProWSyn
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP SMOTE_IPF
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP SMOBD
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP G_SMOTE
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP CCR
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP LVQ_SMOTE
F1 macro:  None
Accuracy:  None

Classification Report

 None
MLP Assembled_SMOTE
F1 macro:  None
Accur

Ridge SMOTE_IPF
F1 macro:  0.6888552697932425
Accuracy:  0.7772563176895307

Classification Report

               precision    recall  f1-score   support

           0       0.87      0.79      0.83      1869
           1       0.63      0.76      0.69       901

    accuracy                           0.78      2770
   macro avg       0.75      0.77      0.76      2770
weighted avg       0.79      0.78      0.78      2770

Ridge SMOBD
F1 macro:  0.6927318295739348
Accuracy:  0.7787003610108303

Classification Report

               precision    recall  f1-score   support

           0       0.87      0.78      0.83      1869
           1       0.63      0.77      0.69       901

    accuracy                           0.78      2770
   macro avg       0.75      0.78      0.76      2770
weighted avg       0.80      0.78      0.78      2770

Ridge G_SMOTE
F1 macro:  0.694331983805668
Accuracy:  0.7819494584837545

Classification Report

               precision    recall  f1-score   supp

In [6]:
df_results = pd.DataFrame(results).set_index(["Task", "Lenguage", "Dataset type", "Group", "Name", "Description"]).sort_index(level=[0, 1, 2, 3, 4])
scores = df_results.columns.to_list()
df_results_index = df_results.sort_values(by=["Task", "Lenguage", "Dataset type"] + scores, ascending=3*[True] + len(scores) * [False]).droplevel("Group")
df_results_index.droplevel("Description")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,F1,Accuracy
Task,Lenguage,Dataset type,Name,Unnamed: 4_level_1,Unnamed: 5_level_1
Detoxis Task1,Spanish,Dev,Ridge SMOBD,0.66038,0.74026
Detoxis Task1,Spanish,Dev,MLP SMOTE_TomekLinks,0.66019,0.74747
Detoxis Task1,Spanish,Dev,Ridge G_SMOTE,0.65672,0.73449
Detoxis Task1,Spanish,Dev,Bert base (3 epochs),0.65471,0.77778
Detoxis Task1,Spanish,Dev,Ridge SMOTE_TomekLinks,0.65291,0.73304
Detoxis Task1,Spanish,Dev,Ridge SMOTE_IPF,0.65028,0.73304
Detoxis Task1,Spanish,Dev,Bert Avarage,0.64819,0.7619
Detoxis Task1,Spanish,Dev,Ridge Assembled_SMOTE,0.6454,0.72727
Detoxis Task1,Spanish,Dev,Bert base (4 epochs),0.64069,0.76046
Detoxis Task1,Spanish,Dev,Ridge polynom_fit_SMOTE,0.6406,0.7215


In [7]:
a = [lambda x: 1, lambda x: 2, lambda x: 3] # Objective
b = [(lambda x: i+1) for i in range(3)] # Problem
c = [(lambda i: lambda x: i+1)(i) for i in range(3)] #First solution

print(a[0](1), b[0](1), c[0](1))

1 3 1


In [8]:
list_aggregate = [(lambda t: lambda x: x[:, :, t])(t.title()) for t in dataset_types]

df_results_columns = df_results.groupby(["Task", "Lenguage",'Group','Name'], as_index=False).aggregate(list_aggregate)
df_results_columns.columns = pd.MultiIndex.from_product([scores, [t.title() for t in dataset_types]])
df_results_columns = df_results_columns.sort_values(by=['Task', 'Lenguage', 'Group', ("F1", dataset_types.test.title()), ("F1", dataset_types.development.title())], ascending=[True, True, True, False, False])

df_results_columns

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,F1,F1,F1,Accuracy,Accuracy,Accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Train,Dev,Test,Train,Dev,Test
Task,Lenguage,Group,Name,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Detoxis Task1,Spanish,Deep Learning,Bert base (3 epochs),0.85899,0.65471,,0.91336,0.77778,
Detoxis Task1,Spanish,Deep Learning,Bert Avarage,0.98041,0.64819,,0.98736,0.7619,
Detoxis Task1,Spanish,Deep Learning,Bert base (4 epochs),0.95833,0.64069,,0.97329,0.76046,
Detoxis Task1,Spanish,Deep Learning,Bert base (2 epochs),0.67094,0.61283,,0.8148,0.76479,
Detoxis Task1,Spanish,Deep Learning,Atalaya,0.34249,0.06792,,0.74079,0.64358,
Detoxis Task1,Spanish,Sbert,Multi-layer Perceptron classifier,0.65392,0.61072,,0.79747,0.75902,
Detoxis Task1,Spanish,Sbert,Ridge Classifier,0.62939,0.57985,,0.79422,0.75325,
Detoxis Task1,Spanish,Sbert,Support Vector Classification,0.82315,0.56,,0.90072,0.7619,
Detoxis Task1,Spanish,Sbert,AdaBoost classifier,0.6403,0.50575,,0.78989,0.68975,
Detoxis Task1,Spanish,Sbert,Random Forest classifier,0.99944,0.29352,,0.99964,0.7013,


In [9]:
df_results_columns2 = df_results_columns#.droplevel('Group')
df_results_columns2 = df_results_columns2.sort_values(by=["Task", "Lenguage", ("F1", dataset_types.test.title()), ("F1", dataset_types.development.title())], ascending=[True, True, False, False])
df_results_columns2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,F1,F1,F1,Accuracy,Accuracy,Accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Train,Dev,Test,Train,Dev,Test
Task,Lenguage,Group,Name,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Detoxis Task1,Spanish,Sbert Oversampling,Ridge SMOBD,0.69273,0.66038,,0.7787,0.74026,
Detoxis Task1,Spanish,Sbert Oversampling,MLP SMOTE_TomekLinks,0.74348,0.66019,,0.82238,0.74747,
Detoxis Task1,Spanish,Sbert Oversampling,Ridge G_SMOTE,0.69433,0.65672,,0.78195,0.73449,
Detoxis Task1,Spanish,Deep Learning,Bert base (3 epochs),0.85899,0.65471,,0.91336,0.77778,
Detoxis Task1,Spanish,Sbert Oversampling,Ridge SMOTE_TomekLinks,0.68725,0.65291,,0.7769,0.73304,
Detoxis Task1,Spanish,Sbert Oversampling,Ridge SMOTE_IPF,0.68886,0.65028,,0.77726,0.73304,
Detoxis Task1,Spanish,Deep Learning,Bert Avarage,0.98041,0.64819,,0.98736,0.7619,
Detoxis Task1,Spanish,Sbert Oversampling,Ridge Assembled_SMOTE,0.6983,0.6454,,0.78195,0.72727,
Detoxis Task1,Spanish,Deep Learning,Bert base (4 epochs),0.95833,0.64069,,0.97329,0.76046,
Detoxis Task1,Spanish,Sbert Oversampling,Ridge polynom_fit_SMOTE,0.69402,0.6406,,0.78195,0.7215,


In [10]:
df_results_columns2.loc["Detoxis Task1"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F1,F1,F1,Accuracy,Accuracy,Accuracy
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Train,Dev,Test,Train,Dev,Test
Lenguage,Group,Name,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Spanish,Sbert Oversampling,Ridge SMOBD,0.69273,0.66038,,0.7787,0.74026,
Spanish,Sbert Oversampling,MLP SMOTE_TomekLinks,0.74348,0.66019,,0.82238,0.74747,
Spanish,Sbert Oversampling,Ridge G_SMOTE,0.69433,0.65672,,0.78195,0.73449,
Spanish,Deep Learning,Bert base (3 epochs),0.85899,0.65471,,0.91336,0.77778,
Spanish,Sbert Oversampling,Ridge SMOTE_TomekLinks,0.68725,0.65291,,0.7769,0.73304,
Spanish,Sbert Oversampling,Ridge SMOTE_IPF,0.68886,0.65028,,0.77726,0.73304,
Spanish,Deep Learning,Bert Avarage,0.98041,0.64819,,0.98736,0.7619,
Spanish,Sbert Oversampling,Ridge Assembled_SMOTE,0.6983,0.6454,,0.78195,0.72727,
Spanish,Deep Learning,Bert base (4 epochs),0.95833,0.64069,,0.97329,0.76046,
Spanish,Sbert Oversampling,Ridge polynom_fit_SMOTE,0.69402,0.6406,,0.78195,0.7215,


Improvements:
- Allow removal of all files with specific features or codition (not just path)

In [11]:
xsource": [
    "import smote_variants as sv\n",
    "\n",
    "#From https://smote-variants.readthedocs.io/en/latest/ranking.html\n",
    "oversamplers = [\n",
    "    sv.polynom_fit_SMOTE, sv.ProWSyn, sv.SMOTE_IPF, sv.Lee, sv.SMOBD, sv.G_SMOTE,\n",
    "    sv.CCR, sv.LVQ_SMOTE, sv.Assembled_SMOTE, sv.SMOTE_TomekLinks\n",
    "]"


SyntaxError: EOL while scanning string literal (<ipython-input-11-5d982af481a4>, line 1)