This notebook will be used to evalutate all the CSV files with the results that were achieved by employing different parameters and configurations for the experiment that we ran.

In [1]:
from __future__ import annotations

import datetime
import os
import re

from IPython.display import display as ipy_display
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt

%config InlineBackend.figure_format = 'svg'

In [2]:
# The files are names as such: all_results__lsvm_logreg__<base>__<params>
REPORTS_PATH = '../models/results/reports'
REPORT_PATTERN = re.compile(r'all_results__(?P<models>lsvm_logreg)__(?P<params>.*)\.csv')

all_results = pd.DataFrame()


for root, _, files in os.walk(REPORTS_PATH):
    for file in sorted(files):
        match = REPORT_PATTERN.match(file)
        if match:
            models = match.group('models')
            params = match.group('params')
            feature_set = params if not params.split('_') else params.split('_')[0]
            df = pd.read_csv(os.path.join(root, file), encoding='utf-8')
            df['filename'] = file.removeprefix('all_results__').removesuffix('.csv')
            df['params'] = params
            df['feature_set'] = feature_set
            all_results = pd.concat([all_results, df])

all_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__alpha,param_clf__penalty,params,split0_test_score,split1_test_score,split2_test_score,...,param_cxf__words__ngram_range,ngrams,feature,param_cxf__misspell__ngram_range,Unnamed: 1,LSVM,LSVM.1,LogReg,LogReg.1,param_cxf__words__min_df
0,5.688849,0.937616,0.027985,0.003849,0.0001,l1,embeddings,0.782863,0.807862,0.745821,...,,,,,,,,,,
1,6.518269,1.054850,0.025982,0.006546,0.0001,l1,embeddings,0.781646,0.819137,0.773271,...,,,,,,,,,,
2,2.151847,0.477962,0.029868,0.003457,0.0001,l2,embeddings,0.782663,0.807857,0.780625,...,,,,,,,,,,
3,1.661348,0.193600,0.035759,0.012819,0.0001,l2,embeddings,0.779015,0.814086,0.782074,...,,,,,,,,,,
4,3.164847,0.643077,0.023911,0.010981,0.0010,l1,embeddings,0.736148,0.767518,0.719845,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1133,11.636598,0.159471,1.100575,0.049777,100.0000,,ngrams_min-df,0.334457,0.331641,0.335017,...,"(1, 3)",,,,,,,,,5.0
0,124.322978,2.316239,54.836565,6.778871,,,ngrams_misspell_fw,0.684887,0.679476,0.705664,...,,,,"(1, 10)",,,,,,
1,120.124261,1.387970,54.765248,4.259680,,,ngrams_misspell_fw,0.711792,0.689337,0.738877,...,,,,"(1, 10)",,,,,,
2,103.728925,31.405987,36.457593,26.205035,,,ngrams_misspell_fw,0.708990,0.707632,0.716713,...,,,,"(1, 10)",,,,,,


In [3]:
for col in all_results.columns:
    if all_results[col].dtype == 'object':
        ipy_display(pd.DataFrame(all_results[col].value_counts()))

Unnamed: 0,param_clf__penalty
l1,4
l2,4


Unnamed: 0,params
ngrams_min-df,1134
ngrams_1-10_misspell,200
ngrams,90
ngrams-inclusive,70
embeddings_pos-bin_char,64
ngrams_1-9,60
embeddings_char,16
embeddings_pos-bin,12
embeddings_pos,12
ngrams_1-10_misspell_grouped,12


Unnamed: 0,model
LSVM,835
LogReg,835
300,10
maxlen,1


Unnamed: 0,maxlen
none,1366
300,200
300,50
200,50
200,4


Unnamed: 0,filename
lsvm_logreg__ngrams_min-df,1134
lsvm_logreg__ngrams_1-10_misspell,200
lsvm_logreg__ngrams,90
lsvm_logreg__ngrams-inclusive,70
lsvm_logreg__embeddings_pos-bin_char,64
lsvm_logreg__ngrams_1-9,60
lsvm_logreg__embeddings_char,16
lsvm_logreg__embeddings_pos-bin,12
lsvm_logreg__embeddings_pos,12
lsvm_logreg__ngrams_1-10_misspell_grouped,12


Unnamed: 0,feature_set
ngrams,1500
embeddings,112
ngrams-inclusive,70


Unnamed: 0,param_cxf__chars__ngram_range
"(1, 1)",424
"(1, 3)",406
"(1, 2)",404
"(1, 4)",28
"(1, 10)",26
"(1, 5)",26
"(1, 6)",26
"(1, 7)",26
"(1, 8)",26
"(1, 9)",26


Unnamed: 0,param_cxf__postg__ngram_range
"(1, 1)",398
"(1, 2)",378
"(1, 3)",378
"(2, 2)",20
"(3, 3)",20
"(4, 4)",20
"(5, 5)",20


Unnamed: 0,param_cxf__words__ngram_range
"(1, 1)",398
"(1, 2)",378
"(1, 3)",378
"(2, 2)",20
"(3, 3)",20
"(4, 4)",20
"(5, 5)",20


Unnamed: 0,ngrams
1-grams,14
2-grams,14
3-grams,14
4-grams,14
5-grams,14


Unnamed: 0,feature
chars,30
pos_tags,30
words,30


Unnamed: 0,param_cxf__misspell__ngram_range
"(1, 10)",24
"(1, 1)",20
"(1, 2)",20
"(1, 3)",20
"(1, 4)",20
"(1, 5)",20
"(1, 6)",20
"(1, 7)",20
"(1, 8)",20
"(1, 9)",20


Unnamed: 0,Unnamed: 1
n_grams,1
"(1, 1)",1
"(1, 2)",1
"(1, 3)",1
"(1, 4)",1
"(1, 5)",1
"(1, 6)",1
"(1, 7)",1
"(1, 8)",1
"(1, 9)",1


Unnamed: 0,LSVM
F1-score,1
0.6163753969247431,1
0.7038585338222324,1
0.752318690161236,1
0.7579727839336622,1
0.7600120390185585,1
0.7580826159348278,1
0.7547641411800626,1
0.7521861811472229,1
0.7477099166422752,1


Unnamed: 0,LSVM.1
std,1
0.03423286072891746,1
0.029481343416407336,1
0.03184894373255134,1
0.02870880106600588,1
0.02672975275154204,1
0.0285274470411546,1
0.029009464461613888,1
0.029843311800143196,1
0.030464966665038295,1


Unnamed: 0,LogReg
F1-score,1
0.6194212111257585,1
0.7187347336464429,1
0.760684407797976,1
0.7716026022122275,1
0.7708767809574353,1
0.7674216586001494,1
0.7636116955253343,1
0.7559951564815008,1
0.7503539976202006,1


Unnamed: 0,LogReg.1
std,1
0.03286559604271424,1
0.027593109135276506,1
0.02933259429322496,1
0.030237292866020643,1
0.03045636851014717,1
0.03222079979924426,1
0.032087181356973735,1
0.03290586219129728,1
0.03291812223623205,1


In [4]:
all_results.groupby('filename').count()

Unnamed: 0_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__alpha,param_clf__penalty,params,split0_test_score,split1_test_score,split2_test_score,...,param_cxf__words__ngram_range,ngrams,feature,param_cxf__misspell__ngram_range,Unnamed: 1,LSVM,LSVM.1,LogReg,LogReg.1,param_cxf__words__min_df
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
lsvm_logreg__embeddings,8,8,8,8,8,8,8,8,8,8,...,0,0,0,0,0,0,0,0,0,0
lsvm_logreg__embeddings_char,16,16,16,16,16,0,16,16,16,16,...,0,0,0,0,0,0,0,0,0,0
lsvm_logreg__embeddings_pos,12,12,12,12,12,0,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
lsvm_logreg__embeddings_pos-bin,12,12,12,12,12,0,12,12,12,12,...,0,0,0,0,0,0,0,0,0,0
lsvm_logreg__embeddings_pos-bin_char,64,64,64,64,64,0,64,64,64,64,...,0,0,0,0,0,0,0,0,0,0
lsvm_logreg__ngrams,90,90,90,90,0,0,90,90,90,90,...,30,0,90,0,0,0,0,0,0,0
lsvm_logreg__ngrams-inclusive,70,70,70,70,70,0,70,70,70,70,...,70,70,0,0,0,0,0,0,0,0
lsvm_logreg__ngrams_1-10_misspell,200,200,200,200,0,0,200,200,200,200,...,0,0,0,200,0,0,0,0,0,0
lsvm_logreg__ngrams_1-10_misspell_grouped,0,0,0,0,0,0,12,0,0,0,...,0,0,0,0,11,11,11,11,11,0
lsvm_logreg__ngrams_1-9,60,60,60,60,0,0,60,60,60,60,...,0,0,0,0,0,0,0,0,0,0


In [5]:
df = all_results.groupby(['model', 'feature_set', 'params'])

markers = ['o', 's', '^', '*', 'p', 'h', 'D', 'v', 'X', 'd', 'P']
colors = ['#a52040', '#404080', '#7d7dfa', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#bcbd22', '#17becf']

# fig, ax = plt.subplots(figsize=(10, 6))

all_res = all_results[all_results['model'].isin(['LSVM', 'LogReg'])]
all_res = all_res[['model', 'feature_set', 'params', 'mean_test_score', 'std_test_score']]
all_res = all_res.sort_values(by=['model', 'feature_set', 'params'])
all_res = all_res.groupby(['model', 'feature_set', 'params']).max()
all_res = all_res.unstack('model')
all_res = all_res.swaplevel(axis=1).sort_index(axis=1)

ipy_display(all_res)

all_res_sty = all_res.style.highlight_max(props='bfseries:;')
all_res_sty.to_latex('all_results__per_feature_set__params.tex', encoding='utf-8', position='h*')

Unnamed: 0_level_0,model,LSVM,LSVM,LogReg,LogReg
Unnamed: 0_level_1,Unnamed: 1_level_1,mean_test_score,std_test_score,mean_test_score,std_test_score
feature_set,params,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
embeddings,embeddings,0.81778,0.043755,0.819092,0.038526
embeddings,embeddings_char,0.825552,0.048806,0.826143,0.049665
embeddings,embeddings_pos,0.818601,0.048108,0.815482,0.045965
embeddings,embeddings_pos-bin,0.810953,0.041305,0.819494,0.038318
embeddings,embeddings_pos-bin_char,0.820989,0.04507,0.822507,0.044193
ngrams,ngrams,0.774693,0.053506,0.783484,0.052771
ngrams,ngrams_1-10_misspell,0.783624,0.045767,0.786697,0.042822
ngrams,ngrams_1-9,0.79661,0.045355,0.797145,0.055094
ngrams,ngrams_min-df,0.77254,0.151395,0.782999,0.153387
ngrams,ngrams_misspell_fw,0.701601,0.027629,0.717334,0.023856


In [6]:
for i, model in enumerate(['LSVM', 'LogReg']):
    df = all_results[all_results['model'] == model]
    df = df.sort_values(by=['mean_test_score'], ascending=False)
    # df = df.drop_duplicates(subset=['params']) # keep only the best params
    print(model)
    ipy_display(
        pd.DataFrame(
            df.groupby('params')['mean_test_score']
            .max()
            .sort_values(ascending=False)
        )
        .style
        .background_gradient(cmap='RdBu', axis=1,
                             vmin=0.5, vmax=df['mean_test_score'].max() + 0.001)
    )

LSVM


Unnamed: 0_level_0,mean_test_score
params,Unnamed: 1_level_1
embeddings_char,0.825552
embeddings_pos-bin_char,0.820989
embeddings_pos,0.818601
embeddings,0.81778
embeddings_pos-bin,0.810953
ngrams_1-9,0.79661
ngrams_1-10_misspell,0.783624
ngrams,0.774693
ngrams_min-df,0.77254
ngrams-inclusive,0.771822


LogReg


Unnamed: 0_level_0,mean_test_score
params,Unnamed: 1_level_1
embeddings_char,0.826143
embeddings_pos-bin_char,0.822507
embeddings_pos-bin,0.819494
embeddings,0.819092
embeddings_pos,0.815482
ngrams_1-9,0.797145
ngrams_1-10_misspell,0.786697
ngrams,0.783484
ngrams_min-df,0.782999
ngrams-inclusive,0.780557
