In [1]:
%load_ext autoreload
%autoreload 2

import sys
if '..' not in sys.path:
    sys.path.append('..')
from notebooks.results import load_results, classification, clusterization, to_latex_table, GremDataFrame, include_pivot_index
import re
import pandas as pd

In [2]:
def extract_letters(s: str) -> str:
    return re.findall(r'[a-zA-Z]+', s)[0]

def fix_dataset_name(s: str) -> str:
    return {
        'TweeterCyberbullying': 'TwitterCyberbullying'
    }.get(s, s)

def fix_vectorizer_name(s: str) -> str:
    return {
        'DPEBPVectorizer100Avg': 'DPEBPVectorizer',
        'SpacyMorphTagVectorizer': 'MorphTagVectorizer',
    }.get(s, s)

In [13]:
GREMVECS = ['BigramMorphTagVectorizer100', 'BigramMorphTagVectorizer370', 'FullMorphTagVectorizer', 'MorphTagVectorizer']
BOWVECS = ['CountVectorizer1000', 'CountVectorizer5000', 'TfidfVectorizer1000', 'TfidfVectorizer5000', 'CountTfidf1000']
SEMVECS = ['HerbertFT', 'HerbertFrozen', 'RoBERTaFT', 'RoBERTaFrozen']
GREMSEMVECS = ['GremBERT', 'PanGremBERT']

In [3]:
results_df = GremDataFrame(load_results('../results/'))
results_df['dataset'] = results_df['dataset'].apply(fix_dataset_name)
results_df['vectorizer'] = results_df['vectorizer'].apply(fix_vectorizer_name)
results_df['base_head_model'] = results_df['params_name'].apply(extract_letters)
print(len(results_df))
results_df.head()

1597


Unnamed: 0,dataset,datacleaner,vectorizer,params_name,accuracy,f1_score,recall,precision,silhouette,davies_bouldin,calinski_harabasz,bcubed_precission,bcubed_recall,bcubed_f1,base_head_model
0,TwitterCyberbullying,DummyDatacleaner,MorphTagVectorizer,MLP1,0.915423,0.477922,0.5,0.457711,,,,,,,MLP
1,TwitterCyberbullying,DummyDatacleaner,MorphTagVectorizer,RandomForest1,0.915423,0.477922,0.5,0.457711,,,,,,,RandomForest
2,TwitterCyberbullying,DummyDatacleaner,MorphTagVectorizer,LogisticRegression1,0.915423,0.477922,0.5,0.457711,,,,,,,LogisticRegression
3,TwitterCyberbullying,DummyDatacleaner,MorphTagVectorizer,KMeans1.0,,,,,0.086742,3.113713,889.408505,0.845114,0.501334,0.629336,KMeans
4,TwitterCyberbullying,DummyDatacleaner,MorphTagVectorizer,DBSCAN1,,,,,0.213195,2.498378,56.53377,0.844912,0.928915,0.884924,DBSCAN


In [19]:
df = (
    results_df
    .classification()
    .dataset('OldNewspapers')
    .data_cleaner('DummyDatacleaner')
    .sort_values('f1_score')
    .groupby('vectorizer')
    .first()
    [['accuracy', 'f1_score', 'recall', 'precision', 'base_head_model']]
)
df = include_pivot_index(df, 'wektoryzator')
print(to_latex_table(
    df,
    column_names=['wektoryzator', 'dokładność', 'miara f1', 'pełność (recall)', 'precyzja', 'model UM'],
    bold_labels=['f1_score', 'accuracy', 'recall', 'precision'],
))
df.style.highlight_max(color = '#666666', axis = 0)

\begin{table}
	\centering
	\caption{}
	\resizebox{\textwidth}{!}{
	\begin{tabular}{||c|c|c|c|c|c||}
		\hline
		wektoryzator & dokładność & miara f1 & pełność (recall) & precyzja & model UM \\
		\hline
		BigramMorphTagVectorizer100 & 0.193 & 0.071 & 0.079 & 0.179 & RandomForest \\
		BigramMorphTagVectorizer370 & 0.189 & 0.067 & 0.075 & 0.192 & RandomForest \\
		CountTfidf1000 & 0.243 & 0.108 & 0.113 & 0.151 & RandomForest \\
		CountVectorizer1000 & 0.240 & 0.106 & 0.112 & 0.134 & RandomForest \\
		CountVectorizer5000 & 0.279 & 0.143 & 0.142 & 0.203 & RandomForest \\
		DPEBPVectorizer & 0.259 & 0.110 & 0.120 & 0.135 & LogisticRegression \\
		FrozenGremBERT & 0.264 & 0.119 & 0.120 & 0.303 & RandomForest \\
		FrozenPanGremBERT & 0.238 & 0.102 & 0.105 & 0.307 & RandomForest \\
		FullMorphTagVectorizer & 0.245 & 0.099 & 0.108 & 0.126 & LogisticRegression \\
		GremBERT & \textbf{0.501} & 0.299 & 0.294 & \textbf{0.415} & RandomForest \\
		HerbertFT & 0.499 & \textbf{0.302} & \textbf{0.295} & 0

Unnamed: 0_level_0,wektoryzator,accuracy,f1_score,recall,precision,base_head_model
vectorizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BigramMorphTagVectorizer100,BigramMorphTagVectorizer100,0.192598,0.071075,0.078624,0.178554,RandomForest
BigramMorphTagVectorizer370,BigramMorphTagVectorizer370,0.189393,0.067313,0.075284,0.192414,RandomForest
CountTfidf1000,CountTfidf1000,0.242841,0.107741,0.11307,0.151009,RandomForest
CountVectorizer1000,CountVectorizer1000,0.240153,0.106485,0.112237,0.134233,RandomForest
CountVectorizer5000,CountVectorizer5000,0.279438,0.143477,0.141602,0.202576,RandomForest
DPEBPVectorizer,DPEBPVectorizer,0.258968,0.109958,0.120025,0.135252,LogisticRegression
FrozenGremBERT,FrozenGremBERT,0.263931,0.119162,0.119891,0.302966,RandomForest
FrozenPanGremBERT,FrozenPanGremBERT,0.238499,0.102082,0.105041,0.307494,RandomForest
FullMorphTagVectorizer,FullMorphTagVectorizer,0.244805,0.099195,0.108299,0.125826,LogisticRegression
GremBERT,GremBERT,0.500569,0.29947,0.293831,0.414669,RandomForest


In [23]:
df = (
    results_df
    .clusterization()
    .dataset('Classics5Authors35Books')
    .data_cleaner('DummyDatacleaner')
    .sort_values('f1_score')
    .groupby('vectorizer')
    .first()
    [['bcubed_f1', 'bcubed_recall', 'bcubed_precission', 'silhouette', 'base_head_model']]
)
df = include_pivot_index(df, 'wektoryzator')
print(to_latex_table(
    df,
    column_names=['wektoryzator', 'f1 bcubed', 'pełność bcubed (recall)', 'precyzja bcubed', 'indesk silhouette',  'model UM'],
    bold_labels=['bcubed_f1', 'bcubed_recall', 'bcubed_precission', 'silhouette'],
))
df.style.highlight_max(color = '#666666', axis = 0)

\begin{table}
	\centering
	\caption{}
	\resizebox{\textwidth}{!}{
	\begin{tabular}{||c|c|c|c|c|c||}
		\hline
		wektoryzator & f1 bcubed & pełność bcubed (recall) & precyzja bcubed & indesk silhouette & model UM \\
		\hline
		BigramMorphTagVectorizer100 & 0.407 & 0.999 & 0.256 & 0.515 & DBSCAN \\
		BigramMorphTagVectorizer370 & 0.407 & 0.999 & 0.256 & 0.512 & DBSCAN \\
		CountTfidf1000 & 0.043 & 0.022 & 0.587 & -0.007 & AffinityPropagation \\
		CountVectorizer1000 & 0.407 & \textbf{1.000} & 0.256 & -1.000 & DBSCAN \\
		CountVectorizer5000 & 0.407 & \textbf{1.000} & 0.256 & -1.000 & DBSCAN \\
		DPEBPVectorizer & 0.409 & 0.995 & 0.257 & 0.472 & DBSCAN \\
		FrozenGremBERT & 0.237 & 0.218 & 0.259 & 0.083 & KMeans \\
		FrozenPanGremBERT & 0.014 & 0.007 & 0.309 & 0.021 & AffinityPropagation \\
		FullMorphTagVectorizer & 0.407 & 0.999 & 0.256 & 0.462 & DBSCAN \\
		GremBERT & 0.407 & \textbf{1.000} & 0.256 & -1.000 & DBSCAN \\
		HerbertFT & \textbf{0.991} & 0.991 & 0.991 & \textbf{0.754} & KMea

Unnamed: 0_level_0,wektoryzator,bcubed_f1,bcubed_recall,bcubed_precission,silhouette,base_head_model
vectorizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BigramMorphTagVectorizer100,BigramMorphTagVectorizer100,0.407146,0.999151,0.255663,0.514989,DBSCAN
BigramMorphTagVectorizer370,BigramMorphTagVectorizer370,0.407146,0.999151,0.255663,0.512334,DBSCAN
CountTfidf1000,CountTfidf1000,0.043236,0.022445,0.586531,-0.006578,AffinityPropagation
CountVectorizer1000,CountVectorizer1000,0.407095,1.0,0.255567,-1.0,DBSCAN
CountVectorizer5000,CountVectorizer5000,0.407095,1.0,0.255567,-1.0,DBSCAN
DPEBPVectorizer,DPEBPVectorizer,0.408601,0.994934,0.257092,0.472485,DBSCAN
FrozenGremBERT,FrozenGremBERT,0.236694,0.217866,0.259085,0.0834,KMeans
FrozenPanGremBERT,FrozenPanGremBERT,0.014223,0.007279,0.309302,0.021473,AffinityPropagation
FullMorphTagVectorizer,FullMorphTagVectorizer,0.407146,0.999151,0.255663,0.462459,DBSCAN
GremBERT,GremBERT,0.407095,1.0,0.255567,-1.0,DBSCAN


In [44]:
for vec in results_df['vectorizer'].unique():
    print(vec)

MorphTagVectorizer
HerbertVectorizer
DPEBPVectorizer
CountVectorizer1000
TfidfVectorizer1000
FullMorphTagVectorizer
BigramMorphTagVectorizer100
BigramMorphTagVectorizer370
StyloMetrix
CountVectorizer5000
TfidfVectorizer5000
RoBERTaFT
HerbertFrozen
HerbertFT
RoBERTaFrozen
FrozenPanGremBERT
GremBERT
PanGremBERT
FrozenGremBERT
CountTfidf1000


In [43]:
pd.options.display.float_format = '{:,.3f}'.format
vecs = list(results_df['vectorizer'].unique())
vecs.remove('HerbertVectorizer')

df = (
    results_df
    .classification()
    .vectorizer(vecs)
    # .data_cleaner('ProperNamesMasker')
    .sort_values('f1_score', ascending=False)
    .drop_duplicates(subset=['dataset', 'vectorizer'])
    .drop(columns=list(results_df.columns[6:-1]) + ['datacleaner', 'params_name'])
    .pivot(index='vectorizer', columns='dataset', values='f1_score')
    .style.highlight_max(color = '#666666', axis = 0)
)
# df = include_pivot_index(df, 'zbiór danych')
df

dataset,Classics5Authors35Books,EroticVsOthers,OldNewspapers,PrusVsSienkiewicz,StarWarsFanfic,StarWarsFanficMedium,StarWarsFanficShort,TwitterCyberbullying,WritingStyle
vectorizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BigramMorphTagVectorizer100,0.879816,0.596767,0.089154,0.750679,0.963278,0.818753,0.686301,0.500348,0.536801
BigramMorphTagVectorizer370,0.891134,0.60058,0.116297,0.796445,0.900966,0.850944,0.709485,0.615311,0.537584
CountTfidf1000,0.924524,0.646749,0.131315,0.883495,0.987759,0.88322,0.738083,0.632196,0.663568
CountVectorizer1000,0.897451,0.646804,0.128188,0.883495,0.975478,0.876556,0.738083,0.626262,0.656544
CountVectorizer5000,0.914083,0.655272,0.16485,0.899799,0.987759,0.88246,0.760631,0.708266,0.671502
DPEBPVectorizer,0.593334,0.615427,0.151467,0.706392,0.938943,0.864065,0.747152,0.500348,0.561997
FrozenGremBERT,0.66397,0.615279,0.193199,0.763844,0.902206,0.870549,0.806167,0.609427,0.589378
FrozenPanGremBERT,0.812815,0.616076,0.184294,0.68736,0.902206,0.889939,0.789929,0.548467,0.597499
FullMorphTagVectorizer,0.748191,0.604915,0.119493,0.82544,0.926655,0.799182,0.720949,0.632452,0.573628
GremBERT,0.913038,0.667508,0.324053,0.630393,1.0,0.977331,0.863479,0.489798,0.675413


In [32]:
print(to_latex_table(df, bold_labels=list(df.columns[1:]), float_precission=3))

\begin{table}
	\centering
	\begin{tabular}{||c|c|c|c|c|c|c|c|c|c||}
		\hline
		zbiór danych & Classics5Authors35Books & EroticVsOthers & OldNewspapers & PrusVsSienkiewicz & StarWarsFanfic & StarWarsFanficMedium & StarWarsFanficShort & TwitterCyberbullying & WritingStyle \\
		\hline
		BigramMorphTagVectorizer100 & 0.880 & 0.597 & 0.089 & 0.751 & 0.963 & 0.819 & 0.686 & 0.500 & 0.537 \\
		BigramMorphTagVectorizer370 & 0.891 & 0.601 & 0.116 & 0.796 & 0.901 & 0.851 & 0.709 & 0.615 & 0.538 \\
		CountTfidf1000 & \textbf{0.925} & 0.647 & 0.131 & 0.883 & 0.988 & 0.883 & 0.738 & 0.632 & 0.664 \\
		CountVectorizer1000 & 0.897 & 0.647 & 0.128 & 0.883 & 0.975 & 0.877 & 0.738 & 0.626 & 0.657 \\
		CountVectorizer5000 & 0.914 & 0.655 & 0.165 & 0.900 & 0.988 & 0.882 & 0.761 & 0.708 & 0.672 \\
		DPEBPVectorizer100Avg & 0.593 & 0.615 & 0.151 & 0.706 & 0.939 & 0.864 & 0.747 & 0.500 & 0.562 \\
		FrozenGremBERT & 0.664 & 0.615 & 0.193 & 0.764 & 0.902 & 0.871 & 0.806 & 0.609 & 0.589 \\
		FrozenPanGremBERT &

In [12]:
print(to_latex_table(df))

\begin{table}
	\centering
	\begin{tabular}{||c|c|c|c|c|c|c|c|c||}
		\hline
		Classics5Authors35Books & EroticVsOthers & OldNewspapers & PrusVsSienkiewicz & StarWarsFanfic & StarWarsFanficMedium & StarWarsFanficShort & TwitterCyberbullying & WritingStyle \\
		\hline
		0.88 & 0.60 & 0.09 & 0.75 & 0.96 & 0.82 & 0.69 & 0.50 & 0.54 \\
		0.89 & 0.60 & 0.12 & 0.80 & 0.90 & 0.85 & 0.71 & 0.62 & 0.54 \\
		0.92 & 0.65 & 0.13 & 0.88 & 0.99 & 0.88 & 0.74 & 0.63 & 0.66 \\
		0.90 & 0.65 & 0.13 & 0.88 & 0.98 & 0.88 & 0.74 & 0.63 & 0.66 \\
		0.91 & 0.66 & 0.16 & 0.90 & 0.99 & 0.88 & 0.76 & 0.71 & 0.67 \\
		0.59 & 0.62 & 0.15 & 0.71 & 0.94 & 0.86 & 0.75 & 0.50 & 0.56 \\
		0.66 & 0.62 & 0.19 & 0.76 & 0.90 & 0.87 & 0.81 & 0.61 & 0.59 \\
		0.81 & 0.62 & 0.18 & 0.69 & 0.90 & 0.89 & 0.79 & 0.55 & 0.60 \\
		0.75 & 0.60 & 0.12 & 0.83 & 0.93 & 0.80 & 0.72 & 0.63 & 0.57 \\
		0.91 & 0.67 & 0.32 & 0.63 & 1.00 & 0.98 & 0.86 & 0.49 & 0.68 \\
		0.91 & 0.67 & 0.32 & 0.64 & 1.00 & 0.97 & 0.87 & 0.48 & 0.68 \\
		0.39 &

In [8]:
results_df.columns

Index(['dataset', 'datacleaner', 'vectorizer', 'params_name', 'accuracy',
       'f1_score', 'recall', 'precision', 'silhouette', 'davies_bouldin',
       'calinski_harabasz', 'bcubed_precission', 'bcubed_recall', 'bcubed_f1',
       'base_head_model'],
      dtype='object')

In [9]:
to_drop = ['datacleaner', 'params_name', 'accuracy',
       'f1_score', 'recall', 'precision', 'silhouette', 'davies_bouldin',
       'calinski_harabasz', 'bcubed_precission', 'bcubed_recall',]

In [21]:
pd.options.display.float_format = '{:,.3f}'.format
vecs = list(results_df['vectorizer'].unique())
vecs.remove('HerbertVectorizer')

(
    results_df
    .clusterization()
    .vectorizer(vecs)
    .sort_values('bcubed_f1', ascending=False)
    .drop_duplicates(subset=['dataset', 'vectorizer'])
    .drop(columns=to_drop)
    .pivot(index='vectorizer', columns='dataset', values='bcubed_f1')
)

dataset,Classics5Authors35Books,EroticVsOthers,OldNewspapers,PrusVsSienkiewicz,StarWarsFanfic,StarWarsFanficShort,TwitterCyberbullying,WritingStyle
vectorizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BigramMorphTagVectorizer100,0.407,0.658,0.154,0.664,0.657,0.584,0.82,0.68
BigramMorphTagVectorizer370,0.407,0.519,0.158,0.534,0.56,0.628,0.893,0.677
CountVectorizer1000,0.407,0.678,0.16,0.667,0.668,0.618,0.903,0.684
CountVectorizer5000,0.407,0.678,0.158,0.667,0.668,0.667,0.914,0.684
DPEBPVectorizer100Avg,0.409,0.596,0.157,0.624,0.58,0.64,0.907,0.671
FullMorphTagVectorizer,0.407,0.516,0.16,0.568,0.622,0.633,0.903,0.678
SpacyMorphTagVectorizer,0.407,0.678,0.157,0.667,0.665,0.64,0.885,0.684
TfidfVectorizer1000,0.459,0.678,0.16,0.698,0.751,0.616,0.887,0.684
TfidfVectorizer5000,0.531,0.678,0.158,0.986,0.737,0.665,0.913,0.684


In [9]:
(
    results_df
    .dataset(['PrusVsSienkiewicz', 'StarWarsFanfic'])
    .data_cleaner('DummyDatacleaner')
    .vectorizer('SpacyMorphTagVectorizer')
    .classification()
)

Unnamed: 0,dataset,datacleaner,vectorizer,params_name,accuracy,f1_score,recall,precision,silhouette,davies_bouldin,calinski_harabasz,bcubed_precission,bcubed_recall,bcubed_f1,base_head_model
85,PrusVsSienkiewicz,DummyDatacleaner,SpacyMorphTagVectorizer,RandomForest1,0.852688,0.831615,0.832492,0.830762,,,,,,,RandomForest
86,PrusVsSienkiewicz,DummyDatacleaner,SpacyMorphTagVectorizer,MLP1,0.752688,0.727253,0.737683,0.721667,,,,,,,MLP
87,PrusVsSienkiewicz,DummyDatacleaner,SpacyMorphTagVectorizer,MLP2,0.78172,0.761466,0.776675,0.754386,,,,,,,MLP
88,PrusVsSienkiewicz,DummyDatacleaner,SpacyMorphTagVectorizer,MLP3,0.8,0.777115,0.785746,0.771183,,,,,,,MLP
311,StarWarsFanfic,DummyDatacleaner,SpacyMorphTagVectorizer,RandomForest1,0.902439,0.901502,0.899821,0.907599,,,,,,,RandomForest
313,StarWarsFanfic,DummyDatacleaner,SpacyMorphTagVectorizer,MLP1,0.853659,0.851449,0.849732,0.862709,,,,,,,MLP
314,StarWarsFanfic,DummyDatacleaner,SpacyMorphTagVectorizer,MLP2,0.865854,0.864218,0.862552,0.872549,,,,,,,MLP
315,StarWarsFanfic,DummyDatacleaner,SpacyMorphTagVectorizer,MLP3,0.890244,0.889438,0.888193,0.893116,,,,,,,MLP


In [8]:
prus_vs_sienkiewicz = results_df[results_df['dataset'] == 'PrusVsSienkiewicz']
data = prus_vs_sienkiewicz.loc[prus_vs_sienkiewicz.groupby('vectorizer')['accuracy'].idxmax()]
data = data[['vectorizer', 'accuracy', 'f1_score', 'base_head_model']].sort_values('f1_score', ascending=False)
print(to_latex_table(
    data,
    place_modifiers='H',
    separate_header=True,
    column_names=['vectorizer', 'accuracy', 'f1 score', 'base head model'],
    float_precission=2,
    caption='Accuracy i miara F1 dla różnych sposobów wektoryzacji powieści Prusa i Sienkiewicza',
    label='prus_vs_sien:vectorizer_comparison',
    bold_labels=['f1_score', 'accuracy']
))

\begin{table}[H]
	\centering
	\begin{tabular}{||c|c|c|c||}
		\hline
		vectorizer & accuracy & f1 score & base head model \\
		\hline\hline
		CountVectorizer5000 & \textbf{0.86} & \textbf{0.84} & RandomForest \\
		TfidfVectorizer1000 & 0.85 & \textbf{0.84} & MLP \\
		TfidfVectorizer5000 & 0.85 & \textbf{0.84} & RandomForest \\
		CountVectorizer1000 & 0.85 & \textbf{0.84} & MLP \\
		SpacyMorphTagVectorizer & 0.85 & 0.83 & RandomForest \\
		FullMorphTagVectorizer & 0.80 & 0.78 & RandomForest \\
		BigramMorphTagVectorizer370 & 0.77 & 0.75 & MLP \\
		BigramMorphTagVectorizer100 & 0.77 & 0.74 & MLP \\
		DPEBPVectorizer100Avg & 0.66 & 0.65 & MLP \\
		\hline
	\end{tabular}
	\caption{Accuracy i miara F1 dla różnych sposobów wektoryzacji powieści Prusa i Sienkiewicza}
	\label{tab:prus_vs_sien:vectorizer_comparison}
\end{table}


In [12]:
df = results_df[results_df['dataset'] == 'OldNewspapers']
data = df.loc[df.groupby('vectorizer')['accuracy'].idxmax()]
data = data[['vectorizer', 'accuracy', 'f1_score', 'base_head_model']].sort_values('f1_score', ascending=False)
print(to_latex_table(
    data,
    place_modifiers='H',
    separate_header=True,
    column_names=['vectorizer', 'accuracy', 'f1 score', 'base head model'],
    float_precission=2,
    caption='Accuracy i miara F1 dla różnych sposobów wektoryzacji artykułów z gazet',
    label='old_newspapers:vectorizer_comparison',
))

{}
\begin{table}[H]
	\centering
	\begin{tabular}{||c|c|c|c||}
		\hline
		vectorizer & accuracy & f1 score & base head model \\
		\hline\hline
		\hline
	\end{tabular}
	\caption{Accuracy i miara F1 dla różnych sposobów wektoryzacji artykułów z gazet}
	\label{tab:old_newspapers:vectorizer_comparison}
\end{table}


In [25]:
df = results_df
data = df.loc[df.groupby('dataset')['accuracy'].idxmax()]
data = data[['vectorizer', 'dataset', 'accuracy', 'f1_score', 'base_head_model']].sort_values('f1_score', ascending=False)
print(to_latex_table(
    data,
    place_modifiers='H',
    separate_header=True,
    column_names=['vectorizer', 'dataset', 'accuracy', 'f1 score', 'base head model'],
    float_precission=2,
    caption='Najlepsze wektoryzery dla każdego ze zbioru danych (zwględem accuracy)',
    label='best_vectorizer:classification',
))

\begin{table}[H]
	\centering
	\begin{tabular}{||c|c|c|c|c||}
		\hline
		vectorizer & dataset & accuracy & f1 score & base head model \\
		\hline\hline
		TfidfVectorizer5000 & StarWarsFanfic & 1.00 & 1.00 & MLP \\
		TfidfVectorizer1000 & Classics5Authors35Books & 0.93 & 0.91 & MLP \\
		CountVectorizer5000 & PrusVsSienkiewicz & 0.86 & 0.84 & RandomForest \\
		TfidfVectorizer5000 & WritingStyle & 0.72 & 0.69 & RandomForest \\
		FullMorphTagVectorizer & TwitterCyberbullying & 0.93 & 0.67 & MLP \\
		CountVectorizer5000 & EroticVsOthers & 0.66 & 0.66 & RandomForest \\
		HerbertVectorizer & OldNewspapers & 0.40 & 0.24 & MLP \\
		\hline
	\end{tabular}
	\caption{Najlepsze wektoryzery dla każdego ze zbioru danych (zwględem accuracy)}
	\label{tab:best_vectorizer:classification}
\end{table}
