# Purpose
The purpose of this notebook is to analyze the output of the hyperparameter grid search for the Entity Extraction model and its processing steps.

# Import

## Packages

In [1]:
# General
import os

# Analysis
import numpy as np
import pandas as pd
from siuba import *

## Data

In [2]:
# Directory holding the evaluation CSVs, one file per grid-search iteration.
file_dir = "./../data/output/entity_extraction"

file_name_1, file_name_2, file_name_3 = (
    "entity_extraction_evaluation_iter_1_20210305-210403.csv",
    "entity_extraction_evaluation_iter_2_20210308-132504.csv",
    "entity_extraction_evaluation_iter_3_20210309-105137.csv",
)

# Resolve each file name against the shared directory.
path_1, path_2, path_3 = (
    os.path.join(file_dir, csv_name)
    for csv_name in (file_name_1, file_name_2, file_name_3)
)

# Read every iteration's results into its own DataFrame.
df_1, df_2, df_3 = (
    pd.read_csv(csv_path) for csv_path in (path_1, path_2, path_3)
)

# Iteration 1
## Results
The following are notes on the general effectiveness of each processing step / hyperparameter after inspecting the top configurations, based on the average F1 scores for Class 1 and 2.

* **Embedding**: Glove outperforms Fasttext, and both outperform using just an internal Text Vectorization layer
* **Stopwords**: Not analyzed here. Previous aborted attempt showed they did not help and in fact greatly reduced results.
* **Lemmatization**: Mixed bag. For Fasttext lemmatization almost never improved performance. For Glove it's closer to 50/50.
* **Hidden Dimension**: Larger appears to be better. Worth including in a more detailed hyperparameter analysis.
* **LSTM Stack**: Mixed effectiveness in the top 20, but all top 5 use it. Might be worth exploring an LSTM stack with different hyperparameter dimensions.
* **Drop Out**: Very effective. Keep. Explore different dropout rates.
* **Sample Weights**: Sample weights were not used in the vast majority of the top n results. Drop.
* **Trainable**: Keep. 


In [3]:
# Rank the iteration 1 configurations by the mean of the class 1 and class 2
# F1 scores (highest first) and display the ten best.
(
    df_1
    >> mutate(
        # Per-metric averages over classes 1 and 2 only.
        precision_12_avg=(_.precision_1 + _.precision_2) / 2,
        recall_12_avg=(_.recall_1 + _.recall_2) / 2,
        f1_12_avg=(_.f1_1 + _.f1_2) / 2,
    )
    # The unary minus requests a descending sort.
    >> arrange(-_.f1_12_avg)
    # Keep the configuration columns plus the derived averages.
    >> select(
        _.embedding,
        _["lemmatization":"accuracy"],
        _.precision_12_avg,
        _.recall_12_avg,
        _.f1_12_avg,
    )
).head(10)

Unnamed: 0,embedding,lemmatization,hidden_dim,lstm_stack,dropout,sample_weights,trainable,accuracy,precision_12_avg,recall_12_avg,f1_12_avg
90,glove,False,128,True,True,False,True,0.973665,0.881386,0.911387,0.896135
66,glove,True,64,True,True,False,True,0.973292,0.885991,0.905343,0.895545
74,glove,False,64,True,True,False,True,0.97354,0.907182,0.882731,0.894775
82,glove,True,128,True,True,False,True,0.973292,0.899277,0.888215,0.893695
58,glove,False,32,True,True,False,True,0.970435,0.886486,0.882838,0.884444
142,fasttext,False,128,False,True,False,True,0.970435,0.897719,0.868354,0.882468
126,fasttext,False,64,False,True,False,True,0.970186,0.891463,0.870404,0.88055
122,fasttext,False,64,True,True,False,True,0.969814,0.892478,0.868363,0.879688
124,fasttext,False,64,False,True,True,True,0.968571,0.882231,0.876359,0.87826
86,glove,True,128,False,True,False,True,0.970311,0.884244,0.870173,0.877147


# Iteration 2

## Results
The following are notes on the general effectiveness of each processing step / hyperparameter after inspecting the top configurations, based on the average F1 scores for Class 1 and 2.

* **Hidden Dimension - Layer 1**: 32
* **Hidden Dimension - Layer 2**: 128


In [4]:
# Rank the iteration 2 configurations by the mean of the class 1 and class 2
# F1 scores (highest first) and display the ten best.
(
    df_2
    >> mutate(
        # Per-metric averages over classes 1 and 2 only.
        precision_12_avg=(_.precision_1 + _.precision_2) / 2,
        recall_12_avg=(_.recall_1 + _.recall_2) / 2,
        f1_12_avg=(_.f1_1 + _.f1_2) / 2,
    )
    # The unary minus requests a descending sort.
    >> arrange(-_.f1_12_avg)
    # Keep the layer sizes, dropout rate, and the derived averages.
    >> select(
        _.hidden_dim_1,
        _.hidden_dim_2,
        _.dropout_rate,
        _.precision_12_avg,
        _.recall_12_avg,
        _.f1_12_avg,
    )
).head(10)

Unnamed: 0,hidden_dim_1,hidden_dim_2,dropout_rate,precision_12_avg,recall_12_avg,f1_12_avg
34,32,128,0.5,0.921466,0.895759,0.908348
39,64,256,0.5,0.904174,0.895289,0.899637
47,256,256,0.5,0.907017,0.887532,0.897134
26,128,128,0.25,0.914451,0.87714,0.89527
46,256,128,0.5,0.904436,0.88526,0.894455
38,64,128,0.5,0.883134,0.902024,0.89238
19,32,256,0.25,0.910683,0.874531,0.891871
21,64,64,0.25,0.898767,0.885012,0.891803
25,128,64,0.25,0.913645,0.870413,0.891391
35,32,256,0.5,0.89588,0.886627,0.890975


# Iteration 3
* Time Distributed: True

In [5]:
# Summarize the iteration 3 results: column names, non-null counts, and dtypes.
df_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 25 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   embedding         4 non-null      object 
 1   stop_words        4 non-null      bool   
 2   lemmatization     4 non-null      bool   
 3   hidden_dim_1      4 non-null      int64  
 4   hidden_dim_2      4 non-null      int64  
 5   lstm_stack        4 non-null      bool   
 6   dropout           4 non-null      bool   
 7   dropout_rate      4 non-null      float64
 8   sample_weights    4 non-null      bool   
 9   trainable         4 non-null      bool   
 10  optimizer         4 non-null      object 
 11  time_distributed  4 non-null      bool   
 12  accuracy          4 non-null      float64
 13  precision_0       4 non-null      float64
 14  precision_1       4 non-null      float64
 15  precision_2       4 non-null      float64
 16  precision_macro   4 non-null      float64
 17  r

In [6]:
# Rank the iteration 3 configurations by the mean of the class 1 and class 2
# F1 scores (highest first) and display up to ten of them.
(
    df_3
    >> mutate(
        # Per-metric averages over classes 1 and 2 only.
        precision_12_avg=(_.precision_1 + _.precision_2) / 2,
        recall_12_avg=(_.recall_1 + _.recall_2) / 2,
        f1_12_avg=(_.f1_1 + _.f1_2) / 2,
    )
    # The unary minus requests a descending sort.
    >> arrange(-_.f1_12_avg)
    # Keep the optimizer, the time-distributed flag, and the derived averages.
    >> select(
        _.optimizer,
        _.time_distributed,
        _.precision_12_avg,
        _.recall_12_avg,
        _.f1_12_avg,
    )
).head(10)

Unnamed: 0,optimizer,time_distributed,precision_12_avg,recall_12_avg,f1_12_avg
2,rmsprop,True,0.893159,0.885695,0.889311
0,adam,True,0.898449,0.880352,0.888772
3,rmsprop,False,0.898814,0.868452,0.883177
1,adam,False,0.910132,0.857971,0.883136
