In [1]:
import pandas as pd
import seaborn as sns
from settings import BASE_DIR
from pandas_profiling import ProfileReport
import json

def load_configs(json_file, interested_model=None):
    """Load search-result records stored as back-to-back JSON objects.

    The file is read from ``{BASE_DIR}/TwiiterSentimentAnalysis/{json_file}``
    and is expected to contain JSON objects written one after another (not
    wrapped in a JSON array). Records are decoded one at a time with
    ``json.JSONDecoder.raw_decode``, so nested objects and ``}`` characters
    inside string values are handled correctly.

    Parameters
    ----------
    json_file : str
        File name inside the ``TwiiterSentimentAnalysis`` directory.
    interested_model : optional
        When not ``None``, only records whose ``"model"`` value equals this
        argument are kept.

    Returns
    -------
    pandas.DataFrame
        One row per decoded record (empty when the file holds no record).

    Raises
    ------
    json.JSONDecodeError
        If the text up to the last ``}`` is not a sequence of valid JSON values.
    KeyError
        If ``interested_model`` is given and a record has no ``"model"`` key.
    """
    path = "{BaseDir}/TwiiterSentimentAnalysis/{File}".format(BaseDir=BASE_DIR, File=json_file)
    # Context manager guarantees the handle is closed even if reading fails.
    with open(path, 'r') as handle:
        # Newlines are removed and tabs become spaces before decoding.
        text = handle.read().replace("\n", "").replace("\t", " ")

    # Anything after the final closing brace (trailing whitespace, a partially
    # written record, ...) is ignored.
    text = text[:text.rfind('}') + 1]

    decoder = json.JSONDecoder()
    records = []
    position = 0
    while position < len(text):
        # raw_decode does not accept leading whitespace, so step over it.
        if text[position] in " \t\r\n":
            position += 1
            continue
        record, position = decoder.raw_decode(text, position)
        records.append(record)

    if interested_model is not None:
        records = [record for record in records if record["model"] == interested_model]
    return pd.DataFrame(records)

# Load every recorded result (no model filter applied).
mnb_model_params = load_configs(json_file="RandomSearchModelResults.json")

# Split the combined label on the embedding tag. The pattern contains a capture
# group, so the matched tag is kept as its own piece: column 0 is the text
# before the tag (discarded), the remaining pieces become embedding / model.
mnb_model_params[["embedding", "model"]] = (
    mnb_model_params["model"]
    .str.split('(TFIDF|W2V)', expand=True)
    .drop(columns=0)
)

In [2]:
# mean_test_score against mean_score_time for every loaded run:
# colour encodes the model, marker shape encodes the embedding.
sns.set(rc={"figure.figsize": (10, 10)})
graph = sns.scatterplot(
    data=mnb_model_params,
    x='mean_score_time',
    y='mean_test_score',
    hue='model',
    style='embedding',
    size='model',
    sizes=(200, 200),  # min and max are both 200
)

# Anchor the legend's left-centre edge at the right border of the axes.
graph.legend(bbox_to_anchor=(1, 0.5), loc='center left')

<matplotlib.legend.Legend at 0x7f9b7e03c670>

In [5]:
# Same scatter as for the full table, restricted to runs whose
# mean_test_score exceeds 0.75.
sns.set(rc={"figure.figsize": (10, 10)})
graph = sns.scatterplot(
    data=mnb_model_params.query('mean_test_score > 0.75'),
    x='mean_score_time',
    y='mean_test_score',
    hue='model',
    style='embedding',
    size='model',
    sizes=(200, 200),  # min and max are both 200
)

# Anchor the legend's left-centre edge at the right border of the axes.
graph.legend(bbox_to_anchor=(1, 0.5), loc='center left')

<matplotlib.legend.Legend at 0x7f46dc47b3a0>

In [6]:
# Same scatter as for the full table, restricted to runs whose
# mean_score_time is below 0.5.
sns.set(rc={"figure.figsize": (10, 10)})
graph = sns.scatterplot(
    data=mnb_model_params.query('mean_score_time < 0.5'),
    x='mean_score_time',
    y='mean_test_score',
    hue='model',
    style='embedding',
    size='model',
    sizes=(200, 200),  # min and max are both 200
)

# Anchor the legend's left-centre edge at the right border of the axes.
graph.legend(bbox_to_anchor=(1, 0.5), loc='center left')

<matplotlib.legend.Legend at 0x7f47340f7f10>

In [7]:
# Distribution of mean_test_score per model, one box per embedding.
sns.set(rc={"figure.figsize": (20, 20)})
graph = sns.boxplot(
    data=mnb_model_params,
    x='model',
    y='mean_test_score',
    hue='embedding',
)
# Anchor the legend's left-centre edge at the right border of the axes.
graph.legend(bbox_to_anchor=(1, 0.5), loc='center left')

<matplotlib.legend.Legend at 0x7f46db758f40>

In [8]:
# Per-model mean_test_score distribution, limited to runs whose
# mean_score_time is below 0.5; one box per embedding.
sns.set(rc={"figure.figsize": (10, 10)})
graph = sns.boxplot(
    data=mnb_model_params.query('mean_score_time < 0.5'),
    x='model',
    y='mean_test_score',
    hue='embedding',
)
# Anchor the legend's left-centre edge at the right border of the axes.
graph.legend(bbox_to_anchor=(1, 0.5), loc='center left')

<matplotlib.legend.Legend at 0x7f46db66b730>

In [9]:
# Timing and score columns for the LinearSVC runs that used TFIDF.
(
    mnb_model_params
    .query("model == 'LinearSVC' and embedding == 'TFIDF'")
    .loc[:, ["mean_score_time", "mean_test_score"]]
)

Unnamed: 0,mean_score_time,mean_test_score
1,0.256285,0.780779
6,0.226729,0.771621
7,0.307125,0.773958
10,0.313168,0.776779
14,0.302381,0.771979
20,0.242842,0.7704
25,0.227667,0.777958
31,0.267479,0.773011
32,0.247774,0.773768
34,0.24299,0.774084


In [10]:
# Timing and score columns for the LogisticRegression runs that used TFIDF.
(
    mnb_model_params
    .query("model == 'LogisticRegression' and embedding == 'TFIDF'")
    .loc[:, ["mean_score_time", "mean_test_score"]]
)

Unnamed: 0,mean_score_time,mean_test_score
0,0.233022,0.768168
2,0.251969,0.779642
5,0.227926,0.764316
9,0.28701,0.772611
15,0.251577,0.763874
16,0.265554,0.777726
21,0.227624,0.776674
26,0.222484,0.7804
29,0.216716,0.767179
30,0.24963,0.760379


In [16]:
# Hyperparameter table for the TFIDF LinearSVC runs scoring above 0.75:
# label and metric columns are removed, then every column that still holds
# a missing value for any remaining run is dropped.
lsv_param = (
    mnb_model_params
    .query("model == 'LinearSVC' and embedding == 'TFIDF' and mean_test_score > 0.75")
    .reset_index(drop=True)
    .drop(
        columns=[
            'model',
            'mean_test_score',
            'mean_train_score',
            'mean_score_time',
            'mean_fit_time',
            'embedding',
        ]
    )
    .dropna(axis='columns')
)
lsv_param

Unnamed: 0,model__C,model__max_iter,model__ngram_range,model__penalty,model__tfidf_max_features,model__tol,model__dual,model__intercept_scaling
0,0.587321,481.0,"[1, 2]",l2,26221901.0,1.5e-05,False,0.220479
1,1.253856,568.0,"[1, 2]",l1,25968167.0,0.000784,False,2.223324
2,1.412873,1036.0,"[1, 2]",l2,35938305.0,0.000637,False,9.511401
3,0.989335,1591.0,"[1, 2]",l2,37202359.0,0.000996,False,8.298111
4,1.833503,572.0,"[1, 2]",l2,28401462.0,0.000434,False,3.589749
5,1.799315,1688.0,"[1, 2]",l2,19863308.0,5.7e-05,False,7.125658
6,0.488782,925.0,"[1, 2]",l1,6614237.0,0.000748,False,1.362973
7,2.071368,1391.0,"[1, 2]",l2,13935123.0,0.000457,False,3.168257
8,1.834132,1585.0,"[1, 2]",l2,42564824.0,0.000972,False,8.062331
9,1.363665,177.0,"[1, 2]",l2,32879502.0,0.000129,False,9.221108


In [18]:
# Hyperparameter table for the TFIDF LogisticRegression runs scoring above
# 0.75: label and metric columns are removed, then every column that still
# holds a missing value for any remaining run is dropped.
lreg_params = (
    mnb_model_params
    .query("model == 'LogisticRegression' and embedding == 'TFIDF' and mean_test_score > 0.75")
    .reset_index(drop=True)
    .drop(
        columns=[
            'model',
            'mean_test_score',
            'mean_train_score',
            'mean_score_time',
            'mean_fit_time',
            'embedding',
        ]
    )
    .dropna(axis='columns')
)
lreg_params

Unnamed: 0,model__C,model__fit_intercept,model__l1_ratio,model__max_iter,model__ngram_range,model__penalty,model__solver,model__tfidf_max_features,model__tol
0,0.81092,True,0.724949,288.0,"[1, 2]",none,saga,31729469.0,0.000309
1,1.595602,True,0.957831,223.0,"[1, 2]",l2,saga,16122548.0,0.000119
2,0.155817,False,0.773053,557.0,"[1, 2]",none,saga,7262825.0,0.000925
3,0.576577,True,0.006189,221.0,"[1, 2]",l2,saga,11428081.0,0.00092
4,0.846707,False,0.398087,172.0,"[1, 2]",l1,saga,37194216.0,9.7e-05
5,1.741549,False,0.056185,560.0,"[1, 2]",l2,saga,5414910.0,0.000506
6,1.473912,True,0.894401,109.0,"[1, 2]",l1,saga,710416.0,0.00096
7,1.649408,True,0.027382,923.0,"[1, 2]",elasticnet,saga,48298769.0,0.000166
8,0.355336,False,0.008587,404.0,"[1, 2]",elasticnet,saga,19957998.0,0.000696
9,0.638954,False,0.483996,555.0,"[1, 2]",l1,saga,33863904.0,0.000335


In [19]:
# Build a pandas_profiling report over the LinearSVC hyperparameter table.
# It is the cell's last expression, so the report object is the cell output.
ProfileReport(lsv_param)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [76]:
# Build a pandas_profiling report over the LogisticRegression hyperparameter
# table. It is the cell's last expression, so the report object is the cell output.
ProfileReport(lreg_params)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

