In [3]:
import numpy as np
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from scipy.stats import chi2_contingency
from collections import Counter
from imblearn.over_sampling import SMOTE
import pickle

In [4]:
def model_result_df(_RESULTS, splits=('train', 'validation', 'test')):
    """Flatten nested model results into a long-format DataFrame.

    Parameters
    ----------
    _RESULTS : mapping
        ``{section: {split: {metric: value}}}``; each section's value must be
        accepted by ``pd.DataFrame`` and yield metrics as rows and splits as
        columns.
    splits : sequence of str, optional
        Names of the split columns to unpivot. Defaults to
        ``('train', 'validation', 'test')``, the previously hard-coded set.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` (metric), ``section``, ``type`` (split) and
        ``result`` (metric value), one row per metric/section/split.

    Raises
    ------
    ValueError
        If ``_RESULTS`` is empty (``pd.concat`` has nothing to concatenate).
    KeyError
        If one of ``splits`` is not a column of the combined frame.
    """
    # One wide frame per section (metrics as index, splits as columns),
    # tagged with the section it came from.
    frames = [
        pd.DataFrame(section_metrics).assign(section=section)
        for section, section_metrics in _RESULTS.items()
    ]
    # The metric names live in the (unnamed) index; expose them as a column.
    result = (
        pd.concat(frames)
        .reset_index()
        .rename(columns={"index": "name"})
    )
    return pd.melt(
        result,
        id_vars=['name', 'section'],
        value_vars=list(splits),
        var_name='type',
        value_name='result',
    )

In [34]:
# Load the pickled early-fusion metrics. NOTE: pickle.load must only be used
# on trusted files.
with open('EARLY_FUSION_RESULTS.dictionary', 'rb') as EARLY_FUSION_RESULTS_file:
    EARLY_FUSION_RESULTS = pickle.load(EARLY_FUSION_RESULTS_file)

# The stored section keys are spelled 'EAIRLY-...'; re-key them as 'EARLY-...'
# and keep only those two sections.
demo = {
    'EARLY-FUSION-IMBALANCED': EARLY_FUSION_RESULTS['EAIRLY-FUSION-IMBALANCED'],
    'EARLY-FUSION-BALANCED': EARLY_FUSION_RESULTS['EAIRLY-FUSION-BALANCED'],
}
EARLY_FUSION_RESULTS = demo

In [35]:
# Load the pickled late-fusion metrics (pickle.load: trusted files only).
with open(
    'LATE_FUSION_RESULTS.dictionary', 'rb'
) as LATE_FUSION_RESULTS_file:
    LATE_FUSION_RESULTS = pickle.load(LATE_FUSION_RESULTS_file)

In [36]:
# Load the pickled ensemble-model metrics (pickle.load: trusted files only).
with open(
    'ENSEMBLE_RESULTS.dictionary', 'rb'
) as ENSEMBLE_RESULTS_file:
    ENSEMBLE_RESULTS = pickle.load(ENSEMBLE_RESULTS_file)

In [175]:
# Load the pickled SVM metrics (pickle.load: trusted files only).
with open('SVM_ML_RESULTS.dictionary', 'rb') as SVM_ML_RESULTS_file:
    SVM_ = pickle.load(SVM_ML_RESULTS_file)

# The stored split key is misspelled 'valdation'. Rebuild each section with
# the corrected 'validation' key so it lines up with the other result sets.
_IMBALANCED = SVM_['ML-IMBALANCED']
IMBALANCED = {
    'validation': _IMBALANCED['valdation'],
    'test': _IMBALANCED['test'],
    'train': _IMBALANCED['train'],
}

_BALANCED = SVM_['ML-BALANCED']
BALANCED = {
    'validation': _BALANCED['valdation'],
    'test': _BALANCED['test'],
    'train': _BALANCED['train'],
}

SVM_RESULTS = {
    'ML-IMBALANCED': IMBALANCED,
    'ML-BALANCED': BALANCED,
}

In [176]:
# Gather every model family's results under one dictionary, keyed by family.
EXTENDED_RESULT = {
    'EARLY_FUSION_RESULTS': EARLY_FUSION_RESULTS,
    'LATE_FUSION_RESULTS': LATE_FUSION_RESULTS,
    'ENSEMBLE_RESULTS': ENSEMBLE_RESULTS,
    'SVM_RESULTS': SVM_RESULTS,
}

# EARLY FUSION RESULTS

In [177]:
# Pull out both early-fusion sections, then tabulate the imbalanced one
# (metrics as rows, splits as columns).
EARLY_FUSION_IMBALANCED, EARLY_FUSION_BALANCED = (
    EARLY_FUSION_RESULTS['EARLY-FUSION-IMBALANCED'],
    EARLY_FUSION_RESULTS['EARLY-FUSION-BALANCED'],
)
pd.DataFrame(data=EARLY_FUSION_IMBALANCED)

Unnamed: 0,train,validation,test
loss,0.296599,0.517469,0.811109
accuracy,0.909967,0.844416,0.764784
precision,0.962909,0.900108,0.835349
recall,0.874607,0.808769,0.715902
auc,0.997221,0.986933,0.969124
prc,0.98018,0.937054,0.869563


In [178]:
# Tabulate the balanced early-fusion metrics (metrics as rows, splits as columns).
pd.DataFrame(data=EARLY_FUSION_BALANCED)

Unnamed: 0,train,validation,test
loss,0.604176,0.686126,0.882458
accuracy,0.798118,0.767297,0.73608
precision,0.894093,0.857449,0.800204
recall,0.707409,0.675392,0.714402
auc,0.985228,0.979387,0.968741
prc,0.907073,0.87814,0.859501


In [179]:
# Long-format early-fusion results, reused by the charts in this section.
early_melted_result = model_result_df(EARLY_FUSION_RESULTS)

# Splits on the y-axis; one facet row per metric, one facet column per section.
alt.Chart(early_melted_result, title="Result on type").mark_bar().encode(
    alt.Y('type'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('section'),
)

In [180]:
# Sections on the y-axis; one facet row per metric, one facet column per split.
alt.Chart(early_melted_result, title="Result on section").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('type'),
)

In [181]:
# Sections on the y-axis with the splits stacked by colour; one facet row per metric.
alt.Chart(early_melted_result, title="Result on combined").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
)

# LATE FUSION RESULTS

In [182]:
# Pull out both late-fusion sections for the tables below.
LATE_FUSION_IMBALANCED, LATE_FUSION_BALANCED = (
    LATE_FUSION_RESULTS['LATE-FUSION-IMBALANCED'],
    LATE_FUSION_RESULTS['LATE-FUSION-BALANCED'],
)

In [183]:
# Tabulate the imbalanced late-fusion metrics (metrics as rows, splits as columns).
pd.DataFrame(data=LATE_FUSION_IMBALANCED)

Unnamed: 0,train,validation,test
loss,0.442941,0.512082,0.731082
accuracy,0.859273,0.839254,0.770983
precision,0.935438,0.914913,0.860402
recall,0.81459,0.792069,0.69504
auc,0.992989,0.989408,0.976305
prc,0.954687,0.938427,0.882258


In [147]:
# Tabulate the balanced late-fusion metrics (metrics as rows, splits as columns).
pd.DataFrame(data=LATE_FUSION_BALANCED)

Unnamed: 0,train,validation,test
loss,0.79385,0.822972,0.830269
accuracy,0.733125,0.722568,0.732025
precision,0.878941,0.866446,0.852393
recall,0.600801,0.591162,0.687734
auc,0.973481,0.97092,0.970703
prc,0.851012,0.839967,0.85927


In [148]:
# Long-format late-fusion results, reused by the charts in this section.
late_melted_result = model_result_df(LATE_FUSION_RESULTS)

In [149]:
# Splits on the y-axis; one facet row per metric, one facet column per section.
alt.Chart(late_melted_result, title="Result on type").mark_bar().encode(
    alt.Y('type'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('section'),
)

In [150]:
# Sections on the y-axis; one facet row per metric, one facet column per split.
alt.Chart(late_melted_result, title="Result on section").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('type'),
)

In [151]:
# Sections on the y-axis with the splits stacked by colour; one facet row per metric.
alt.Chart(late_melted_result, title="Result on combined").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
)

# ENSEMBLE MODEL RESULTS

In [152]:
# Pull out both ensemble sections, then tabulate the imbalanced one
# (metrics as rows, splits as columns).
ENSENBLE_IMBALANCED, ENSENBLE_BALANCED = (
    ENSEMBLE_RESULTS['ENSEMBLE-MODEL-IMBALANCED'],
    ENSEMBLE_RESULTS['ENSEMBLE-MODEL-BALANCED'],
)
pd.DataFrame(data=ENSENBLE_IMBALANCED)

Unnamed: 0,train,validation,test
loss,0.455471,0.489516,0.675531
accuracy,0.836727,0.830935,0.768491
precision,0.917558,0.908865,0.859244
recall,0.785577,0.776401,0.681814
auc,0.991211,0.989116,0.978506
prc,0.942621,0.934838,0.886651


In [153]:
# Tabulate the balanced ensemble metrics (metrics as rows, splits as columns).
pd.DataFrame(data=ENSENBLE_BALANCED)

Unnamed: 0,train,validation,test
loss,0.737274,0.812366,0.997507
accuracy,0.771774,0.747905,0.720659
precision,0.88748,0.85963,0.813535
recall,0.663095,0.639797,0.695171
auc,0.976537,0.970232,0.960362
prc,0.87864,0.853683,0.841063


In [154]:
# Long-format ensemble results, reused by the charts in this section.
ensenble_melted_result = model_result_df(ENSEMBLE_RESULTS)

# Splits on the y-axis; one facet row per metric, one facet column per section.
alt.Chart(ensenble_melted_result, title="Result on type").mark_bar().encode(
    alt.Y('type'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('section'),
)


In [155]:
# Sections on the y-axis; one facet row per metric, one facet column per split.
alt.Chart(ensenble_melted_result, title="Result on section").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('type'),
)


In [156]:
# Combined ensemble view: sections on the y-axis, splits distinguished by
# colour, one facet row per metric. The chart shows every split (train,
# validation and test), so it carries the same "Result on combined" title
# used for this layout in the other sections rather than "Result on train".
alt.Chart(ensenble_melted_result, title="Result on combined").mark_bar().encode(
  y='section',
  x='result',
  color='type',
  row='name',
)

In [185]:
# Tabulate the imbalanced SVM metrics (metrics as rows, splits as columns).
SVM_IMBALANCED = SVM_RESULTS['ML-IMBALANCED']
pd.DataFrame(data=SVM_IMBALANCED)

Unnamed: 0,validation,test,train
loss,0.457629,0.475231,0.44944
accuracy,0.483391,0.498275,0.491369
precision,0.559045,0.558058,0.55913
recall,0.519099,0.510086,0.5218


In [186]:
# Tabulate the balanced SVM metrics (metrics as rows, splits as columns).
SVM_BALANCED = SVM_RESULTS['ML-BALANCED']
pd.DataFrame(data=SVM_BALANCED)

Unnamed: 0,validation,test,train
loss,0.789054,0.632332,0.791628
accuracy,0.125716,0.332136,0.123199
precision,0.44987,0.500314,0.449236
recall,0.415757,0.460358,0.414921


In [188]:
# Long-format SVM results, reused by the charts in this section.
svm_melted_result = model_result_df(SVM_RESULTS)

# Splits on the y-axis; one facet row per metric, one facet column per section.
alt.Chart(svm_melted_result, title="Result on type").mark_bar().encode(
    alt.Y('type'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('section'),
)

In [189]:
# Sections on the y-axis; one facet row per metric, one facet column per split.
alt.Chart(svm_melted_result, title="Result on section").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('type'),
)

In [197]:
# Sections on the y-axis with the splits stacked by colour; one facet row per metric.
alt.Chart(svm_melted_result, title="Result on combined").mark_bar().encode(
    alt.Y('section'),
    alt.X('result'),
    alt.Color('type'),
    alt.Row('name'),
)

# COMBINED RESULTS

In [191]:
# Build one long-format frame covering every model family: melt each family's
# results with model_result_df and tag the rows with the family key in a
# 'group' column. Iterating the dict directly replaces the throwaway DataFrame
# and the enumerate()/i[1] indirection that were only used to read its keys.
frames = [
    model_result_df(RESULT).assign(group=name)
    for name, RESULT in EXTENDED_RESULT.items()
]
# Each family keeps its own 0..n row index, so index labels repeat across groups.
extended_melted_result = pd.concat(frames)
extended_melted_result

Unnamed: 0,name,section,type,result,group
0,loss,EARLY-FUSION-IMBALANCED,train,0.296599,EARLY_FUSION_RESULTS
1,accuracy,EARLY-FUSION-IMBALANCED,train,0.909967,EARLY_FUSION_RESULTS
2,precision,EARLY-FUSION-IMBALANCED,train,0.962909,EARLY_FUSION_RESULTS
3,recall,EARLY-FUSION-IMBALANCED,train,0.874607,EARLY_FUSION_RESULTS
4,auc,EARLY-FUSION-IMBALANCED,train,0.997221,EARLY_FUSION_RESULTS
...,...,...,...,...,...
19,recall,ML-IMBALANCED,test,0.510086,SVM_RESULTS
20,loss,ML-BALANCED,test,0.632332,SVM_RESULTS
21,accuracy,ML-BALANCED,test,0.332136,SVM_RESULTS
22,precision,ML-BALANCED,test,0.500314,SVM_RESULTS


In [195]:
# Combined view across all model families: sections on the y-axis, splits
# distinguished by colour, one facet row per metric. Titled "Result on
# combined" to match this layout elsewhere in the notebook and to tell it
# apart from the per-split faceted chart that follows, which is the one
# titled "Result on section".
alt.Chart(extended_melted_result, title="Result on combined").mark_bar().encode(
  y='section',
  x='result',
  color='type',
  row='name',
)

In [193]:
# All model families, faceted by metric (rows) and split (columns); the x-axis
# is pinned to [0, 1] so every panel shares the same scale.
alt.Chart(extended_melted_result, title="Result on section").mark_bar().encode(
    alt.Y('section'),
    alt.X('result', scale=alt.Scale(domain=[0, 1])),
    alt.Color('type'),
    alt.Row('name'),
    alt.Column('type'),
)

Unnamed: 0,train,valdation,test
loss,0.44944,0.457629,0.475231
accuracy,0.491369,0.483391,0.498275
precision,0.55913,0.559045,0.558058
recall,0.5218,0.519099,0.510086


Unnamed: 0,train,valdation,test
loss,0.791628,0.789054,0.632332
accuracy,0.123199,0.125716,0.332136
precision,0.449236,0.44987,0.500314
recall,0.414921,0.415757,0.460358


{'train': {'loss': 0.4494395101090615, 'accuracy': 0.4913687087223867, 'precision': 0.5591303988299198, 'recall': 0.5217996406791671}, 'valdation': {'loss': 0.4576290628394836, 'accuracy': 0.4833910244731888, 'precision': 0.5590445045384986, 'recall': 0.5190988036679419}, 'test': {'loss': 0.47523080422972636, 'accuracy': 0.4982747902658249, 'precision': 0.558058008604541, 'recall': 0.5100860608756651}}
{'train': {'loss': 0.7916283783783783, 'accuracy': 0.12319932432432433, 'precision': 0.44923585041130737, 'recall': 0.41492117117117117}, 'valdation': {'loss': 0.789054054054054, 'accuracy': 0.1257162162162162, 'precision': 0.44987040624232666, 'recall': 0.4157567567567568}, 'test': {'loss': 0.6323316661058548, 'accuracy': 0.3321356670715919, 'precision': 0.5003136257173089, 'recall': 0.46035819726513855}}


Unnamed: 0,name,section,type,result
0,loss,ML-IMBALANCED,train,0.44944
1,accuracy,ML-IMBALANCED,train,0.491369
2,precision,ML-IMBALANCED,train,0.55913
3,recall,ML-IMBALANCED,train,0.5218
4,loss,ML-BALANCED,train,0.791628
5,accuracy,ML-BALANCED,train,0.123199
6,precision,ML-BALANCED,train,0.449236
7,recall,ML-BALANCED,train,0.414921
8,loss,ML-IMBALANCED,valdation,0.457629
9,accuracy,ML-IMBALANCED,valdation,0.483391


In [165]:
# Per-split bars with the x-axis pinned to [0, 1]; one facet row per metric,
# one facet column per section.
# NOTE(review): this cell originally charted `melted_result`, a name no visible
# cell defines, so it raised NameError on Restart & Run All. The leftover
# output above it shows SVM sections (ML-IMBALANCED / ML-BALANCED), so it now
# uses `svm_melted_result` — confirm this is the intended frame.
alt.Chart(svm_melted_result, title="Result on type").mark_bar().encode(
  y='type',
  x=alt.X('result', scale=alt.Scale(domain=[0, 1])),
  color='type',
    row='name',
    column='section',
)