In [1]:
from experiment import Experiment
from ucimlrepo import fetch_ucirepo 

In [2]:
import numpy as np
np.random.seed(0)

In [3]:
# Machine Learning models
models = ['LogisticRegression', 'SVC', 'RandomForestClassifier', 'GradientBoostingClassifier','DeepNeuralNetwork']

# Dataset 1: AIDS Clinical Trials Group Study 175

In [None]:
# fetch dataset 
aids_clinical_trials_group_study_175 = fetch_ucirepo(id=890) 
  
# data (as pandas dataframes) 
X = aids_clinical_trials_group_study_175.data.features.to_numpy() 
y = aids_clinical_trials_group_study_175.data.targets.to_numpy()

In [None]:
X.shape, y.shape

In [None]:
results_exp1 = []

for i in range (0,len(models)):
    exp1 = Experiment(models[i], X, y, subsampling=True, feature_reduction=True)
    result = exp1.run()
    results_exp1.append({'Accuracy': exp1.get_mesaure('accuracy'),
           'Log Loss': exp1.get_mesaure('log_loss'),
           'Model size': exp1.get_mesaure('model_size'),
           'AUC': exp1.get_mesaure('auc'),
           'Training time': exp1.get_mesaure('training_time'),
           #'Prediction time': exp1.get_mesaure('prediction_time'),
           'TPR': exp1.get_mesaure('tpr'),
           'FPR': exp1.get_mesaure('fpr')})



'''
exp1 = Experiment(models[0], X, y, subsampling=True, feature_reduction=True)

results1 = exp1.run()

np.mean(exp1.get_mesaure('accuracy'))
'''

In [None]:
results_exp1['Accuracy']

# Dataset 2: Predict students' dropout and academic success

In [None]:
# fetch dataset 
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697) 
  
# data (as pandas dataframes) 
X = predict_students_dropout_and_academic_success.data.features.to_numpy() 
y = predict_students_dropout_and_academic_success.data.targets.to_numpy()
y = np.array([0 if yi == 'Dropout' else 1 for yi in y])
y = np.expand_dims(y, axis=1)

In [None]:
X.shape, y.shape

In [None]:
exp2 = Experiment(models[0], X, y, subsampling=True, feature_reduction=True)

results2 = exp2.run()

np.mean(exp2.get_mesaure('accuracy'))

# Dataset 3: CDC Diabetes Health Indicators

In [None]:
# fetch dataset 
cdc_diabetes_health_indicators = fetch_ucirepo(id=891) 
  
# data (as pandas dataframes) 
X = cdc_diabetes_health_indicators.data.features.to_numpy()
y = cdc_diabetes_health_indicators.data.targets.to_numpy() 

In [None]:
exp3 = Experiment(models[0], X, y, subsampling=True, feature_reduction=True)

results3 = exp3.run()

np.mean(exp3.get_mesaure('accuracy'))

# Dataset 4: Secondary Mushroom Dataset

In [2]:
import pandas as pd
df = pd.read_csv('../datasets/MushroomDataset/secondary_data.csv', sep=';')
df.dropna(axis=0)
df.dropna(axis=1)
print(df.shape)
df.head()


(61069, 21)


Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,p,15.26,x,g,o,f,e,,w,16.95,...,s,y,w,u,w,t,g,,d,w
1,p,16.6,x,g,o,f,e,,w,17.99,...,s,y,w,u,w,t,g,,d,u
2,p,14.07,x,g,o,f,e,,w,17.8,...,s,y,w,u,w,t,g,,d,w
3,p,14.17,f,h,e,f,e,,w,15.77,...,s,y,w,u,w,t,p,,d,w
4,p,14.64,x,h,o,f,e,,w,16.53,...,s,y,w,u,w,t,p,,d,w


In [None]:
# Data visualisation

In [None]:
import pandas as pd

accuracy = [
    np.mean(exp1.get_mesaure('accuracy')),
    np.mean(exp2.get_mesaure('accuracy')),
    np.mean(exp3.get_mesaure('accuracy'))
]


logloss = [
    np.mean(exp1.get_mesaure('log_loss')),
    np.mean(exp2.get_mesaure('log_loss')),
    np.mean(exp3.get_mesaure('log_loss'))
]

modelsize = [
    np.mean(exp1.get_mesaure('model_size')),
    np.mean(exp2.get_mesaure('model_size')),
    np.mean(exp3.get_mesaure('model_size'))
]

AUC = [
    np.mean(exp1.get_mesaure('auc')),
    np.mean(exp2.get_mesaure('auc')),
    np.mean(exp3.get_mesaure('auc'))
]

traintime = [
    np.mean(exp1.get_mesaure('training_time')),
    np.mean(exp2.get_mesaure('training_time')),
    np.mean(exp3.get_mesaure('training_time'))
]

'''
predtime = [
    np.mean(exp1.get_mesaure('prediction_time')),
    np.mean(exp2.get_mesaure('prediction_time')),
    np.mean(exp3.get_mesaure('prediction_time'))
]

'''
TPR = [
    np.mean(exp1.get_mesaure('tpr')),
    np.mean(exp2.get_mesaure('tpr')),
    np.mean(exp3.get_mesaure('tpr'))
]

FPR = [
    np.mean(exp1.get_mesaure('fpr')),
    np.mean(exp2.get_mesaure('fpr')),
    np.mean(exp3.get_mesaure('fpr'))

]
energy = [
    np.mean(exp1.get_mesaure('training_time'))*np.mean(exp1.get_mesaure('model_size')),
    np.mean(exp2.get_mesaure('training_time'))*np.mean(exp2.get_mesaure('model_size')),
    np.mean(exp3.get_mesaure('training_time'))*np.mean(exp3.get_mesaure('model_size'))
]

'''
accuracy = [
    exp1.get_mesaure('accuracy'),
    exp2.get_mesaure('accuracy'),
    exp3.get_mesaure('accuracy')
]


logloss = [
    exp1.get_mesaure('log_loss'),
    exp2.get_mesaure('log_loss'),
    exp3.get_mesaure('log_loss')
]

modelsize = [
    exp1.get_mesaure('model_size'),
    exp2.get_mesaure('model_size'),
    exp3.get_mesaure('model_size')
]

AUC = [
    exp1.get_mesaure('auc'),
    exp2.get_mesaure('auc'),
    exp3.get_mesaure('auc')
]

traintime = [
    exp1.get_mesaure('training_time'),
    exp2.get_mesaure('training_time'),
    exp3.get_mesaure('training_time')
]


predtime = [
    exp1.get_mesaure('prediction_time'),
    exp2.get_mesaure('prediction_time'),
    exp3.get_mesaure('prediction_time')
]


TPR = [
    exp1.get_mesaure('tpr'),
    exp2.get_mesaure('tpr'),
    exp3.get_mesaure('tpr')
]

FPR = [
    exp1.get_mesaure('fpr'),
    exp2.get_mesaure('fpr'),
    exp3.get_mesaure('fpr')

]

energy = [
    np.mean(exp1.get_mesaure('training_time'))*np.mean(exp1.get_mesaure('model_size')),
    np.mean(exp2.get_mesaure('training_time'))*np.mean(exp2.get_mesaure('model_size')),
    np.mean(exp3.get_mesaure('training_time'))*np.mean(exp3.get_mesaure('model_size'))
]
'''

In [None]:
import plotly.graph_objects as px
from plotly.subplots import make_subplots


X = ['Exp 1', 'Exp 2', 'Exp 3']
fig = make_subplots(rows=2,cols=3)

fig.add_trace(px.Bar(y=accuracy, x=X, name="Accuracy"), 
              row=1, col=1)
fig.add_trace(px.Bar(y=logloss, x=X, name="Log Loss"),
              row=1, col=2)
fig.add_trace(px.Bar(y=AUC, x=X, name="AUC"),
              row=1, col=3)
fig.add_trace(px.Bar(y=modelsize, x=X, name="Model size"),
              row=2, col=1)
fig.add_trace(px.Bar(y=traintime, x=X, name="Training time"),
              row=2, col=2)
fig.add_trace(px.Bar(y=energy, x=X, name="Energy [B*s]"),
              row=2, col=3)

'''
X = ['Exp 1', 'Exp 2', 'Exp 3']
fig = make_subplots(rows=1,cols=3)

fig.add_trace(px.Box(y=accuracy, x=X, name="Accuracy", boxpoints="all"), 
              row=1, col=1)
fig.add_trace(px.Box(y=logloss, x=X, name="Log Loss"),
              row=1, col=2)
fig.add_trace(px.Box(y=AUC, x=X, name="AUC"),
              row=1, col=3)


fig.add_trace(px.Box(y=modelsize, x=X, name="Model size"),
              row=2, col=1)
fig.add_trace(px.Box(y=traintime, x=X, name="Training time"),
              row=2, col=2)
fig.add_trace(px.Box(y=energy, x=X, name="Energy [B*s]"),
              row=2, col=3)

'''



fig.show()

In [None]:
import plotly.graph_objects as px
from plotly.subplots import make_subplots

#fig = make_subplots(rows=2, cols=3,specs=[[{"type": "box"}, {"type": "box"}, {"type": "box"}], [{"type": "box"}, {"type": "box"}, {"type": "box"}]])

fig = make_subplots(rows=1,cols=3)

accuracy = []
logloss = []
auc = []

for i in range (0,len(models)):
    accuracy.append(np.mean(results_exp1[i]['Accuracy']))
    logloss.append(np.mean(results_exp1[i]['Log Loss']))
    auc.append(np.mean(results_exp1[i]['AUC']))




fig.add_box(y=accuracy, x=models, name='Accuracy',
              row=1, col=1)
fig.add_box(y=logloss, x=models, name='Log Loss',
              row=1, col=2)
fig.add_box(y=auc, x=models, name='AUC',
              row=1, col=3)

fig.show()
