# File to run all experiments 

Libraries

In [None]:
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo 
from experiment import Experiment

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)


Datasets

In [None]:
# Registry of benchmark datasets: name -> {'X': feature array, 'y': target array}.
# Every target is stored as a column (second axis of length 1) so that all
# datasets reach the experiment code in the same layout.
dataset = dict()

# --- AIDS Clinical Trials Group Study 175 (UCI id 890) ---
aids_clinical_trials_group_study_175 = fetch_ucirepo(id=890)
aids_data = aids_clinical_trials_group_study_175.data
X = aids_data.features.to_numpy()
# NOTE(review): targets are used as returned, without reshaping; presumably
# already a single column -- confirm against the fetched frame.
y = aids_data.targets.to_numpy()
dataset['aids'] = dict(X=X, y=y)

# --- Students' dropout and academic success (UCI id 697) ---
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697)
students_data = predict_students_dropout_and_academic_success.data
X = students_data.features.to_numpy()
y = students_data.targets.to_numpy()
# Binarise the outcome: 'Dropout' becomes 0, every other label becomes 1,
# then store the result as a single column.
y = np.where(y == 'Dropout', 0, 1).reshape(-1, 1)
dataset['students'] = dict(X=X, y=y)

# --- CDC diabetes health indicators (UCI id 891) -- currently disabled ---
# cdc_diabetes_health_indicators = fetch_ucirepo(id=891)
# X = cdc_diabetes_health_indicators.data.features.to_numpy()
# y = cdc_diabetes_health_indicators.data.targets.to_numpy()
# dataset['diabetes'] = {'X': X, 'y': y}

# --- TUANDROMD (malware), read from a local CSV ---
# Rows containing missing values are discarded before splitting.
df = pd.read_csv('../datasets/TUANDROMD.csv').dropna()
# Features are all columns except the last one; the target is the 'Label'
# column (presumably 'Label' is that last column -- verify against the CSV).
X = df.iloc[:, :-1].to_numpy()
y = df['Label'].to_numpy()[:, np.newaxis]
dataset['malware'] = dict(X=X, y=y)

Machine learning models and other experiment setup

In [None]:
# Names of the machine learning models to evaluate, in execution order.
# Earlier selection, kept for reference:
# models = ['LogisticRegression', 'SVC', 'RandomForestClassifier', 'GradientBoostingClassifier','DeepNeuralNetwork']
models = [
    'ResidualNeuralNetwork',
    'MultiLayerNeuralNetwork',
    'GradientBoostingClassifier',
    'RandomForestClassifier',
    'SVC',
    'LogisticRegression',
]

# Experiment switches: each experiment is run with the option off, then on.
subsampling_options = [False, True]
feature_reduction_options = [False, True]

In [None]:
# Seed NumPy's global (legacy) random generator so that code drawing from
# np.random produces the same sequence on every run of the notebook.
np.random.seed(seed=0)

Execute experiment

In [None]:
import os
from itertools import product

# Not filled by this cell; kept defined in case a later cell refers to it.
metrics = []

# Make sure the output directory exists before any experiment runs, so a
# missing folder cannot make the CSV export fail after the work is done.
os.makedirs('../results', exist_ok=True)

# Run every (dataset, model, subsampling, feature reduction) combination and
# export each result table to its own CSV file.
for ds_name, data in dataset.items():
    X = data['X']
    y = data['y']
    # product() varies its right-most argument fastest, i.e. the same order
    # as nesting the loops model -> subsampling -> feature reduction.
    for model, ss_opt, fr_opt in product(models, subsampling_options, feature_reduction_options):
        exp = Experiment(model, X, y, subsampling=ss_opt, feature_reduction=fr_opt)

        # An earlier draft collected individual measures here through
        # exp.get_mesaure(<name>) for 'accuracy', 'log_loss', 'model_size',
        # 'auc', 'training_time', 'prediction_time', 'tpr' and 'fpr'.
        # That path is disabled; the output of exp.run() is exported instead.
        results = pd.DataFrame(exp.run())

        # One file per configuration; the name encodes dataset, both option
        # flags and the model.
        results.to_csv(f'../results/{ds_name}_{ss_opt}_{fr_opt}_{model}.csv', index=False)
                


