# 1. Imports + Random Seeds

In [21]:
from utils import *
import random
import numpy as np
from copy import deepcopy as cp

import xgboost as xgb

from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import scale
from sklearn.model_selection import RepeatedStratifiedKFold

from imblearn.metrics import specificity_score

import tensorflow as tf

import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Seed Python's `random`, NumPy's global RNG and TensorFlow's global generator
# with the same value, in that order, so stochastic steps are repeatable.
for _set_seed in (random.seed, np.random.seed, tf.random.set_seed):
    _set_seed(1032021)
del _set_seed  # keep the notebook namespace free of the loop helper

# 2. Loading The Dataset

In [23]:
# Load the "All BC" sheet and transpose it, so that each original spreadsheet
# column becomes one row of `ds`.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# is provided by `from utils import *` -- confirm, or import pandas directly.
# NOTE(review): the "# 1. Imports + Dataset" prefix in the path below looks like
# a notebook-export artifact rather than a real directory name -- confirm the
# actual location of the .xls file before running.
ds = (
    pd.read_excel("# 1. Imports + Datasetdata4/07_14_22_data.xls",
                  sheet_name="All BC")
    .T
    .iloc[1:, :]                              # skip the first transposed row
    .drop(["LN Equinox", "Mystery"], axis=0)  # rows excluded from the study
    .reset_index(drop=True)
)

# the label of the sample (column 0), plus an integer-encoded copy
y = ds[0]
label_encoder = LabelEncoder()
y_num = pd.DataFrame(label_encoder.fit_transform(y))

# everything after the label column is treated as feature data
ds = ds.iloc[:, 1:]

# normalize the dataset: `scale(..., axis=1)` standardizes each row on its own.
# The result is wrapped in a DataFrame with default 0..n-1 column labels; the
# former `x = ds.copy()` / `x.columns = ...` lines were dead code (immediately
# overwritten) and have been removed.
x = pd.DataFrame(scale(ds, axis=1))

# only keep the peaks (positional column indices into `x`)
peaks = [198, 262, 1506, 1669, 1967, 4564, 4708, 4760, 4970]
x_peaks = x.iloc[:, peaks]


# 3. Inference


In [24]:

# Repeated stratified cross-validation: 5 folds, repeated 3 times, fixed seed.
num_repeats = 3
num_splits = 5
kfold = RepeatedStratifiedKFold(
    n_splits=num_splits, n_repeats=num_repeats, random_state=1032021
)

# From here on `num_splits` holds the total number of train/test splits
# (folds per repeat x repeats), not the folds per repeat.
num_splits = num_splits * num_repeats

## 3.1 Load the Models

In [25]:

# Linear discriminant analysis with default settings (the "pca_lda" model).
lda = LinearDiscriminantAnalysis()

# Support vector classifier with an RBF kernel.
svm = SVC(kernel='rbf', C=1000, gamma=0.01)

# Random forest: 300 shallow trees, seeded for repeatability.
forest = RandomForestClassifier(
    n_estimators=300,
    max_depth=4,
    max_features='sqrt',
    min_samples_leaf=1,
    random_state=1032021,
)

# Gradient-boosted trees (XGBoost scikit-learn wrapper), seeded for repeatability.
xgboost = xgb.XGBClassifier(
    n_estimators=20,
    max_depth=2,
    learning_rate=0.001,
    gamma=1,
    num_class=4,
    random_state=1032021,
)

# Neural network built by the project helper `create_model`.
# NOTE(review): `x_peaks.shape` is the full 2-D shape of the feature frame;
# confirm that `create_model` expects this rather than just the feature count.
mlp = create_model(
    num_neurons=128,
    drop_out_rate=0.3,
    input_shape=x_peaks.shape,
    output_shape=4,
)

## 3.2 Evaluation

In [27]:
# Run every model through the repeated stratified k-fold splitter on the
# peak-only features. The helper's two return values are unpacked into
# `kfold_results` and `evaluation`.
# Fix: the original call was missing the comma after `mlp=mlp`, which made the
# whole cell a SyntaxError.
kfold_results, evaluation = kfold_cross_validation(
    x=x_peaks,
    y=y,
    forest=forest,
    svm=svm,
    xgb=xgboost,
    mlp=mlp,
    pca_lda=lda,
    pca_idx=3,
    kfold=kfold,
    mlp_folder="mlp_model/",
)

TRAIN: [ 0  1  2  3  4  6  7  9 10 11 12 14 15 16 17 19 20 23 24 25 26 27 28 29
 31] TEST: [ 5  8 13 18 21 22 30]
TRAIN: [ 0  1  2  4  5  6  8  9 10 11 13 14 15 16 17 18 19 20 21 22 23 24 27 29
 30] TEST: [ 3  7 12 25 26 28 31]
TRAIN: [ 2  3  4  5  6  7  8  9 12 13 14 15 16 18 19 20 21 22 23 24 25 26 27 28
 30 31] TEST: [ 0  1 10 11 17 29]
TRAIN: [ 0  1  3  4  5  7  8  9 10 11 12 13 14 15 16 17 18 20 21 22 25 26 28 29
 30 31] TEST: [ 2  6 19 23 24 27]
TRAIN: [ 0  1  2  3  5  6  7  8 10 11 12 13 17 18 19 21 22 23 24 25 26 27 28 29
 30 31] TEST: [ 4  9 14 15 16 20]
TRAIN: [ 0  2  3  4  6  8 10 11 12 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 31] TEST: [ 1  5  7  9 13 29 30]
TRAIN: [ 1  2  3  5  6  7  8  9 10 12 13 15 16 17 18 21 22 23 24 25 27 28 29 30
 31] TEST: [ 0  4 11 14 19 20 26]
TRAIN: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 18 19 20 21 23 24 26 27
 29 30] TEST: [ 8 17 22 25 28 31]
TRAIN: [ 0  1  2  4  5  6  7  8  9 11 12 13 14 15 17 19 20 22 23 24 25 26 28 29
 30 31] 

In [29]:
# Render the cross-validation metrics collected in `evaluation`, passing the
# sample labels `y` alongside.
visualize_results(evaluation, y)

Unnamed: 0,Unnamed: 1,AE,AE & CT,CT,ECR
pca_lda,specificity,93.33 +- 9.76,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0
pca_lda,sensitivity,100.0 +- 0.0,80.0 +- 31.62,100.0 +- 0.0,100.0 +- 0.0
pca_lda,precision,84.44 +- 23.12,93.33 +- 25.82,100.0 +- 0.0,100.0 +- 0.0
svm,specificity,96.0 +- 8.28,96.67 +- 6.9,100.0 +- 0.0,100.0 +- 0.0
svm,sensitivity,90.0 +- 20.7,90.0 +- 20.7,100.0 +- 0.0,100.0 +- 0.0
svm,precision,90.0 +- 20.7,90.0 +- 20.7,100.0 +- 0.0,100.0 +- 0.0
forest,specificity,96.0 +- 8.28,91.44 +- 9.7,100.0 +- 0.0,100.0 +- 0.0
forest,sensitivity,73.33 +- 32.0,90.0 +- 20.7,100.0 +- 0.0,100.0 +- 0.0
forest,precision,86.67 +- 29.68,76.67 +- 25.82,100.0 +- 0.0,100.0 +- 0.0
xgb,specificity,93.33 +- 9.76,97.33 +- 10.33,94.44 +- 11.64,98.15 +- 5.56
