In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from math import sqrt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

import torch
from torch import nn

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from multiclass_performanceMetrics import *
from dataMining_functions import *
from neuralNet_functions import *
from nested_design_analysis import *

In [3]:
from imblearn.over_sampling import SMOTE

In [4]:
# load csv files containing slope of wavelet decomposition extracted from data based on discrete wavelet transform
slope_m = pd.read_csv('features/window512_neuron_slope_rowwindow_Molino.csv', header=None)
slope_p = pd.read_csv('features/window512_neuron_slope_rowwindow_Pachon.csv', header=None)
slope_s = pd.read_csv('features/window512_neuron_slope_rowwindow_Surface.csv', header=None)

In [5]:
min_col = 275  # minimum number of neurons of a fish in the data

# slicing the data set
slope_m = slope_m.iloc[:, :min_col]
slope_p = slope_p.iloc[:, :min_col]
slope_s = slope_s.iloc[:, :min_col]

# compute Hurst exponent
slope_m = (slope_m + 1)/2
slope_p = (slope_p + 1)/2
slope_s = (slope_s + 1)/2


# class lables
# molino = 0, pachon = 1, surface = 2
slope_m['class'] = 0
slope_p['class'] = 1
slope_s['class'] = 2
num_class = 3





In [7]:
slope_m.shape, slope_p.shape, slope_s.shape

((176, 276), (176, 276), (121, 276))

In [8]:
# get the number of nonoverlapping window obtained from the neural signal
num_window = slope_m.shape[0] / 16  # 16 is the number of fishes in molino group in the dataset
num_window

11.0

In [9]:
# combine all fish classes datasets
df = pd.concat([slope_m, slope_p], axis=0)
df = pd.concat([df, slope_s], axis=0)

In [10]:
# apply balanced nested design to the dataset
# to break the dependency caused by subjects(fishes) on the neural signals of a fish
nested_design_df = balanced_nested_design_sampling(df, num_window=num_window)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)


# Classifications

In [11]:
# shuffle data set for more randomness during training
nested_design_df = nested_design_df.sample(frac=1)
nested_design_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,266,267,268,269,270,271,272,273,274,class
54,0.539843,0.436554,0.426338,0.455747,0.509146,0.471466,0.446147,0.695605,0.61693,0.57353,...,0.510108,0.544282,0.548536,0.496125,0.454498,0.48411,-0.644036,0.423289,0.426057,0
1,0.715725,0.482597,0.449764,0.629813,0.506171,0.592363,0.492141,0.510445,0.535549,0.615574,...,0.516265,0.277698,-0.272447,0.56594,0.15382,0.161911,0.233812,0.343677,0.5133,0
297,0.389155,0.548061,0.476848,0.338649,0.359915,0.418997,0.316106,-0.142785,0.03756,0.27416,...,0.40085,0.369013,0.468408,-0.406401,-0.115531,-0.018766,0.317244,0.098491,-0.144598,2
96,0.404066,0.518812,0.438924,0.482228,0.395076,0.419317,0.488017,0.394605,0.565388,0.46137,...,-0.466107,0.338014,0.53348,0.408654,-0.142346,-0.228248,0.511782,-0.118785,0.410358,0
280,0.413081,0.319287,0.454424,0.421036,0.361605,0.556575,0.411786,0.426536,0.453365,0.435268,...,0.450448,0.325683,0.231651,-0.105199,0.483392,0.521941,0.45497,0.405637,0.163908,2


In [12]:
# train and test data split
X, y = nested_design_df.drop(labels='class', axis=1), nested_design_df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [14]:
# Apply SMOTE to balance the training data
# sometimes random train and test data split causes minor data imbalance
# not a big issue, but used SMOTE just in case
sm = SMOTE(random_state=42, k_neighbors=12)
X_train, y_train = sm.fit_resample(X_train, y_train)

In [15]:
y_train.value_counts()

2    105
1    105
0    105
Name: class, dtype: int64

In [16]:
# Logistic Regression
lr_mod = LogisticRegression(max_iter=700)
lr_mod.fit(X_train, y_train)
train_pred = lr_mod.predict(X_train)
test_pred = lr_mod.predict(X_test)

In [17]:
target_names = ['Molino', 'Pachon', 'Surface']
print(classification_report(y_train, train_pred, target_names=target_names))
confusion_matrix(y_train, train_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00       105
      Pachon       1.00      1.00      1.00       105
     Surface       1.00      1.00      1.00       105

    accuracy                           1.00       315
   macro avg       1.00      1.00      1.00       315
weighted avg       1.00      1.00      1.00       315



array([[105,   0,   0],
       [  0, 105,   0],
       [  0,   0, 105]])

In [18]:
target_names = ['Molino', 'Pachon', 'Surface']
print(classification_report(y_test, test_pred, target_names=target_names))
confusion_matrix(y_test, test_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00        31
      Pachon       1.00      1.00      1.00        16
     Surface       1.00      1.00      1.00        26

    accuracy                           1.00        73
   macro avg       1.00      1.00      1.00        73
weighted avg       1.00      1.00      1.00        73



array([[31,  0,  0],
       [ 0, 16,  0],
       [ 0,  0, 26]])

In [19]:
def SMOTE_oversample(data):
    # param: data is a dataframe (normally training data) where the target feature is in the last column
    # return balanced data by applying SMOTE oversampling technique
    # this function will be used in repeat_sampling_and_training function
    #             to apply data preprocessing for every sample
    X, y = data.iloc[:,:-1], data.iloc[:,-1]
    sm = SMOTE(random_state=42, k_neighbors=12)
    X, y = sm.fit_resample(X, y)
    df = pd.concat([X, y], axis=1)
    df = df.sample(frac=1)
    return df

In [20]:
def build_logisticRegression(**kwargs):
    # return an untrained logistic regression model
    return LogisticRegression(max_iter=500)

model_f = build_logisticRegression
predictors = list(df.drop(labels='class', axis=1).columns)  # names of the features
# random sampling and train a new model
# to more accurately test our model performace indepedent of particular samples
# Similar to the idea of K-fold cross validation
res = repeat_sampling_and_training(model_f, [], df,
                                  'class', predictors, num_repeat=100, doMinMaxScaling=False,
                                  num_window=num_window, data_processing_f=balanced_nested_design_sampling,
                                  is_oversample=True, oversample_f=SMOTE_oversample)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

In [23]:
# print out model performance and confusion matrix (averaged over all sampling repetitions)
performance_dict, avg_cm = res
print_performance_metrics(performance_dict)
avg_cm

Mean of training_accuracy: 1.0
Standard deviation of training_accuracy: 0.0
Mean of testing_accuracy: 0.9925688073394496
Standard deviation of testing_accuracy: 0.00827673454038277
Mean of AUC_score: 1.0
Standard deviation of AUC_score: 0.0
Mean of recall_class0: 0.9925195508794893
Standard deviation of recall_class0: 0.013129291416067319
Mean of recall_class1: 0.9927118598003644
Standard deviation of recall_class1: 0.012509509343260372
Mean of recall_class2: 0.9928679303383047
Standard deviation of recall_class2: 0.014938433864092144
Mean of precision_class0: 0.992319895373986
Standard deviation of precision_class0: 0.014461100015802572
Mean of precision_class1: 0.9923847506913394
Standard deviation of precision_class1: 0.013280879907109952
Mean of precision_class2: 0.9931044415437635
Standard deviation of precision_class2: 0.01266386681270397
Mean of f1_class0: 0.9923309849134556
Standard deviation of f1_class0: 0.010284333635155237
Mean of f1_class1: 0.9924566345202893
Standard devi

Unnamed: 0,Predicted Class 0,Predicted Class 1,Predicted Class 2
Actual Class 0,35.81,0.15,0.13
Actual Class 1,0.15,36.17,0.12
Actual Class 2,0.13,0.13,36.21


In [24]:
# L1 penalty(Lasso) Logistic Regression
# to check if some neurons are responding more to the stimulation in vision
# if yes, then the corresponding column index of a neuron responds less to the stimulus
#               will have 0 coefficient in the weight matrix of model
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validation based grid search
# to find the best hyper parameters for L1 penalty Logistic Regression
parameters = {'solver':['liblinear', 'saga'], 'C':[10, 1, 0.1, 0.01]}
model = LogisticRegression(max_iter=1000, penalty='l1')
gs = GridSearchCV(model, parameters, n_jobs=-1, cv=5)
gs.fit(X_train, y_train)
gs.best_params_



{'C': 10, 'solver': 'saga'}

In [25]:
# train a model based on the best hyperparameter found in the given hyperparameter space
lr_mod = LogisticRegression(max_iter=1000, penalty='l1', C=10, solver='saga')
lr_mod.fit(X_train, y_train)
train_pred = lr_mod.predict(X_train)
test_pred = lr_mod.predict(X_test)



In [26]:
target_names = ['Molino', 'Pachon', 'Surface']
print(classification_report(y_train, train_pred, target_names=target_names))
confusion_matrix(y_train, train_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00       105
      Pachon       1.00      1.00      1.00       105
     Surface       1.00      1.00      1.00       105

    accuracy                           1.00       315
   macro avg       1.00      1.00      1.00       315
weighted avg       1.00      1.00      1.00       315



array([[105,   0,   0],
       [  0, 105,   0],
       [  0,   0, 105]])

In [27]:
target_names = ['Molino', 'Pachon', 'Surface']
print(classification_report(y_test, test_pred, target_names=target_names))
confusion_matrix(y_test, test_pred)

              precision    recall  f1-score   support

      Molino       0.93      0.90      0.92        31
      Pachon       0.88      0.94      0.91        16
     Surface       0.96      0.96      0.96        26

    accuracy                           0.93        73
   macro avg       0.93      0.93      0.93        73
weighted avg       0.93      0.93      0.93        73



array([[28,  2,  1],
       [ 1, 15,  0],
       [ 1,  0, 25]])

In [28]:
w = lr_mod.coef_
eps = 1e-4
count = 0
for j in range(w.shape[1]):
    if all(w[:, j] < eps):
        count += 1
print(f"The number of components of the weight less than {eps} is {count} out of {w.shape[1]} number of features")

The number of components of the weight less than 0.0001 is 168 out of 275 number of features


The L1 penalty logistic regression model still classify eyeless fishes well. However, as we can see from the weight matrix of the model, 172 neurons out of 275 don't contribute in distinguishing fish types.

In [29]:
# KNN classifier without data balancing
knn_mod = KNeighborsClassifier()
knn_mod.fit(X_train, y_train)
train_pred = knn_mod.predict(X_train)
test_pred = knn_mod.predict(X_test)

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [30]:
print(classification_report(y_train, train_pred, target_names=target_names))
confusion_matrix(y_train, train_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00       105
      Pachon       1.00      1.00      1.00       105
     Surface       1.00      1.00      1.00       105

    accuracy                           1.00       315
   macro avg       1.00      1.00      1.00       315
weighted avg       1.00      1.00      1.00       315



array([[105,   0,   0],
       [  0, 105,   0],
       [  0,   0, 105]])

In [31]:
print(classification_report(y_test, test_pred, target_names=target_names))
confusion_matrix(y_test, test_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00        31
      Pachon       1.00      1.00      1.00        16
     Surface       1.00      1.00      1.00        26

    accuracy                           1.00        73
   macro avg       1.00      1.00      1.00        73
weighted avg       1.00      1.00      1.00        73



array([[31,  0,  0],
       [ 0, 16,  0],
       [ 0,  0, 26]])

In [32]:
# KNN
def build_knn(**kwargs):
    # return an untrained KNN model
    return KNeighborsClassifier()

model_f = build_knn
# random sampling and train a new model
# to more accurately test our model performace indepedent of particular samples
# Similar to the idea of K-fold cross validation
res = repeat_sampling_and_training(model_f, [], df,
                                  'class', predictors, num_repeat=100, doMinMaxScaling=False,
                                  num_window=num_window, data_processing_f=balanced_nested_design_sampling,
                                  is_oversample=True, oversample_f=SMOTE_oversample)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k],

In [33]:
performance_dict, avg_cm = res
print_performance_metrics(performance_dict)
avg_cm

Mean of training_accuracy: 0.9994711588807506
Standard deviation of training_accuracy: 0.0017050786845914695
Mean of testing_accuracy: 0.9952293577981652
Standard deviation of testing_accuracy: 0.010255551275216223
Mean of AUC_score: 1.0
Standard deviation of AUC_score: 0.0
Mean of recall_class0: 0.9977819521690929
Standard deviation of recall_class0: 0.00757009753732328
Mean of recall_class1: 0.9945432261394075
Standard deviation of recall_class1: 0.015354569669876013
Mean of recall_class2: 0.9935140488084973
Standard deviation of recall_class2: 0.026020422763144105
Mean of precision_class0: 0.9928019075112354
Standard deviation of precision_class0: 0.017593734002716643
Mean of precision_class1: 0.9964774723668429
Standard deviation of precision_class1: 0.01645308785774324
Mean of precision_class2: 0.9970944206722733
Standard deviation of precision_class2: 0.009005990847450563
Mean of f1_class0: 0.9951875027752933
Standard deviation of f1_class0: 0.009636759524494548
Mean of f1_class1

Unnamed: 0,Predicted Class 0,Predicted Class 1,Predicted Class 2
Actual Class 0,36.16,0.01,0.07
Actual Class 1,0.17,36.52,0.04
Actual Class 2,0.1,0.13,35.8


In [34]:
# SVM
svm_mod = SVC()
svm_mod.fit(X_train, y_train)
train_pred = svm_mod.predict(X_train)
test_pred = svm_mod.predict(X_test)

In [35]:
print(classification_report(y_train, train_pred, target_names=target_names))
confusion_matrix(y_train, train_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00       105
      Pachon       1.00      1.00      1.00       105
     Surface       1.00      1.00      1.00       105

    accuracy                           1.00       315
   macro avg       1.00      1.00      1.00       315
weighted avg       1.00      1.00      1.00       315



array([[105,   0,   0],
       [  0, 105,   0],
       [  0,   0, 105]])

In [36]:
print(classification_report(y_test, test_pred, target_names=target_names))
confusion_matrix(y_test, test_pred)

              precision    recall  f1-score   support

      Molino       1.00      1.00      1.00        31
      Pachon       1.00      1.00      1.00        16
     Surface       1.00      1.00      1.00        26

    accuracy                           1.00        73
   macro avg       1.00      1.00      1.00        73
weighted avg       1.00      1.00      1.00        73



array([[31,  0,  0],
       [ 0, 16,  0],
       [ 0,  0, 26]])

In [37]:
# SVM

def build_svm(**kwargs):
    # return an untrained SVM model
    return SVC(probability=True)

model_f = build_svm
# random sampling and train a new model
# to more accurately test our model performace indepedent of particular samples
# Similar to the idea of K-fold cross validation
res = repeat_sampling_and_training(model_f, [], df,
                                  'class', predictors, num_repeat=100, doMinMaxScaling=False,
                                  num_window=num_window, data_processing_f=balanced_nested_design_sampling,
                                  is_oversample=True, oversample_f=SMOTE_oversample)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs_by_class[i].drop(labels=target_name, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

In [38]:
performance_dict, avg_cm = res
print_performance_metrics(performance_dict)
avg_cm

Mean of training_accuracy: 1.0
Standard deviation of training_accuracy: 0.0
Mean of testing_accuracy: 0.9987155963302753
Standard deviation of testing_accuracy: 0.003674309063211142
Mean of AUC_score: 1.0
Standard deviation of AUC_score: 0.0
Mean of recall_class0: 0.9987754123957119
Standard deviation of recall_class0: 0.005378401189148913
Mean of recall_class1: 0.999167352972231
Standard deviation of recall_class1: 0.004754107767493109
Mean of recall_class2: 0.9983350684403317
Standard deviation of recall_class2: 0.007596251711156925
Mean of precision_class0: 0.9981776681698293
Standard deviation of precision_class0: 0.0066817673888893035
Mean of precision_class1: 0.9987916187815381
Standard deviation of precision_class1: 0.005932841262733751
Mean of precision_class2: 0.999031746031746
Standard deviation of precision_class2: 0.005535769755653131
Mean of f1_class0: 0.9984589708670006
Standard deviation of f1_class0: 0.004467575605368219
Mean of f1_class1: 0.9989642724796228
Standard de

Unnamed: 0,Predicted Class 0,Predicted Class 1,Predicted Class 2
Actual Class 0,36.57,0.03,0.02
Actual Class 1,0.02,36.55,0.01
Actual Class 2,0.05,0.01,35.74
