In [1]:
import os
import warnings
from pathlib import Path
from IPython.display import clear_output
from IPython.display import display
from pandas.api.types import CategoricalDtype

# Basic libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling as pp
import seaborn as sns

# Clustering
from sklearn.cluster import KMeans

# Principal Component Analysis (PCA)
from sklearn.decomposition import PCA

#Mutual Information
from sklearn.feature_selection import mutual_info_regression

# Cross Validation
from sklearn.model_selection import KFold, cross_val_score, StratifiedKFold, learning_curve, train_test_split

# Encoders
from category_encoders import MEstimateEncoder
from sklearn.preprocessing import LabelEncoder
from category_encoders import MEstimateEncoder

# Algorithms
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor

# Optuna - Bayesian Optimization 
import optuna
from optuna.samplers import TPESampler

# Plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.offline as offline
import plotly.graph_objs as go

# Metric
from sklearn.metrics import mean_absolute_error as mae

# Permutation Importance
import eli5
from eli5.sklearn import PermutationImportance

In [2]:
# Per-fold results on the Ionosphere dataset, one frame per algorithm.
# T_class, T_red and Tiempo hold the five recorded per-fold values; Agr is the
# mean of T_class and T_red; KFold numbers the folds; Algoritm tags the method.
ionosphere_1nn = pd.DataFrame({
    'T_class': [0.85, 0.77, 0.83, 0.91, 0.86],
    'T_red': [0, 0, 0, 0, 0],
    'Tiempo': [0.00379, 0.00538, 0.00486, 0.00273, 0.00460],
}).assign(
    # T_red is all zeros for 1-NN, so the mean reduces to T_class / 2.
    Agr=lambda frame: frame['T_class'] / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='1-NN',
)

ionosphere_relief = pd.DataFrame({
    'T_class': [0.86, 0.79, 0.80, 0.91, 0.86],
    'T_red': [0.03, 0.03, 0.09, 0.03, 0.03],
    'Tiempo': [0.01411, 0.01957, 0.02395, 0.02126, 0.01514],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Greedy Relief',
)

ionosphere_ls = pd.DataFrame({
    'T_class': [0.89, 0.84, 0.86, 0.91, 0.9],
    'T_red': [0.79, 0.91, 0.82, 0.88, 0.85],
    'Tiempo': [29.36188, 53.92413, 23.91487, 34.33943, 43.97262],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Local Search',
)

# Stack the three tables; the original 0-4 row labels are kept, so they repeat
# once per algorithm in the combined frame.
ionosphere = pd.concat([ionosphere_1nn, ionosphere_relief, ionosphere_ls])

In [3]:
# Per-fold results on the Parkinson dataset, one frame per algorithm.
# T_class, T_red and Tiempo hold the five recorded per-fold values; Agr is the
# mean of T_class and T_red; KFold numbers the folds; Algoritm tags the method.
parkinson_1nn = pd.DataFrame({
    'T_class': [0.72, 0.82, 0.95, 0.74, 0.67],
    'T_red': [0, 0, 0, 0, 0],
    'Tiempo': [0.00099, 0.00098, 0.00099, 0.00099, 0.00099],
}).assign(
    # T_red is all zeros for 1-NN, so the mean reduces to T_class / 2.
    Agr=lambda frame: frame['T_class'] / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='1-NN',
)

parkinson_relief = pd.DataFrame({
    'T_class': [0.72, 0.82, 0.95, 0.69, 0.67],
    'T_red': [0.05, 0.0, 0.05, 0.05, 0.0],
    'Tiempo': [0.00733, 0.00480, 0.00478, 0.00481, 0.00481],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Greedy Relief',
)

parkinson_ls = pd.DataFrame({
    'T_class': [0.72, 0.77, 0.90, 0.69, 0.69],
    'T_red': [1, 0.91, 1, 1, 0.86],
    'Tiempo': [10.67630, 4.41078, 8.15252, 11.47030, 4.23660],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Local Search',
)

# Stack the three tables; the original 0-4 row labels are kept, so they repeat
# once per algorithm in the combined frame.
parkinson = pd.concat([parkinson_1nn, parkinson_relief, parkinson_ls])

In [4]:
# Per-fold results on the Heart-spectf dataset, one frame per algorithm.
# T_class, T_red and Tiempo hold the five recorded per-fold values; Agr is the
# mean of T_class and T_red; KFold numbers the folds; Algoritm tags the method.
heart_1nn = pd.DataFrame({
    'T_class': [0.97, 0.90, 0.71, 0.77, 0.99],
    'T_red': [0, 0, 0, 0, 0],
    'Tiempo': [0.00308, 0.00314, 0.00322, 0.00306, 0.00307],
}).assign(
    # T_red is all zeros for 1-NN, so the mean reduces to T_class / 2.
    Agr=lambda frame: frame['T_class'] / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='1-NN',
)

heart_relief = pd.DataFrame({
    'T_class': [0.97, 0.91, 0.74, 0.74, 0.99],
    'T_red': [0.11, 0.0, 0.0, 0.0, 0.09],
    'Tiempo': [0.01292, 0.01299, 0.01318, 0.01554, 0.01282],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Greedy Relief',
)

heart_ls = pd.DataFrame({
    'T_class': [0.96, 0.90, 0.73, 0.84, 0.99],
    'T_red': [0.7, 0.82, 0.89, 0.89, 0.89],
    'Tiempo': [58.32199, 45.53995, 63.36239, 75.65064, 54.88951],
}).assign(
    Agr=lambda frame: (frame['T_class'] + frame['T_red']) / 2,
    KFold=[1, 2, 3, 4, 5],
    Algoritm='Local Search',
)

# Stack the three tables; the original 0-4 row labels are kept, so they repeat
# once per algorithm in the combined frame.
heart = pd.concat([heart_1nn, heart_relief, heart_ls])

In [5]:
def plot_box(fig, df, feature, r, c, color_number):
    """Add a box trace of ``df[feature]`` to one subplot of ``fig`` and style its axes.

    Parameters
    ----------
    fig : figure object exposing ``add_trace``, ``update_xaxes`` and ``update_yaxes``.
    df : table with an 'Algoritm' column (used for x) and a ``feature`` column (used for y).
    feature : name of the column plotted on the y axis.
    r, c : row and column of the target subplot.
    color_number : index into ``px.colors.sequential.Viridis_r`` selecting the marker colour.

    Returns
    -------
    None; ``fig`` is modified in place.
    """
    categories = df['Algoritm'].astype(object)
    values = df[feature]
    box_color = px.colors.sequential.Viridis_r[color_number]

    trace = go.Box(x=categories, y=values, marker=dict(color=box_color))
    fig.add_trace(trace, row=r, col=c)

    # Both axes of the target subplot share the same gray border line and hide the grid.
    border = dict(showline=True, linecolor='gray', linewidth=2)
    fig.update_xaxes(showgrid=False, zeroline=False, row=r, col=c, **border)
    fig.update_yaxes(showgrid=False, gridcolor='gray', gridwidth=0.5, row=r, col=c, **border)
    
# One panel per dataset; each panel holds one T_class box per algorithm.
fig = make_subplots(
    rows=1,
    cols=3,
    column_widths=[0.34, 0.33, 0.33],
    vertical_spacing=0.1,
    horizontal_spacing=0.1,
    subplot_titles=('Ionosphere T_Class', 'Parkinson T_Class', 'Heart-spectf T_Class'),
)

# (algorithm label, Viridis_r colour index), in the order the boxes are added.
algorithm_colors = [('1-NN', 4), ('Greedy Relief', 6), ('Local Search', 8)]

for panel_col, results in enumerate((ionosphere, parkinson, heart), start=1):
    for algorithm, color_index in algorithm_colors:
        plot_box(fig, results[results.Algoritm == algorithm], 'T_class', 1, panel_col, color_index)

# General styling (kept as the cell's last expression so the figure is rendered).
fig.update_layout(
    title="<span style='font-size:36px; font-family:Times New Roman'>Classification Rate Analysis</span>",
    height=500,
    bargap=0.2,
    margin=dict(b=50, r=30, l=100),
    plot_bgcolor='rgb(242,242,242)',
    paper_bgcolor='rgb(242,242,242)',
    font=dict(family="Times New Roman", size=14),
    hoverlabel=dict(font_color="floralwhite"),
    showlegend=False,
)

In [6]:
# One panel per dataset; each panel holds one Agr box per algorithm.
fig = make_subplots(
    rows=1,
    cols=3,
    column_widths=[0.34, 0.33, 0.33],
    vertical_spacing=0.1,
    horizontal_spacing=0.1,
    subplot_titles=('Ionosphere Agr', 'Parkinson Agr', 'Heart-spectf Agr'),
)

# (algorithm label, Viridis_r colour index), in the order the boxes are added.
algorithm_colors = [('1-NN', 4), ('Greedy Relief', 6), ('Local Search', 8)]

for panel_col, results in enumerate((ionosphere, parkinson, heart), start=1):
    for algorithm, color_index in algorithm_colors:
        plot_box(fig, results[results.Algoritm == algorithm], 'Agr', 1, panel_col, color_index)

# General styling (kept as the cell's last expression so the figure is rendered).
fig.update_layout(
    title="<span style='font-size:36px; font-family:Times New Roman'>Objective Function Analysis</span>",
    height=500,
    bargap=0.2,
    margin=dict(b=50, r=30, l=100),
    plot_bgcolor='rgb(242,242,242)',
    paper_bgcolor='rgb(242,242,242)',
    font=dict(family="Times New Roman", size=14),
    hoverlabel=dict(font_color="floralwhite"),
    showlegend=False,
)

In [7]:
def plot_bar(fig, df, feature, r, c, color_number):
    """Add a bar trace of ``df[feature]`` per fold to one subplot of ``fig`` and style its axes.

    Parameters
    ----------
    fig : figure object exposing ``add_trace``, ``update_xaxes`` and ``update_yaxes``.
    df : table with a 'KFold' column (used for x) and a ``feature`` column (used for y).
    feature : name of the column plotted on the y axis.
    r, c : row and column of the target subplot.
    color_number : index into ``px.colors.sequential.Viridis_r`` selecting the bar colour.

    Returns
    -------
    None; ``fig`` is modified in place.
    """
    folds = df['KFold']
    heights = df[feature]
    bar_color = px.colors.sequential.Viridis_r[color_number]

    trace = go.Bar(x=folds, y=heights, marker=dict(color=bar_color))
    fig.add_trace(trace, row=r, col=c)

    # Both axes of the target subplot share the same gray border line and hide the grid.
    border = dict(showline=True, linecolor='gray', linewidth=2)
    fig.update_xaxes(showgrid=False, zeroline=False, row=r, col=c, **border)
    fig.update_yaxes(showgrid=False, gridcolor='gray', gridwidth=0.5, row=r, col=c, **border)
    
# One row per algorithm; within each row, one bar series per dataset showing the
# 'Tiempo' value of every fold.
fig = make_subplots(rows=3, cols=1, vertical_spacing=0.1, horizontal_spacing=0.1,
                    subplot_titles=('1-NN', 'Greedy Relief', 'Local Search'))

# (legend label, Viridis_r colour index, frames ordered 1-NN / Greedy Relief / Local Search)
dataset_series = [
    ('Ionosphere', 0, (ionosphere_1nn, ionosphere_relief, ionosphere_ls)),
    ('Parkinson', 5, (parkinson_1nn, parkinson_relief, parkinson_ls)),
    ('Heart-spectf', 8, (heart_1nn, heart_relief, heart_ls)),
]

for subplot_row in (1, 2, 3):
    for dataset_label, color_index, frames in dataset_series:
        plot_bar(fig, frames[subplot_row - 1], 'Tiempo', subplot_row, 1, color_index)
        # The legend is enabled below, so every trace needs a real name; otherwise
        # plotly labels the entries "trace 0" ... "trace 8". The trace just added is
        # the last one in fig.data. Grouping by dataset and showing only the
        # first-row entry gives one legend item per dataset that toggles all three rows.
        fig.data[-1].update(
            name=dataset_label,
            legendgroup=dataset_label,
            showlegend=(subplot_row == 1),
        )

# General styling and legend placement (kept as the cell's last expression so the
# figure is rendered).
fig.update_layout(
    height=750, bargap=0.2,
    margin=dict(b=50, r=30, l=100),
    title="<span style='font-size:36px; font-family:Times New Roman'>Execution Time (seg) per KFold</span>",
    plot_bgcolor='rgb(242,242,242)',
    paper_bgcolor='rgb(242,242,242)',
    font=dict(family="Times New Roman", size=14),
    hoverlabel=dict(font_color="floralwhite"),
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.125,
        xanchor="right",
        x=1,
        font=dict(family="Times New Roman", size=14),
    ),
)