### Gráficos

In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Columns kept from every final-results CSV; anything else in the files is dropped.
metric_columns = ['Algorithm', 'Accuracy', 'F1', 'AUC']


def _read_metrics(csv_path):
    """Read one final-results CSV and return only the columns in `metric_columns`."""
    return pd.read_csv(csv_path)[metric_columns]


# Heart dataset: one results frame per optimizer.
heart_ga = _read_metrics('./final_results_heart/FINAL_RESULT_GA_HEART.csv')
heart_pso = _read_metrics('./final_results_heart/FINAL_RESULT_PSO_HEART.csv')
heart_optuna = _read_metrics('./final_results_heart/FINAL_RESULT_OPTUNA_HEART.csv')

# Fruit dataset: one results frame per optimizer.
fruit_ga = _read_metrics('./final_results_fruit/FINAL_RESULT_GA_FRUIT.csv')
fruit_pso = _read_metrics('./final_results_fruit/FINAL_RESULT_PSO_FRUIT.csv')
fruit_optuna = _read_metrics('./final_results_fruit/FINAL_RESULT_OPTUNA_FRUIT.csv')

In [3]:
# Histogram of the F1 scores in `heart_ga`, with the mean and the median of
# that column marked as vertical lines.
fig = px.histogram(
    heart_ga,
    x='F1',
    nbins=8,
    # Plotly Express keys `labels` by column name (not by 'x'/'y'), so the
    # x-axis title has to be set through the 'F1' column.
    labels={'F1': 'F1 Score'},
    color_discrete_sequence=['teal'],
    title="Histogram F1 Score Obtained by Genetic Algorithm Optimization",
)
mean = heart_ga['F1'].mean()
median = heart_ga['F1'].median()
fig.update_layout(bargap=0.2, width=800, height=600, template='plotly_dark')

# NOTE(review): the annotation_x values below are hard-coded, presumably
# hand-tuned for the current results so the two labels do not overlap;
# revisit them if the CSV contents change.
# Other line_dash options: 'solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot'.
fig.add_vline(x=mean, line_width=3, line_dash="solid", line_color='turquoise',
              annotation_text='Mean', annotation_x=0.8993)
fig.add_vline(x=median, line_width=3, line_dash="dash", line_color="wheat",
              annotation_text='Median', annotation_x=0.898)
fig.show()

In [4]:

import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import numpy as np

# Overlaid histogram + KDE curve of the F1 score, one series per optimizer (Heart results).
group_labels = ['GA', 'PSO', 'Optuna']  # legend name of each series, same order as hist_data
hist_data = [results['F1'] for results in (heart_ga, heart_pso, heart_optuna)]

fig = ff.create_distplot(
    hist_data,
    group_labels,
    curve_type='kde',
    bin_size=0.005,
    show_hist=True,
)
fig.update_layout(
    template='plotly_dark',
    bargap=0.1,
    width=800,
    height=600,
    title='Distribution Plot Heart Dataset F1 Score',
)
fig.show()

In [5]:

# Overlaid histogram + KDE curve of the Accuracy score, one series per optimizer (Heart results).
group_labels = ['GA', 'PSO', 'Optuna']  # legend name of each series, same order as hist_data
hist_data = [results['Accuracy'] for results in (heart_ga, heart_pso, heart_optuna)]

fig = ff.create_distplot(
    hist_data,
    group_labels,
    curve_type='kde',
    bin_size=0.005,
    show_hist=True,
)
fig.update_layout(
    template='plotly_dark',
    bargap=0.1,
    width=800,
    height=600,
    title='Distribution Plot Heart Dataset Accuracy Score',
)
fig.show()

In [6]:
# Overlaid histogram + KDE curve of the AUC score, one series per optimizer (Heart results).
group_labels = ['GA', 'PSO', 'Optuna']  # legend name of each series, same order as hist_data
hist_data = [results['AUC'] for results in (heart_ga, heart_pso, heart_optuna)]

fig = ff.create_distplot(
    hist_data,
    group_labels,
    curve_type='kde',
    bin_size=0.005,
    show_hist=True,
)
fig.update_layout(
    template='plotly_dark',
    bargap=0.1,
    width=800,
    height=600,
    title='Distribution Plot Heart Dataset AUC Score',
)
fig.show()

In [7]:

# Long-format frame: one row per (Algorithm, value) pair built from the F1
# column of each Heart results frame.
df = pd.DataFrame({
    'GA': heart_ga['F1'],
    'PSO': heart_pso['F1'],
    'Optuna': heart_optuna['F1'],
}).melt(var_name="Algorithm")

# One box-plot facet per optimizer, with every individual point drawn.
fig = px.box(
    df,
    y="value",
    facet_col="Algorithm",
    color="Algorithm",
    boxmode="overlay",
    points='all',
)
# Apply the same trace settings to each of the three facet columns.
for facet_column in (1, 2, 3):
    fig.update_traces(quartilemethod="exclusive", jitter=0, col=facet_column)
fig.update_layout(
    template='plotly_dark',
    bargap=0.1,
    width=800,
    height=600,
    title='BoxPlot Heart Dataset F1 Score',
)
fig.show()

### Teste U de Mann-Whitney

In [37]:
from scipy.stats import mannwhitneyu

# Mann-Whitney U test between two result sets for a chosen metric.
#
# Null hypothesis: there is no statistical difference between the two distributions.
# Alternative hypothesis: the two distributions are different.
def realiza_teste(amostra1, amostra2, metrica, dataset_name, nome1=None, nome2=None):
    """Run a Mann-Whitney U test on one metric column and print the results.

    Prints a header, the U statistic of the first sample (U1) with its
    p-value, and the U statistic of the second sample (U2 = n1*n2 - U1).

    Parameters
    ----------
    amostra1, amostra2 : mapping of column name -> sequence (e.g. DataFrame)
        The two result sets; each must contain the column ``str(metrica)``.
    metrica : str
        Name of the metric column to compare (e.g. 'F1', 'Accuracy', 'AUC').
    dataset_name : str
        Dataset name, used only in the printed header.
    nome1, nome2 : str, optional
        Display names of the two samples. When omitted, the original
        hard-coded labels are used ('GA' / 'o algoritmo genético' for the
        first sample and 'Optuna' / 'optuna' for the second), so existing
        calls print exactly what they printed before. Pass them whenever the
        samples are not GA and Optuna, otherwise the output is mislabelled.

    Returns
    -------
    None
        The results are only printed.
    """
    coluna = str(metrica)
    valores1, valores2 = amostra1[coluna], amostra2[coluna]

    # Fall back to the historical labels so default output stays unchanged.
    titulo1 = 'GA' if nome1 is None else nome1
    titulo2 = 'Optuna' if nome2 is None else nome2
    descricao1 = 'o algoritmo genético' if nome1 is None else nome1
    descricao2 = 'optuna' if nome2 is None else nome2

    # No `alternative` is passed, so SciPy's default alternative applies.
    U1, p = mannwhitneyu(valores1, valores2, method="asymptotic")

    separador = '==' * 22
    print(separador)
    print(f'{titulo1} x {titulo2} - {coluna} score - {dataset_name} Dataset')
    print(separador)
    print(f'U1, sendo 1 {descricao1}: {U1}, \np-value correspondente: {p}')

    # The two U statistics always sum to n1 * n2.
    U2 = len(valores1) * len(valores2) - U1
    print(f'U2, sendo 2 {descricao2}: {U2}')

In [39]:
# Mann-Whitney U test on the F1 column: heart_ga vs heart_optuna.
realiza_teste(amostra1=heart_ga, amostra2=heart_optuna, metrica='F1', dataset_name='Heart')

GA x Optuna - F1 score - Heart Dataset
U1, sendo 1 o algoritmo genético: 1718.0, 
p-value correspondente: 8.80862961794707e-06
U2, sendo 2 optuna: 526.0


In [40]:
# Mann-Whitney U test on the Accuracy column: heart_ga vs heart_optuna.
realiza_teste(amostra1=heart_ga, amostra2=heart_optuna, metrica='Accuracy', dataset_name='Heart')

GA x Optuna - Accuracy score - Heart Dataset
U1, sendo 1 o algoritmo genético: 1502.0, 
p-value correspondente: 0.004608907386193569
U2, sendo 2 optuna: 742.0


In [41]:
# Mann-Whitney U test on the AUC column: heart_ga vs heart_optuna.
realiza_teste(amostra1=heart_ga, amostra2=heart_optuna, metrica='AUC', dataset_name='Heart')

GA x Optuna - AUC score - Heart Dataset
U1, sendo 1 o algoritmo genético: 696.0, 
p-value correspondente: 0.0014946159721076434
U2, sendo 2 optuna: 1548.0


In [42]:
# Mann-Whitney U test on the F1 column: fruit_ga vs fruit_optuna.
realiza_teste(amostra1=fruit_ga, amostra2=fruit_optuna, metrica='F1', dataset_name='Fruit')

GA x Optuna - F1 score - Fruit Dataset
U1, sendo 1 o algoritmo genético: 1033.0, 
p-value correspondente: 3.890985978647318e-10
U2, sendo 2 optuna: 56.0


In [43]:
# Mann-Whitney U test on the Accuracy column: fruit_ga vs fruit_optuna.
realiza_teste(amostra1=fruit_ga, amostra2=fruit_optuna, metrica='Accuracy', dataset_name='Fruit')

GA x Optuna - Accuracy score - Fruit Dataset
U1, sendo 1 o algoritmo genético: 985.5, 
p-value correspondente: 1.6088620262274905e-08
U2, sendo 2 optuna: 103.5


In [44]:
# Mann-Whitney U test on the AUC column: fruit_ga vs fruit_optuna.
realiza_teste(amostra1=fruit_ga, amostra2=fruit_optuna, metrica='AUC', dataset_name='Fruit')

GA x Optuna - AUC score - Fruit Dataset
U1, sendo 1 o algoritmo genético: 1062.0, 
p-value correspondente: 3.3512155113591304e-11
U2, sendo 2 optuna: 27.0
