## Libraries

In [23]:
import pandas as pd
import plotly.express as px

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

from functions import confusion_matrix_generator

## Generating Results Dataframe

In [9]:
# Read the result file of each execution (runs 1 through 10) into a list of frames.
all_df = [
    pd.read_csv(f'results_exec_{number_of_execution}.csv')
    for number_of_execution in range(1, 11)
]

# Stack every run into one table. reset_index() without drop=True keeps the
# previous row labels as an extra 'index' column.
output = pd.concat(all_df).reset_index()

In [10]:
# Because the result files were saved externally, some unnecessary columns
# were inserted; list the columns to see which ones they are.
output.columns

Index(['index', 'Unnamed: 0', 'diagnosis', 'Error', 'Accuracy', 'Precision',
       'Recall', 'F1-Score', 'Support', 'K-Value'],
      dtype='object')

In [11]:
# Remove the two bookkeeping columns that carry no result data: 'index' was
# added by reset_index() when the runs were concatenated, and 'Unnamed: 0'
# presumably is the row index written out when the CSVs were saved (verify).
# Reassigning instead of inplace=True, together with errors='ignore', keeps
# this cell idempotent: re-running it no longer raises KeyError once the
# columns are already gone.
output = output.drop(columns=['Unnamed: 0', 'index'], errors='ignore')

In [12]:
# Preview the first five rows of the combined results table.
output.head(n=5)

Unnamed: 0,diagnosis,Error,Accuracy,Precision,Recall,F1-Score,Support,K-Value
0,Benign,0.096491,0.903509,0.87013,0.985294,0.924138,68.0,1
1,Malignant,0.096491,0.903509,0.972973,0.782609,0.86747,46.0,1
2,Benign,0.105263,0.894737,0.85,1.0,0.918919,68.0,2
3,Malignant,0.105263,0.894737,1.0,0.73913,0.85,46.0,2
4,Benign,0.096491,0.903509,0.88,0.970588,0.923077,68.0,3


## Results Analysis 

In [26]:
# Mean Error for each K-Value, averaged over all rows of `output` sharing that K.
error_df = output.groupby('K-Value')[['Error']].mean()

# Line plot of the mean Error against K (the frame's index holds the K values).
fig = px.line(
    error_df,
    x=error_df.index,
    y='Error',
    color_discrete_sequence=['black'],
)
fig.update_layout(title={'text': 'Error by K-Value', 'x': 0.5})
fig.update_xaxes(title_text='K-Value')
fig.show()

In [37]:
# Mean F1-Score for each K-Value, averaged over all rows of `output` sharing that K.
f1_score_df = output.groupby('K-Value')[['F1-Score']].mean()

# Line plot of the mean F1-Score against K (the frame's index holds the K values).
fig = px.line(
    f1_score_df,
    x=f1_score_df.index,
    y='F1-Score',
    color_discrete_sequence=['black'],
)
fig.update_layout(title={'text': 'F1-Score by K-Value', 'x': 0.5})
fig.update_xaxes(title_text='K-Value')
fig.show()

In [22]:
# Mean Recall for each K-Value, averaged over all rows of `output` sharing that K.
recall = output.groupby('K-Value')['Recall'].mean().to_frame()

# Line plot of the mean Recall against K (the frame's index holds the K values).
fig = px.line(recall, x=recall.index, y='Recall', color_discrete_sequence=['black'])
# The title previously read 'Error by K-Value' (copied from the error plot),
# which mislabelled this Recall figure.
fig.update_layout(title_text='Recall by K-Value', title_x=0.5)
fig.update_xaxes(title_text='K-Value')
fig.show()

In [34]:
# Box plot showing how the Recall values in `output` are spread at each K-Value.
fig = px.box(data_frame=output, x='K-Value', y='Recall')
fig.update_layout(
    title={'text': 'Recall by K-Value (10 Executions)', 'x': 0.5},
)
fig.show()

In [27]:
# Mean Precision / Recall / F1-Score per K-Value, ranked from the highest
# to the lowest mean Precision.
(
    output
    .groupby('K-Value')[['Precision', 'Recall', 'F1-Score']]
    .mean()
    .sort_values(by='Precision', ascending=False)
)

Unnamed: 0_level_0,Precision,Recall,F1-Score
K-Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5,0.8895,0.876,0.8795
6,0.889,0.866,0.873
9,0.8885,0.8775,0.88
4,0.8885,0.862,0.871
3,0.8875,0.874,0.8785
8,0.8865,0.865,0.8725
7,0.8855,0.875,0.876
2,0.8835,0.841,0.854
1,0.8595,0.8545,0.8555


In [35]:
# Box plot showing how the Precision values in `output` are spread at each K-Value.
fig = px.box(data_frame=output, x='K-Value', y='Precision')
fig.update_layout(
    title={'text': 'Precision by K-Value (10 Executions)', 'x': 0.5},
)
fig.show()

In [28]:
# Mean Precision / Recall / F1-Score per K-Value, ranked from the highest
# to the lowest mean F1-Score.
(
    output
    .groupby('K-Value')[['Precision', 'Recall', 'F1-Score']]
    .mean()
    .sort_values(by='F1-Score', ascending=False)
)

Unnamed: 0_level_0,Precision,Recall,F1-Score
K-Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9,0.8885,0.8775,0.88
5,0.8895,0.876,0.8795
3,0.8875,0.874,0.8785
7,0.8855,0.875,0.876
6,0.889,0.866,0.873
8,0.8865,0.865,0.8725
4,0.8885,0.862,0.871
1,0.8595,0.8545,0.8555
2,0.8835,0.841,0.854


In [36]:
# Box plot showing how the F1-Score values in `output` are spread at each K-Value.
fig = px.box(data_frame=output, x='K-Value', y='F1-Score')
fig.update_layout(
    title={'text': 'F1-Score by K-Value (10 Executions)', 'x': 0.5},
)
fig.show()