In [27]:
import plotly.graph_objects as go
import pandas as pd
from plot_functions import paper_boxplot_comparison, paper_boxplot_comparison_single_metric, paper_boxplot_method_comparison
import numpy as np

In [28]:
# Load the three cross-validation result tables. Each file is read as space-delimited
# text and its columns are labelled 'mean' and 'std'.
process_rwr, disease_rwr, disease_conservative_rwr = (
    pd.read_csv(cv_results_path, sep=' ', names=['mean', 'std'])
    for cv_results_path in (
        '../../models/GAP-MINE/process/cv_results/rwr.txt',
        '../../models/GAP-MINE/disease/cv_results/disease_rwr.txt',
        '../../models/GAP-MINE/disease/cv_results/disease_rwr_conservative.txt',
    )
)

In [29]:
# Load the three test-set probability tables (plain CSV, header taken from the file).
# NOTE(review): a later cell reads an 'f_measure' column from these frames - confirm the
# files actually provide it.
process_rwr_test, disease_rwr_test, disease_conservative_rwr_test = (
    pd.read_csv(proba_path)
    for proba_path in (
        '../../models/GAP-MINE/process/probability/rwr_lgr_proba.csv',
        '../../models/GAP-MINE/disease/probability/disease_rwr_lgr_proba.csv',
        '../../models/GAP-MINE/disease/probability/disease_rwr_lgr_proba_conservative.csv',
    )
)

In [30]:
# Add a 'coef' column (std divided by mean, i.e. the coefficient of variation) to each
# cross-validation results table. The tables are modified in place.
for cv_table in (process_rwr, disease_rwr, disease_conservative_rwr):
    cv_table['coef'] = cv_table['std'].div(cv_table['mean'])

In [31]:
# Tag every row of each cross-validation results table with the table's display name
# ('name') and a shared split label ('cv'). The tables are modified in place.
for cv_table, display_name in (
        (process_rwr, 'Process'),
        (disease_rwr, 'SCA'),
        (disease_conservative_rwr, 'Conservative')):
    cv_table['name'] = display_name
    cv_table['cv'] = 'CV Validation Set'

In [44]:
# Build `df`, a long-form table with one row per score and three columns:
#   clf    - evaluation split the score comes from (outer key of prox_metrics)
#   module - which results table it comes from (inner key of prox_metrics)
#   score  - the plotted value
prox_metrics = {
    'CV Validation Sets': {'Process': process_rwr,
                           'SCA': disease_rwr,
                           'Conservative': disease_conservative_rwr},
    'Test Set': {'Process': process_rwr_test,
                 'SCA': disease_rwr_test,
                 'Conservative': disease_conservative_rwr_test}}

# Column that holds the plotted score in each split's tables: the CV tables contribute
# their 'mean' column, the test-set tables their 'f_measure' column.
score_column = {'CV Validation Sets': 'mean', 'Test Set': 'f_measure'}

# One small frame per (split, table) pair, concatenated once. This replaces a row-by-row
# iterrows() loop and avoids using `df` as both loop variable and final result.
# Row order is unchanged: splits, then tables, then rows, all in their original order.
score_frames = [
    pd.DataFrame({'clf': split_name,
                  'module': method,
                  'score': table[score_column[split_name]].to_numpy()})
    for split_name, tables in prox_metrics.items()
    for method, table in tables.items()
]
df = pd.concat(score_frames, ignore_index=True)

# Dict-of-lists view of the same data, kept under its original name in case another
# cell still reads it.
clf_dict = df.to_dict(orient='list')

In [45]:
# Grouped box plot of the scores in `df`: modules on the x axis, one trace (and colour)
# per distinct value of df.clf. The figure is written to disk and then displayed.
colors = ['#59C3C3', '#FDA96D']  # one colour per df.clf value, in order of appearance
traces = []
for i, md in enumerate(df.clf.unique()):
    df_plot = df[df.clf == md]
    trace = go.Box(
        x=df_plot.module,
        y=df_plot.score,
        boxpoints='outliers',
        name=md,
        fillcolor=colors[i],
        line_color='#000000',
        line_width=3,
        marker=dict(size=20, color=colors[i]))
    traces.append(trace)

fig = go.Figure()
fig.add_traces(traces)
fig.update_layout(
    boxmode="group",
    height=1000,
    width=2000,
    title_x=0.5,
    legend=dict(font=dict(size=60, color='black'), yanchor="bottom",
                y=1.01, xanchor="right", x=0.99, orientation='h'),
    xaxis=dict(tickfont=dict(size=60), showline=True, linewidth=3,
               linecolor='black', mirror=True, color='black'),
    # The axis title is configured through the nested `title` dict. The older `titlefont`
    # attribute is a deprecated alias that recent Plotly releases no longer accept.
    yaxis=dict(title=dict(text='Score', font=dict(size=60), standoff=30),
               tickfont=dict(size=60),
               showline=True, linewidth=3, linecolor='black', mirror=True, color='black'),
    boxgroupgap=0.2,
    boxgap=0.1,
    paper_bgcolor='rgba(255,255,255,1)',
    plot_bgcolor='rgba(255,255,255,1)',
    margin=dict(l=20, r=20, t=25, b=20),
    showlegend=True)
fig.write_image('../../../python/reports/plots/cv_average_fmeasure.jpeg')
fig.show()

In [59]:
# Box plot of the 'coef' column (coefficient of variation) of each cross-validation
# results table, one box per table, positioned on the x axis by the table's 'name' column.
# The figure is written to disk and then displayed.
traces = []
# The loop variable is deliberately not called `df`: that name holds the long-form score
# table used by the grouped score plot, and overwriting it here would break that cell
# when it is re-run after this one.
for cv_table in [process_rwr, disease_rwr, disease_conservative_rwr]:
    trace = go.Box(
        x=cv_table['name'],
        y=cv_table['coef'],
        boxpoints='outliers',
        fillcolor='#59C3C3',
        line_color='#000000',
        line_width=3,
        marker=dict(size=20, color='#59C3C3'))
    traces.append(trace)

fig = go.Figure()
fig.add_traces(traces)
fig.update_layout(
    height=1000,
    width=2000,
    title_x=0.5,
    xaxis=dict(tickfont=dict(size=60), showline=True, linewidth=3,
               linecolor='black', mirror=True, color='black'),
    # The axis title is configured through the nested `title` dict. The older `titlefont`
    # attribute is a deprecated alias that recent Plotly releases no longer accept.
    yaxis=dict(title=dict(text='CV Coefficient of Variation', font=dict(size=60), standoff=30),
               tickfont=dict(size=60),
               showline=True, linewidth=3, linecolor='black', mirror=True, color='black'),
    boxgroupgap=0.2,
    boxgap=0.1,
    paper_bgcolor='rgba(255,255,255,1)',
    plot_bgcolor='rgba(255,255,255,1)',
    margin=dict(l=20, r=20, t=25, b=20),
    showlegend=False)
fig.write_image('../../../python/reports/plots/cv_coef_variation.jpeg')
fig.show()