In [22]:
import wandb
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Whisker
from bokeh.transform import factor_cmap, jitter
from bokeh.layouts import gridplot


In [23]:
# Pull the sweep runs from Weights & Biases and tabulate, for each run that
# reported a validation quadratic kappa, the hyper-parameters of interest.
api = wandb.Api()
runs = api.runs("hmr24/FundusDRGrading-src_fundusClassif_scripts")

important_keys = ['lr', 'ema', 'swa', 'mixup', 'preprocessing', 'as_regression']
max_runs = 55  # only the first 55 runs yielded by the API are considered

list_configs = []
for i, r in enumerate(runs):
    if i >= max_runs:
        # Stop instead of iterating on: every further run would still be
        # fetched from the API and then ignored.
        break
    kappa = r.summary_metrics.get("Validation Quadratic Kappa", None)
    if kappa is None:
        # Runs without a recorded kappa carry no information for the plots.
        continue
    config = {k: v for k, v in r.config.items() if k in important_keys}
    config['name'] = r.name
    config['kappa'] = kappa
    list_configs.append(config)

df = pd.DataFrame(list_configs)

# Replace the 0/1 flags with readable labels; these labels become the
# category names on the x axis of the plots.
flag_labels = {
    'ema': {1: 'EMA', 0: 'No EMA'},
    'swa': {1: 'SWA', 0: 'No SWA'},
    'mixup': {1: 'Mixup', 0: 'No Mixup'},
    'as_regression': {1: 'Regression', 0: 'Classification'},
}
for column, labels in flag_labels.items():
    df[column] = df[column].map(labels)
df


Unnamed: 0,lr,ema,swa,mixup,as_regression,preprocessing,name,kappa
0,3.4e-05,No EMA,SWA,Mixup,Classification,autobalance,fiery-sweep-1,0.931952
1,2.5e-05,No EMA,SWA,No Mixup,Classification,seoud,logical-sweep-2,0.943428
2,4.9e-05,EMA,No SWA,No Mixup,Regression,sarki,deft-sweep-4,0.749284
3,2.5e-05,EMA,SWA,No Mixup,Classification,clahe_rgb,brisk-sweep-5,0.76577
4,4.1e-05,EMA,SWA,Mixup,Classification,clahe_lab,effortless-sweep-6,0.705447
5,2.6e-05,EMA,SWA,Mixup,Classification,sarki,skilled-sweep-7,0.626413
6,3.9e-05,EMA,No SWA,Mixup,Classification,clahe_lab,resilient-sweep-8,0.687334
7,1.7e-05,EMA,No SWA,No Mixup,Regression,clahe_rgb,glamorous-sweep-11,0.703558
8,2.8e-05,EMA,SWA,Mixup,Classification,absent,avid-sweep-12,0.721227
9,3.7e-05,EMA,SWA,Mixup,Classification,clahe_lab,solar-sweep-13,0.704996


In [24]:
import statsmodels
import statsmodels.api as sm
import statsmodels.api
import statsmodels.formula
import statsmodels.formula.api

def map_pvalue_to_star(p):
    """Translate a p-value into a significance marker made of star symbols.

    Returns three stars when ``p < 0.001``, two when ``p < 0.01``, one when
    ``p < 0.05`` and an empty string otherwise.
    """
    star = "⋆"
    # Thresholds go from strictest to loosest; the first one matched wins.
    for threshold, count in ((0.001, 3), (0.01, 2), (0.05, 1)):
        if p < threshold:
            return star * count
    return ""

def _anova_pvalue(df, key):
    """Return the ANOVA p-value for ``key`` in the OLS fit ``kappa ~ key``.

    The value is the ``PR(>F)`` entry of the ``key`` row in the table
    produced by ``anova_lm`` for the fitted model.
    """
    result = statsmodels.formula.api.ols(f'kappa ~ {key}', data=df).fit()
    table = statsmodels.api.stats.anova_lm(result)
    return table.loc[key, 'PR(>F)']


def get_graph(df, key, show_pvalue=False):
    """Build a Bokeh figure of ``kappa`` for each category of column ``key``.

    Every row is drawn as a jittered point coloured by its category. Each
    category also gets a whisker spanning the 10th to 90th percentile of
    ``kappa`` and a wide bar at its mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``kappa`` column and the column named by ``key``.
        The frame is not modified.
    key : str
        Name of the categorical column used for the x axis.
    show_pvalue : bool, optional
        When true, the ANOVA p-value of ``kappa ~ key`` (with its star
        marker) is appended to the figure title. Off by default, in which
        case no model is fitted.

    Returns
    -------
    bokeh.plotting.figure
        The assembled figure; the caller is responsible for showing it.
    """
    # Work on a sorted copy: an in-place sort would silently reorder the
    # caller's frame. Rows with a missing category are dropped so the points
    # and the per-group statistics cover exactly the same categories
    # (groupby ignores missing keys, and NaN cannot be an axis factor).
    df = df.dropna(subset=[key]).sort_values(key)

    g = df.groupby(key)
    upper = g.kappa.quantile(0.90)
    lower = g.kappa.quantile(0.10)
    mean = g.kappa.mean()

    # Take the categories from the groupby result so that `base` lines up
    # row by row with the statistics computed above.
    classes = list(mean.index)
    print("Ordre des groupes :", classes)

    datasource = ColumnDataSource(df)
    # Route Bokeh output to the notebook so figures render inline without a
    # separate setup cell.
    output_notebook()

    print(classes)

    title = f"Kappa by {key.upper()}"
    if show_pvalue:
        pvalue = _anova_pvalue(df, key)
        title = f"{title} (p = {pvalue:.3f}) {map_pvalue_to_star(pvalue)}".rstrip()

    p = figure(height=500, x_range=classes, title=title, tools='')

    source = ColumnDataSource(data=dict(base=classes, upper=upper, lower=lower, middle=mean))

    # Degenerate whisker (upper == lower): only its heads are visible, which
    # draws a horizontal bar at the group mean.
    mean_bar = Whisker(base="base", upper="middle", lower="middle", source=source,
                       line_width=4, line_alpha=0.5)
    mean_bar.upper_head.size = 20
    mean_bar.lower_head.size = 20
    p.add_layout(mean_bar)

    # 10th-90th percentile range of each group.
    spread = Whisker(base="base", upper="upper", lower="lower", source=source,
                     level="annotation", line_width=2, line_alpha=.5)
    spread.upper_head.size = 10
    spread.lower_head.size = 10
    p.add_layout(spread)

    # Individual runs, jittered horizontally inside their category.
    p.scatter(jitter(key, 0.1, range=p.x_range), y='kappa',
              source=datasource,
              size=10,
              line_color="white",
              color=factor_cmap(key, "Light7", classes),
              alpha=0.8)
    return p


# One figure per binary training option. The generator is consumed left to
# right, so get_graph runs for 'ema', 'mixup', 'swa', 'as_regression' in turn.
p_ema, p_mixup, p_swa, p_as_regression = (
    get_graph(df, option) for option in ('ema', 'mixup', 'swa', 'as_regression')
)

# Lay the four figures out on a 2x2 grid and render it.
grid = gridplot([
    [p_as_regression, p_swa],
    [p_mixup, p_ema],
])
show(grid)

Ordre des groupes : ['EMA', 'No EMA']


['EMA', 'No EMA']
Ordre des groupes : ['Mixup', 'No Mixup']


['Mixup', 'No Mixup']
Ordre des groupes : ['No SWA', 'SWA']


['No SWA', 'SWA']
Ordre des groupes : ['Classification', 'Regression']


['Classification', 'Regression']


In [25]:
# Kappa per preprocessing method, rendered as a standalone figure.
p = get_graph(df=df, key='preprocessing')
show(p)

Ordre des groupes : ['absent', 'autobalance', 'clahe_lab', 'clahe_max_green_gsc', 'clahe_rgb', 'sarki', 'seoud']


['absent', 'autobalance', 'clahe_lab', 'clahe_max_green_gsc', 'clahe_rgb', 'sarki', 'seoud']
