In [29]:
import wandb
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Whisker
from bokeh.transform import factor_cmap, jitter
from bokeh.layouts import gridplot


In [30]:
# Query the Weights & Biases public API for every run of the project.
api = wandb.Api()
runs = api.runs("hmr24/FundusDRGrading-src_fundusClassif_scripts")

# Config entries copied from each run into the summary table.
important_keys = ['lr','preprocessing', 'data_augmentation_type']
list_configs = []
for i, r in enumerate(runs):
    # The first 55 runs returned by the API are ignored.
    # NOTE(review): presumably they belong to earlier experiments - confirm.
    if i < 55:
        continue

    kappa = r.summary_metrics.get("Validation Quadratic Kappa", None)
    if kappa is None:
        # No validation kappa recorded for this run: leave it out.
        continue

    config = {k: v for k, v in r.config.items() if k in important_keys}
    config.update(name=r.name, kappa=kappa)
    list_configs.append(config)

# One row per retained run; the bare expression displays the table.
df = pd.DataFrame(list_configs)
df

Unnamed: 0,lr,preprocessing,data_augmentation_type,name,kappa
0,1e-05,absent,light,celestial-sweep-1,0.947246
1,1e-05,autobalance,light,scarlet-sweep-2,0.939814
2,1e-05,clahe_lab,light,rare-sweep-3,0.948831
3,1e-05,clahe_rgb,light,flowing-sweep-4,0.947908
4,1e-05,clahe_max_green_gsc,light,icy-sweep-5,0.94448
5,1e-05,seoud,light,fresh-sweep-6,0.928836
6,1e-05,sarki,light,fanciful-sweep-7,0.941713
7,5e-05,absent,light,fallen-sweep-8,0.919412
8,5e-05,autobalance,light,brisk-sweep-9,0.948844
9,5e-05,clahe_lab,light,curious-sweep-10,0.956653


In [31]:
import statsmodels
import statsmodels.api as sm
import statsmodels.api
import statsmodels.formula
import statsmodels.formula.api


def get_graph(df: pd.DataFrame, key: str):
    """Build a Bokeh figure of ``kappa`` grouped by the column ``key``.

    For every distinct value of ``df[key]`` the figure contains:

    * the individual rows as jittered scatter points, coloured per group
      with the ``Light7`` palette;
    * a thin whisker spanning the 10th to 90th percentile of ``kappa``;
    * a thick zero-length whisker whose heads mark the group mean.

    A one-way ANOVA (``kappa ~ key``) is also fitted with statsmodels.

    Args:
        df: Frame holding a ``kappa`` column and a ``key`` column. It is
            left untouched: sorting is done on a copy.
        key: Name of the grouping column. It is interpolated into the OLS
            formula and its values are passed to ``figure`` as ``x_range``
            (presumably string labels - confirm for non-string columns).

    Returns:
        The configured figure; it is not displayed here.

    Side effects:
        Calls ``output_notebook()`` and prints the group order and the
        sorted list of classes.
    """
    # Sort into a new frame instead of in place, so that plotting never
    # reorders the DataFrame owned by the caller.
    df = df.sort_values(key)

    result = statsmodels.formula.api.ols(f'kappa ~ {key}', data=df).fit()
    table = statsmodels.api.stats.anova_lm(result)
    # NOTE(review): the ANOVA p-value is computed but not displayed anywhere.
    stats_test = table.loc[key, 'PR(>F)']

    g = df.groupby(key)
    groups = g.groups
    order_of_groups = list(groups.keys())
    print("Ordre des groupes :", order_of_groups)

    # Per-group 10th / 90th percentiles delimit the error whisker.
    upper = g.kappa.quantile(0.90)
    lower = g.kappa.quantile(0.10)

    datasource = ColumnDataSource(df)
    output_notebook()

    classes = sorted(df[key].unique())
    print(classes)

    p = figure(height=500, x_range=classes, title=f"Kappa by {key.upper()}", tools='')

    # Per-group statistics, listed in the same sorted order as ``classes``.
    source = ColumnDataSource(data=dict(base=classes, upper=upper, lower=lower, middle=g.kappa.mean()))

    error = Whisker(base="base", upper="upper", lower="lower", source=source,
                    level="annotation", line_width=2, line_alpha=.5)
    # upper == lower == mean: only the (enlarged) heads are visible.
    middle = Whisker(base="base", upper="middle", lower="middle", source=source, line_width=4, line_alpha=0.5)

    middle.upper_head.size = 20
    middle.lower_head.size = 20
    p.add_layout(middle)

    error.upper_head.size = 10
    error.lower_head.size = 10
    p.add_layout(error)

    # Individual rows, jittered horizontally inside their category.
    p.scatter(jitter(key, 0.1, range=p.x_range), y='kappa',
              source=datasource,
              size=10,
              line_color="white",
              color=factor_cmap(key, "Light7", classes),
              alpha=0.8)
    return p


# Kappa per data-augmentation type; the figure is placed in a one-cell
# grid before being displayed.
p_ema = get_graph(df, key='data_augmentation_type')

grid = gridplot(children=[[p_ema]])
show(obj=grid)

Ordre des groupes : ['light']


['light']


In [32]:
# Kappa per preprocessing method.
p = get_graph(df, key='preprocessing')

show(obj=p)

Ordre des groupes : ['absent', 'autobalance', 'clahe_lab', 'clahe_max_green_gsc', 'clahe_rgb', 'sarki', 'seoud']


['absent', 'autobalance', 'clahe_lab', 'clahe_max_green_gsc', 'clahe_rgb', 'sarki', 'seoud']
