In [505]:
import wandb
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Whisker
from bokeh.transform import factor_cmap, jitter
from bokeh.layouts import gridplot

from fundusClassif.data.data_factory import get_datamodule_from_config
from nntools.utils import Config


In [506]:
api = wandb.Api()
runs = api.runs("hmr24/FundusDRGrading-src_fundusClassif_scripts")

important_keys = ['lr','preprocessing', 'data_augmentation_type']
list_configs = []
for i, r in enumerate(runs):
    if i < 74:
        pass
    else :
        kappa = r.summary_metrics.get("Validation Quadratic Kappa", None)
        kappa_ddr = r.summary_metrics.get("Quadratic Kappa_DDR_test", None)
        kappa_eyepacs = r.summary_metrics.get("Quadratic Kappa_EYEPACS_test", None)
        kappa_idrid = r.summary_metrics.get("Quadratic Kappa_IDRID_test", None)
        if kappa is None:
            continue
        config = {k: v for k, v in r.config.items() if k in important_keys}
        config['name'] = r.name
        config['kappa'] = kappa
        config['kappa_ddr'] = kappa_ddr
        config['kappa_eyepacs'] = kappa_eyepacs
        config['kappa_idrid'] = kappa_idrid
    
        list_configs.append(config)

df = pd.DataFrame(list_configs)
df

Unnamed: 0,lr,preprocessing,data_augmentation_type,name,kappa,kappa_ddr,kappa_eyepacs,kappa_idrid
0,0.00100,sarki,,upbeat-sweep-1,0.932644,0.658787,0.616012,0.688647
1,0.00100,absent,,sweepy-sweep-2,0.879902,0.573273,0.478262,0.637572
2,0.00100,autobalance,,royal-sweep-3,0.897062,0.623832,0.531265,0.662328
3,0.00100,clahe_lab,,dulcet-sweep-4,0.922191,0.648264,0.567507,0.586553
4,0.00100,clahe_rgb,,amber-sweep-5,0.923834,0.654336,0.614074,0.717875
...,...,...,...,...,...,...,...,...
65,0.00001,autobalance,light,proud-sweep-66,0.940386,0.699467,0.673494,0.730624
66,0.00001,clahe_lab,light,graceful-sweep-67,0.945784,0.729886,0.676506,0.669743
67,0.00001,clahe_rgb,light,lucky-sweep-68,0.945258,0.740294,0.697102,0.766788
68,0.00001,clahe_max_green_gsc,light,light-sweep-69,0.944662,0.752400,0.691975,0.726111


In [507]:
df_preprocessing_type = df[df['preprocessing'] == 'sarki']

config = Config('../configs/config.yaml')
datamodule = get_datamodule_from_config(config['datasets'], config['data'])
test_dataloader = datamodule.test_dataloader()
test_datasets_ids = [d.dataset.id for i, d in enumerate(test_dataloader)]
dataloader0_weight = 0
dataloader1_weight = 0
dataloader2_weight = 0
lenght_total_test = 0
for i, d in enumerate(test_dataloader):
    lenght_total_test += len(d.dataset)
#print(lenght_total_test)

for i, d in enumerate(test_dataloader):
    if d.dataset.id == test_datasets_ids[0]:
        dataloader0_weight = len(d.dataset)/lenght_total_test
        #print(dataloader0_weight)
        #print(d.dataset.id)
    elif d.dataset.id == test_datasets_ids[1]:
        dataloader1_weight += len(d.dataset)/lenght_total_test
        #print(dataloader1_weight)
        #print(d.dataset.id)
    elif d.dataset.id == test_datasets_ids[2]:
        dataloader2_weight += len(d.dataset)/lenght_total_test
        #print(dataloader2_weight)
        #print(d.dataset.id)

df_preprocessing_type['kappa_weighted'] = 0 
for i in range(len(df_preprocessing_type)):
    eyepacs = df_preprocessing_type['kappa_eyepacs'].iloc[i]*dataloader0_weight
    idrid = df_preprocessing_type['kappa_idrid'].iloc[i]*dataloader1_weight
    ddr = df_preprocessing_type['kappa_ddr'].iloc[i]*dataloader2_weight
    df_preprocessing_type['kappa_weighted'].iloc[i] =  eyepacs + idrid + ddr




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(self.file_column, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_preprocessing_type['kappa_weighted'] = 0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = v

In [508]:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import MediumContrast3
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap

#creer un dataframe avec les meilleurs runs pour chaque type de preprocessing
df = df[df['name'].isin(['comfy-sweep-51', 'elated-sweep-47', 'prime-sweep-53', 'copper-sweep-55', 'hopeful-sweep-52', 'driven-sweep-63', 'peachy-sweep-43'])]
df['kappa_weighted'] = 0 
for i in range(len(df)):
    eyepacs = df['kappa_eyepacs'].iloc[i]*dataloader0_weight
    idrid = df['kappa_idrid'].iloc[i]*dataloader1_weight
    ddr = df['kappa_ddr'].iloc[i]*dataloader2_weight
    df['kappa_weighted'].iloc[i] =  eyepacs + idrid + ddr

df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['kappa_weighted'] = 0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-vie

Unnamed: 0,lr,preprocessing,data_augmentation_type,name,kappa,kappa_ddr,kappa_eyepacs,kappa_idrid,kappa_weighted
42,0.0005,sarki,light,peachy-sweep-43,0.893344,0.749304,0.706002,0.769652,0.70895
46,0.0005,clahe_rgb,light,elated-sweep-47,0.879705,0.776353,0.73898,0.753452,0.741452
50,0.0001,absent,light,comfy-sweep-51,0.912008,0.775938,0.728072,0.739399,0.731225
51,0.0001,autobalance,light,hopeful-sweep-52,0.938343,0.782284,0.722699,0.734009,0.726619
52,0.0001,clahe_lab,light,prime-sweep-53,0.924313,0.77863,0.737154,0.811892,0.740003
54,0.0001,clahe_max_green_gsc,light,copper-sweep-55,0.953488,0.791074,0.725876,0.730903,0.730152
62,5e-05,seoud,light,driven-sweep-63,0.938968,0.76195,0.71371,0.81673,0.717052


In [509]:
output_notebook()
preprocessing = ['absent', 'clahe_rgb', 'clahe_lab','clahe_max_green_gsc', 'autobalance', 'seoud',  'sarki']
databases_name = ['EYEPACS', 'IDRID', 'DDR']
weighted_averages = []
data = {'preprocessing' : preprocessing,
        'EYEPACS'   : [],
        'IDRID'   : [],
        'DDR'   : []}

for p in preprocessing :
    for d in df['preprocessing']:
        if p == d:
            data['EYEPACS'].append(df['kappa_eyepacs'][df['preprocessing'] == d].values[0])
            data['IDRID'].append(df['kappa_idrid'][df['preprocessing'] == d].values[0])
            data['DDR'].append(df['kappa_ddr'][df['preprocessing'] == d].values[0])
            weighted_averages.append(df['kappa_weighted'][df['preprocessing'] == d].values[0])
print(weighted_averages)
print(data)

preprocessing = ['absent', 'clahe_rgb', 'clahe_lab','clahe_mgg', 'autobalance', 'seoud',  'sarki']

x = [ (preprocess, bdd) for preprocess in preprocessing for bdd in databases_name ]
counts = sum(zip(data['EYEPACS'], data['IDRID'], data['DDR']), ())

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(x_range=FactorRange(*x), height=350, title="Quadratic Kappa test on different preprocessing",
           toolbar_location=None, tools="",output_backend="svg")
p.line(x=[(preprocessing[0], 'EYEPACS'), (preprocessing[-1], 'EYEPACS')], y=[data['EYEPACS'][0], data['EYEPACS'][0]], line_width=1, color="#6699CC")
p.line(x=[(preprocessing[0], 'IDRID'), (preprocessing[-1], 'IDRID')], y=[data['IDRID'][0], data['IDRID'][0]], line_width=1, color="#004488")
p.line(x=[(preprocessing[0], 'DDR'), (preprocessing[-1], 'DDR')], y=[data['DDR'][0], data['DDR'][0]], line_width=1, color="#EECC66")
p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
       fill_color=factor_cmap('x', palette=MediumContrast3, factors=databases_name, start=1, end=2))

p.y_range.start = 0.6
p.x_range.range_padding = 0.05
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

x_coords = [(preprocess, 'IDRID') for preprocess in preprocessing] 
p.scatter(x=preprocessing, y=weighted_averages, size=4, color="red", marker="circle")

p.line(x=[(preprocessing[0], 'IDRID'), (preprocessing[-1], 'IDRID')], y=[weighted_averages[0], weighted_averages[0]], line_width=1, color="red")

p.circle(x=preprocessing[0], y=weighted_averages[0], size=5, color="black")

show(p)




[0.731225311919059, 0.7414519911114401, 0.7400027251132345, 0.7301516740850571, 0.7266187606090801, 0.7170522626012884, 0.7089501159101512]
{'preprocessing': ['absent', 'clahe_rgb', 'clahe_lab', 'clahe_max_green_gsc', 'autobalance', 'seoud', 'sarki'], 'EYEPACS': [0.7280724048614502, 0.7389801740646362, 0.7371543049812317, 0.725875735282898, 0.7226989269256592, 0.7137104272842407, 0.7060020565986633], 'IDRID': [0.7393988370895386, 0.7534524202346802, 0.8118916749954224, 0.730902910232544, 0.7340087294578552, 0.8167302012443542, 0.7696517109870911], 'DDR': [0.7759380340576172, 0.7763526439666748, 0.7786298990249634, 0.791073739528656, 0.7822835445404053, 0.7619503736495972, 0.749303936958313]}


ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='absent' [no close matches] {renderer: GlyphRenderer(id='p6646', ...)}
