In [4]:
import wandb
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, Whisker
from bokeh.transform import factor_cmap, jitter
from bokeh.layouts import gridplot
from fundusClassif.data.data_factory import get_datamodule_from_config
from nntools.utils import Config

In [5]:
api = wandb.Api()
runs = api.runs("hmr24/Grading-DiabeticRetinopathy-Comparisons-V3")

important_keys = ['lr', 'ema', 'swa', 'mixup', 'preprocessing', 'as_regression']
list_configs = []

for i, r in enumerate(runs):
    kappa = r.summary_metrics.get("Validation Quadratic Kappa", None)
    kappa_ddr = r.summary_metrics.get("Quadratic Kappa_DDR_test", None)
    kappa_eyepacs = r.summary_metrics.get("Quadratic Kappa_EYEPACS_test", None)
    kappa_idrid = r.summary_metrics.get("Quadratic Kappa_IDRID_test", None)
    preprocessing = r.config.get("data_preprocessing/name", None)
    if kappa is None:
        continue
    config = {k: v for k, v in r.config.items() if k in important_keys}
    config['name'] = r.name
    config['kappa'] = kappa
    config['kappa_ddr'] = kappa_ddr
    config['kappa_eyepacs'] = kappa_eyepacs
    config['kappa_idrid'] = kappa_idrid
    config['preprocessing'] = preprocessing
    if i < 12:
        pass 
    else:
        list_configs.append(config)
    
df = pd.DataFrame(list_configs)

#df['ema'] = df['ema'].map({1: 'EMA', 0: 'No EMA'})
#df['swa'] = df['swa'].map({1: 'SWA', 0: 'No SWA'})
#df['mixup'] = df['mixup'].map({1: 'Mixup', 0: 'No Mixup'})
#df['as_regression'] = df['as_regression'].map({1: 'Regression', 0: 'Classification'})
df

Unnamed: 0,name,kappa,kappa_ddr,kappa_eyepacs,kappa_idrid,preprocessing
0,clahe_rgb,0.957595,0.774146,0.726701,0.67932,clahe_rgb
1,clahe_lab,0.952968,0.770446,0.720667,0.718123,clahe_lab
2,absent,0.960145,0.793029,0.716451,0.753644,absent
3,autobalance,0.953048,0.7625,0.718422,0.712796,autobalance
4,seoud,0.949285,0.733957,0.683287,0.659757,seoud
5,sarki,0.949643,0.718678,0.669793,0.712313,sarki
6,graham1,0.93962,0.700987,0.61418,0.660877,graham_meth1
7,clahe_max_green_gsc,0.958999,0.77414,0.71909,0.634716,clahe_max_green_gsc
8,graham2,0.948413,0.710607,0.67812,0.640334,graham_meth2


In [6]:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import MediumContrast3
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
output_notebook()
preprocessing = ['absent', 'clahe_rgb', 'clahe_lab','clahe_max_green_gsc', 'autobalance', 'seoud',  'sarki', 'graham_meth1','graham_meth2']
databases_name = ['EYEPACS', 'IDRID', 'DDR']

data = {'preprocessing' : preprocessing,
        'EYEPACS'   : [],
        'IDRID'   : [],
        'DDR'   : []}

#associer les valeurs de kappa aux bonnes valeurs de preprocessing 
for p in preprocessing :
    for d in df['preprocessing']:
        if p == d:
            data['EYEPACS'].append(df['kappa_eyepacs'][df['preprocessing'] == d].values[0])
            data['IDRID'].append(df['kappa_idrid'][df['preprocessing'] == d].values[0])
            data['DDR'].append(df['kappa_ddr'][df['preprocessing'] == d].values[0])
            
print(data)

#affichage plus lisible
preprocessing = ['absent', 'clahe_rgb', 'clahe_lab','clahe_mgg', 'autobalance', 'seoud',  'sarki', 'graham1','graham2']

x = [ (preprocess, bdd) for preprocess in preprocessing for bdd in databases_name ]
counts = sum(zip(data['EYEPACS'], data['IDRID'], data['DDR']), ())

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(x_range=FactorRange(*x), height=350, title="Quadratic Kappa test on different preprocessing",
           toolbar_location=None, tools="",output_backend="svg")

p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
       fill_color=factor_cmap('x', palette=MediumContrast3, factors=databases_name, start=1, end=2))

p.y_range.start = 0.6
p.x_range.range_padding = 0.05
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

#calculer la moyenne ponderee des kappa pour chaque preprocessing
config = Config('../configs/config.yaml')
datamodule = get_datamodule_from_config(config['datasets'], config['data'])
test_dataloader = datamodule.test_dataloader()
test_datasets_ids = [d.dataset.id for i, d in enumerate(test_dataloader)]
lenght_total_test = 0
dataloader0_weight = 0
dataloader1_weight = 0
dataloader2_weight = 0
for i, d in enumerate(test_dataloader):
    lenght_total_test += len(d.dataset)
print(lenght_total_test)

for i, d in enumerate(test_dataloader):
    if d.dataset.id == test_datasets_ids[0]:
        dataloader0_weight = len(d.dataset)/lenght_total_test
        print(dataloader0_weight)
    elif d.dataset.id == test_datasets_ids[1]:
        dataloader1_weight += len(d.dataset)/lenght_total_test
        print(dataloader1_weight)
    elif d.dataset.id == test_datasets_ids[2]:
        dataloader2_weight += len(d.dataset)/lenght_total_test
        print(dataloader2_weight)

weighted_averages = []
for i in range(len(preprocessing)):
    #print(f"Preprocessing {preprocessing[i]}")
    data['EYEPACS'][i] = data['EYEPACS'][i]*dataloader0_weight
    data['IDRID'][i] = data['IDRID'][i]*dataloader1_weight
    data['DDR'][i] = data['DDR'][i]*dataloader2_weight
    mean = data['EYEPACS'][i]+data['IDRID'][i]+data['DDR'][i]
    weighted_averages.append(mean)
    #print(f"Kappa moyenne ponderee: {mean}")

x_coords = [(preprocess, 'IDRID') for preprocess in preprocessing] 
p.scatter(x=x_coords, y=weighted_averages, size=5, color="black", marker="circle")

#trace une ligne en pointillee horizontale sur le graphique en ayant pour valeur la wieghted average de absent preprocessing
p.line(x=[(preprocessing[0], 'IDRID'), (preprocessing[-1], 'IDRID')], y=[weighted_averages[0], weighted_averages[0]], line_dash="dashed", line_width=0.2, color="black")
#ajoute une legende pour les points 
p.circle(x=preprocessing[0], y=weighted_averages[0], size=5, color="black", legend_label="Weighted average")


show(p)

{'preprocessing': ['absent', 'clahe_rgb', 'clahe_lab', 'clahe_max_green_gsc', 'autobalance', 'seoud', 'sarki', 'graham_meth1', 'graham_meth2'], 'EYEPACS': [0.7164513468742371, 0.7267009019851685, 0.7206674814224243, 0.7190903425216675, 0.7184218168258667, 0.6832872629165649, 0.669792890548706, 0.6141797304153442, 0.6781201362609863], 'IDRID': [0.7536443471908569, 0.6793195009231567, 0.7181226015090942, 0.634716272354126, 0.712795615196228, 0.6597574353218079, 0.7123125791549683, 0.6608767509460449, 0.6403340101242065], 'DDR': [0.7930291295051575, 0.7741455435752869, 0.7704459428787231, 0.7741395235061646, 0.7625001072883606, 0.7339574694633484, 0.7186775207519531, 0.7009872198104858, 0.7106068134307861]}


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(self.file_column, inplace=True)
ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : x='absent' [no close matches] {renderer: GlyphRenderer(id='p1126', ...)}


<fundus_data_toolkit.datamodules.classification.EyePACSDataModule object at 0x7b7b77342d50>
Train 28101 Val 7025 Test 53575
<fundus_data_toolkit.datamodules.classification.AptosDataModule object at 0x7b7b7729e930>
Train 2930 Val 732 <fundus_data_toolkit.datamodules.classification.IDRiDDataModule object at 0x7b7bd48c6420>
Train 331 Val 82 Test 103
<fundus_data_toolkit.datamodules.classification.DDRDataModule object at 0x7b7b77241d30>
Train 6260 Val 2503 Test 3759
57437
0.9327611121750787
0.0017932691470654803
0.06544561867785574
