In [2]:
import json
import operator

import pandas as pd
import wandb

api = wandb.Api()

In [3]:
# Project is specified by <entity/project-name>
# Select runs on the mutinfo_val_carotid dataset that carry at least one of
# the listed tags.
# NOTE(review): the wandb docs list the run property as lowercase "state";
# confirm that the capitalised "State" key below is honoured by the server.
run_filters = {
    "$and": [
        {"config.dataset.name": "mutinfo_val_carotid"},
        {"State": "finished"},
        {"tags": {"$in": ["majority_vote_unique", "rerun_after_bugfix"]}},
    ]
}
runs = api.runs(path="alexaatm/segm-eval", filters=run_filters)

In [4]:
summary_list = []
config_list = []
name_list = []
run_ids = []
logged_artifacts = []

for run in runs:
    # Metric outputs of the run; ._json_dict is used to omit large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters, skipping the special entries whose key starts with "_".
    visible_config = {}
    for key, value in run.config.items():
        if key.startswith('_'):
            continue
        visible_config[key] = value
    config_list.append(visible_config)

    # Human-readable name, id and logged artifacts of the run.
    name_list.append(run.name)
    run_ids.append(run.id)
    logged_artifacts.append(run.logged_artifacts())

In [5]:
# One row per run, built from the parallel lists filled in the loop above.
run_columns = dict(
    summary=summary_list,
    config=config_list,
    name=name_list,
    run_id=run_ids,
    logged_artifacts=logged_artifacts,
)
runs_df = pd.DataFrame(run_columns)

# Optional export:
# runs_df.to_csv("project.csv")

In [6]:
# Sanity check of the dataset filter. `run` is not defined in this cell: it is
# whatever the preceding `for run in runs` loop left bound (its last run), so
# this cell only works after that loop has executed.
run.config['dataset']['name']

'mutinfo_val_carotid'

In [7]:
# Extract only the fields needed for the comparison tables.
def _load_jaccard_table(run):
    """Download a run's ``jaccard_table`` artifact and load it as a DataFrame.

    Args:
        run: A run object yielded by iterating over ``runs``.

    Returns:
        A DataFrame built from the ``"data"`` and ``"columns"`` entries of the
        downloaded ``jaccard_table.table.json`` file.

    Raises:
        RuntimeError: If the run logged no artifact whose name contains
            ``"jaccard_table"``.
    """
    # First logged artifact whose name mentions the table.
    jacc_table_artifact = next(
        (artifact for artifact in run.logged_artifacts() if "jaccard_table" in artifact.name),
        None,
    )
    if jacc_table_artifact is None:
        raise RuntimeError(f"Run {run.id} has no logged 'jaccard_table' artifact")

    table_dir = jacc_table_artifact.download()
    table_path = f"{table_dir}/jaccard_table.table.json"
    # Context manager so the file handle is closed after every download.
    with open(table_path, encoding="utf-8") as table_file:
        jacc_table = json.load(table_file)
    return pd.DataFrame(jacc_table["data"], columns=jacc_table["columns"])


dataset_pred_dir_list, eval_per_image_list, iou_thresh_list = [], [], []
miou_list, jacc_class0_list, jacc_class1_list = [], [], []
name_list, run_ids = [], []

for run in runs:
    # Evaluation settings stored in the run config.
    dataset_pred_dir_list.append(run.config['dataset']['pred_dir'])
    eval_per_image_list.append(run.config['eval_per_image'])
    iou_thresh_list.append(run.config['iou_thresh'])

    # Headline metric from the run summary.
    miou_list.append(run.summary['mIoU'])

    # Values taken from the first row of the logged Jaccard table.
    df = _load_jaccard_table(run)
    jacc_class0_list.append(df['GT_class0'].iloc[0])
    jacc_class1_list.append(df['GT_class1'].iloc[0])

    # .name is the human-readable name of the run.
    name_list.append(run.name)
    run_ids.append(run.id)



[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m: 

In [8]:
# One row per run, holding the evaluation settings and the extracted metrics.
runs_df = pd.DataFrame(
    dict(
        pred_dir=dataset_pred_dir_list,
        eval_per_image=eval_per_image_list,
        iou_thresh=iou_thresh_list,
        mIoU=miou_list,
        jacc_class0=jacc_class0_list,
        jacc_class1=jacc_class1_list,
        name=name_list,
        id=run_ids,
    )
)

In [9]:
# Preview the first rows of the per-run table.
runs_df.head()

Unnamed: 0,pred_dir,eval_per_image,iou_thresh,mIoU,jacc_class0,jacc_class1,name,id
0,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.350834,0.7001,0.001568,eval_mutinfo_val_carotid_2023-08-23_12-14-00,la706285
1,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.486643,0.92754,0.045747,eval_mutinfo_val_carotid_2023-08-23_12-13-56,2qhhvb01
2,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.331705,0.659396,0.004014,eval_mutinfo_val_carotid_2023-08-23_12-13-56,3aaun7jg
3,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.652332,0.974484,0.330181,eval_mutinfo_val_carotid_2023-08-23_12-13-56,4dqqjk1p
4,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.491897,0.933176,0.050617,eval_mutinfo_val_carotid_2023-08-23_12-13-56,8nagf3vu


In [10]:
# Group the runs by prediction directory.
# NOTE(review): `grouped` is not referenced by any later cell visible here.
grouped = runs_df.groupby('pred_dir')

In [11]:
from tabulate import tabulate

In [12]:
# Lookup from each known prediction directory to the metadata of the pipeline
# run that produced it.
_PIPELINE_ROOT = (
    "/home/guests/oleksandra_tmenova/test/project/thesis-codebase/"
    "deep-spectral-segmentation/outputs/pipeline/mutinfo_val_carotid"
)
_SEGMENTATION_SUBDIR = "semantic_segmentations/laplacian"

# (pipeline run timestamp, prediction folder) -> (features, n_clusters)
_KNOWN_PIPELINE_RUNS = {
    ("2023-08-14--13-59-23", "crf_segmaps"): ("dino", 15),
    ("2023-08-13--20-47-28", "crf_multi_region"): ("dino", 15),
    ("2023-08-16--18-13-13", "crf_segmaps"): ("dino_ssd1", 15),
    ("2023-08-16--18-13-13", "crf_multi_region"): ("dino_ssd1", 15),
    ("2023-08-16--22-45-22", "crf_segmaps"): ("dino_ssd2", 15),
    ("2023-08-16--22-45-22", "crf_multi_region"): ("dino_ssd2", 15),
}

# Full prediction directory -> (features, n_clusters, prediction)
_PRED_DIR_INFO = {
    f"{_PIPELINE_ROOT}/{timestamp}/{_SEGMENTATION_SUBDIR}/{prediction}": (features, n_clusters, prediction)
    for (timestamp, prediction), (features, n_clusters) in _KNOWN_PIPELINE_RUNS.items()
}


def extract_info(pred_dir):
    """Return the pipeline metadata associated with a prediction directory.

    Args:
        pred_dir: Absolute prediction directory, matched exactly against the
            known directories in ``_PRED_DIR_INFO``.

    Returns:
        A ``pd.Series`` indexed by ``'features'``, ``'n_clusters'`` and
        ``'prediction'``.

    Raises:
        ValueError: If ``pred_dir`` is not one of the known directories. The
            previous if/elif chain failed here with an UnboundLocalError.
    """
    try:
        info = _PRED_DIR_INFO[pred_dir]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unknown pred_dir {pred_dir!r}; add it to _KNOWN_PIPELINE_RUNS."
        ) from None
    return pd.Series(list(info), index=['features', 'n_clusters', 'prediction'])

# Derive the metadata columns from 'pred_dir' and attach them to the DataFrame.
info_columns = ['features', 'n_clusters', 'prediction']
runs_df[info_columns] = runs_df['pred_dir'].apply(extract_info)


In [13]:
# Show the full per-run table, including the derived metadata columns.
runs_df

Unnamed: 0,pred_dir,eval_per_image,iou_thresh,mIoU,jacc_class0,jacc_class1,name,id,features,n_clusters,prediction
0,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.350834,0.7001,0.001568,eval_mutinfo_val_carotid_2023-08-23_12-14-00,la706285,dino_ssd1,15,crf_multi_region
1,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.486643,0.92754,0.045747,eval_mutinfo_val_carotid_2023-08-23_12-13-56,2qhhvb01,dino_ssd2,15,crf_multi_region
2,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.331705,0.659396,0.004014,eval_mutinfo_val_carotid_2023-08-23_12-13-56,3aaun7jg,dino_ssd1,15,crf_segmaps
3,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.652332,0.974484,0.330181,eval_mutinfo_val_carotid_2023-08-23_12-13-56,4dqqjk1p,dino_ssd1,15,crf_segmaps
4,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.491897,0.933176,0.050617,eval_mutinfo_val_carotid_2023-08-23_12-13-56,8nagf3vu,dino_ssd1,15,crf_multi_region
5,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.350823,0.695406,0.006239,eval_mutinfo_val_carotid_2023-08-23_12-13-56,aznxkeaq,dino_ssd2,15,crf_segmaps
6,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.542428,0.944299,0.140557,eval_mutinfo_val_carotid_2023-08-23_12-13-56,ghsl6heg,dino_ssd2,15,crf_segmaps
7,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.376668,0.752283,0.001053,eval_mutinfo_val_carotid_2023-08-23_12-13-56,jumfd98z,dino_ssd2,15,crf_multi_region
8,/home/guests/oleksandra_tmenova/test/project/t...,False,0.5,0.264943,0.522603,0.007284,eval_mutinfo_val_carotid_2023-08-23_12-13-56,km0yaq6i,dino,15,crf_segmaps
9,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.561639,0.97345,0.149827,eval_mutinfo_val_carotid_2023-08-23_12-13-56,phw1k3xn,dino,15,crf_segmaps


In [14]:
# Split the runs by feature extractor and keep one sorted table per group.
grouped_by_features = runs_df.groupby('features')

# Row order: eval_per_image and prediction descending, then iou_thresh
# ascending, then jacc_class1 descending.
sort_columns = ['eval_per_image', 'prediction', 'iou_thresh', 'jacc_class1']
sort_ascending = [False, False, True, False]
shown_columns = ['eval_per_image', 'prediction', 'iou_thresh', 'jacc_class0', 'jacc_class1', 'mIoU', 'n_clusters']

# Dictionary to store DataFrames for each group
grouped_df = {}
for feature_name, feature_runs in grouped_by_features:
    print(f"Group: {feature_name}")
    grouped_df[feature_name] = feature_runs.sort_values(by=sort_columns, ascending=sort_ascending)[shown_columns]

Group: dino
Group: dino_ssd1
Group: dino_ssd2


In [35]:
# Table for the 'dino' feature group. Requires the cell that fills `grouped_df`
# to have run first.
# NOTE(review): the recorded KeyError suggests this cell was executed when
# `grouped_df` had no 'dino' entry (possibly out of order) — re-run from a
# fresh kernel to confirm.
grouped_df['dino']

KeyError: 'dino'

In [40]:
# Comparison signs accepted by bold_rows, mapped to their operator functions.
_COMPARATORS = {
    ">": operator.gt,
    "<": operator.lt,
    ">=": operator.ge,
    "<=": operator.le,
    "==": operator.eq,
    "!=": operator.ne,
}


def bold_rows(row, column, sign, value):
    """Return bold CSS for every cell of ``row`` when ``row[column]`` passes a comparison.

    Intended for ``DataFrame.style.apply(..., axis=1)``.

    Args:
        row: The row being styled (e.g. a ``pd.Series``).
        column: Label of the entry in ``row`` to compare.
        sign: One of ``">"``, ``"<"``, ``">="``, ``"<="``, ``"=="``, ``"!="``.
        value: Right-hand side of the comparison.

    Returns:
        A list with one CSS string per entry of ``row``: ``'font-weight: bold'``
        for all entries when the comparison holds, ``''`` for all otherwise.

    Raises:
        ValueError: If ``sign`` is not a supported comparison. Previously an
            unsupported sign silently returned ``None``.
    """
    try:
        compare = _COMPARATORS[sign]
    except KeyError:
        raise ValueError(
            f"Unsupported sign {sign!r}; expected one of {sorted(_COMPARATORS)}"
        ) from None
    # The decision is the same for the whole row, so evaluate it once.
    cell_style = 'font-weight: bold' if compare(row[column], value) else ''
    return [cell_style for _ in row]

In [17]:
def bold_max_row(series):
    """Return bold CSS for the entry of ``series`` that holds its maximum value.

    Intended for column-wise ``DataFrame.style.apply``.

    Args:
        series: The column being styled.

    Returns:
        A list with one CSS string per entry of ``series``:
        ``'font-weight: bold'`` where the index label equals
        ``series.idxmax()``, ``''`` elsewhere. An empty series gives ``[]``.
    """
    if series.empty:
        return []
    max_index = series.idxmax()
    # Iterate the series' own index rather than a global DataFrame, so the
    # result always has the same length as the column being styled.
    return ['font-weight: bold' if idx == max_index else '' for idx in series.index]

In [34]:
# Bold every row of the 'dino' group whose jacc_class1 is above 0.2.
styled_df = grouped_df['dino'].style.apply(
    bold_rows,
    axis=1,
    column='jacc_class1',
    sign=">",
    value=0.2,
)
# Alternative that bolds only the best row:
# styled_df = grouped_df['dino'].style.apply(bold_max_row, subset=['jacc_class1'])


KeyError: 'dino'

In [19]:
# Render the styled table built in the previous cell.
styled_df

Unnamed: 0,eval_per_image,prediction,iou_thresh,jacc_class0,jacc_class1,mIoU,n_clusters
20,True,crf_segmaps,0.0,0.913491,0.165103,0.539297,15
17,True,crf_segmaps,0.5,0.658805,0.028191,0.343498,15
14,True,crf_multi_region,0.0,0.953321,0.236096,0.594708,15
23,True,crf_multi_region,0.5,0.719467,0.031927,0.375697,15
9,False,crf_segmaps,0.0,0.97345,0.149827,0.561639,15
8,False,crf_segmaps,0.5,0.522603,0.007284,0.264943,15
10,False,crf_multi_region,0.0,0.945101,0.049743,0.497422,15
11,False,crf_multi_region,0.5,0.576099,0.005496,0.290798,15


GROUP BY EVALUATION


In [53]:
# Split the runs by evaluation setting and keep one sorted table per setting.
grouped_by_eval = runs_df.groupby(['eval_per_image', 'iou_thresh'])

# Dictionary to store DataFrames for each group. Grouping by two columns makes
# each key an (eval_per_image, iou_thresh) tuple, e.g. (False, 0.0).
df_by_eval = {}
eval_columns = ['eval_per_image', 'iou_thresh', 'features', 'n_clusters', 'prediction', 'jacc_class0', 'jacc_class1', 'mIoU']

# The loop variable is named `eval_key` rather than `eval`: at notebook scope
# `eval` would rebind the builtin for every later cell.
for eval_key, group_data in grouped_by_eval:
    print(f"Group: {eval_key}")
    # Order rows by features, then prediction, then jacc_class1 (all ascending).
    sorted_group = group_data.sort_values(by=['features', 'prediction', 'jacc_class1'], ascending=[True, True, True])

    df_by_eval[eval_key] = sorted_group[eval_columns]

Group: (False, 0.0)
<class 'tuple'>
Group: (False, 0.5)
<class 'tuple'>
Group: (True, 0.0)
<class 'tuple'>
Group: (True, 0.5)
<class 'tuple'>


In [54]:
# Runs with eval_per_image=False and iou_thresh=0.0; rows whose jacc_class1 is
# above 0.2 are shown in bold.
styled_df = df_by_eval[(False, 0.0)].style.apply(
    bold_rows,
    axis=1,
    column='jacc_class1',
    sign=">",
    value=0.2,
)
styled_df


Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
10,False,0.0,dino,15,crf_multi_region,0.945101,0.049743,0.497422
9,False,0.0,dino,15,crf_segmaps,0.97345,0.149827,0.561639
4,False,0.0,dino_ssd1,15,crf_multi_region,0.933176,0.050617,0.491897
3,False,0.0,dino_ssd1,15,crf_segmaps,0.974484,0.330181,0.652332
1,False,0.0,dino_ssd2,15,crf_multi_region,0.92754,0.045747,0.486643
6,False,0.0,dino_ssd2,15,crf_segmaps,0.944299,0.140557,0.542428


In [55]:
# Runs with eval_per_image=False and iou_thresh=0.5, displayed without row
# highlighting.
df_by_eval[(False, 0.5)]
# Disabled styling variant, kept for reference:
# styled_df = df_by_eval[(False, 0.5)].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.007, axis=1)
# styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
11,False,0.5,dino,15,crf_multi_region,0.576099,0.005496,0.290798
8,False,0.5,dino,15,crf_segmaps,0.522603,0.007284,0.264943
0,False,0.5,dino_ssd1,15,crf_multi_region,0.7001,0.001568,0.350834
2,False,0.5,dino_ssd1,15,crf_segmaps,0.659396,0.004014,0.331705
7,False,0.5,dino_ssd2,15,crf_multi_region,0.752283,0.001053,0.376668
5,False,0.5,dino_ssd2,15,crf_segmaps,0.695406,0.006239,0.350823


In [56]:
# Runs with eval_per_image=True and iou_thresh=0.0; rows whose jacc_class1 is
# above 0.59 are shown in bold.
styled_df = df_by_eval[(True, 0.0)].style.apply(
    bold_rows,
    axis=1,
    column='jacc_class1',
    sign=">",
    value=0.59,
)
styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
14,True,0.0,dino,15,crf_multi_region,0.953321,0.236096,0.594708
20,True,0.0,dino,15,crf_segmaps,0.913491,0.165103,0.539297
16,True,0.0,dino_ssd1,15,crf_multi_region,0.983943,0.537348,0.760646
21,True,0.0,dino_ssd1,15,crf_segmaps,0.94985,0.290895,0.620372
15,True,0.0,dino_ssd2,15,crf_multi_region,0.986232,0.590251,0.788242
18,True,0.0,dino_ssd2,15,crf_segmaps,0.909543,0.196248,0.552896


In [57]:
# Runs with eval_per_image=True and iou_thresh=0.5; rows whose jacc_class1 is
# above 0.59 are shown in bold.
# NOTE(review): in the recorded output no jacc_class1 value of this group
# exceeds 0.59, so nothing is highlighted — confirm the threshold is intended.
styled_df = df_by_eval[(True, 0.5)].style.apply(
    bold_rows,
    axis=1,
    column='jacc_class1',
    sign=">",
    value=0.59,
)
styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
23,True,0.5,dino,15,crf_multi_region,0.719467,0.031927,0.375697
17,True,0.5,dino,15,crf_segmaps,0.658805,0.028191,0.343498
13,True,0.5,dino_ssd1,15,crf_multi_region,0.9194,0.164296,0.541848
19,True,0.5,dino_ssd1,15,crf_segmaps,0.810194,0.052468,0.431331
12,True,0.5,dino_ssd2,15,crf_multi_region,0.938911,0.218499,0.578705
22,True,0.5,dino_ssd2,15,crf_segmaps,0.762091,0.026661,0.394376
