In [1]:
# Standard library
import json

# Third-party
import pandas as pd
import wandb

# Client for the W&B public API; the cells below query runs through it.
api = wandb.Api()

In [2]:
# Every condition under "$and" has to hold for a run to be returned.
run_filters = {
    "$and": [
        {"config.dataset.name": "mutinfo_val_carotid"},
        {"State": "finished"},
        {"tags": {"$in": ["majority_vote_unique", "rerun_after_bugfix", "cutler"]}},
    ]
}

# Project is specified by <entity/project-name>
runs = api.runs(path="alexaatm/segm-eval", filters=run_filters)

In [3]:
# Per-run accumulators; entry i of every list belongs to the i-th run.
summary_list = []
config_list = []
name_list = []
run_ids = []
logged_artifacts = []

for run in runs:
    # Output keys/values (metrics such as accuracy); ._json_dict omits large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters, without the special entries whose key starts with "_".
    config_list.append(
        {key: value for key, value in run.config.items() if not key.startswith('_')}
    )

    # Human-readable name, run id and the artifacts logged by the run.
    name_list.append(run.name)
    run_ids.append(run.id)
    logged_artifacts.append(run.logged_artifacts())

In [4]:
# One row per run; each column is one of the index-aligned per-run lists.
runs_df = pd.DataFrame(
    dict(
        summary=summary_list,
        config=config_list,
        name=name_list,
        run_id=run_ids,
        logged_artifacts=logged_artifacts,
    )
)

# runs_df.to_csv("project.csv")

In [6]:
# Peek at the dataset name of `run`, i.e. whichever run the most recently
# executed `for run in runs` loop ended on (the loop variable outlives the loop).
run.config['dataset']['name']

'mutinfo_val_carotid'

In [5]:
# Extract only some fields: selected config/summary values plus the per-class
# Jaccard scores stored in each run's logged "jaccard_table" artifact.
dataset_pred_dir_list, eval_per_image_list, iou_thresh_list, miou_list = [], [], [], []
jacc_class0_list, jacc_class1_list, name_list, run_ids = [], [], [], []

for run in runs:
    dataset_pred_dir_list.append(run.config['dataset']['pred_dir'])
    eval_per_image_list.append(run.config['eval_per_image'])
    iou_thresh_list.append(run.config['iou_thresh'])
    miou_list.append(run.summary['mIoU'])

    # Pick the first logged artifact whose name mentions "jaccard_table".
    artifacts = run.logged_artifacts()
    jacc_table_artifact = next(
        (artifact for artifact in artifacts if "jaccard_table" in artifact.name),
        None,
    )
    if jacc_table_artifact is None:
        # IndexError is what indexing an empty match list would raise; the
        # message additionally names the offending run.
        raise IndexError(f"run {run.id} has no logged 'jaccard_table' artifact")

    # Download the artifact and load the table JSON it contains.
    table_dir = jacc_table_artifact.download()
    table_path = f"{table_dir}/jaccard_table.table.json"
    # The context manager closes the file handle after parsing, so one handle
    # per run is no longer left open.
    with open(table_path) as table_file:
        jacc_table = json.load(table_file)
    df = pd.DataFrame(jacc_table["data"], columns=jacc_table["columns"])

    # Keep the first table row's value of each ground-truth class column.
    jacc_class0_list.append(df['GT_class0'].iloc[0])
    jacc_class1_list.append(df['GT_class1'].iloc[0])

    # .name is the human-readable name of the run.
    name_list.append(run.name)
    run_ids.append(run.id)



[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m: 

In [6]:
# Assemble the extracted per-run lists into one table (one row per run).
runs_df = pd.DataFrame(
    dict(
        pred_dir=dataset_pred_dir_list,
        eval_per_image=eval_per_image_list,
        iou_thresh=iou_thresh_list,
        mIoU=miou_list,
        jacc_class0=jacc_class0_list,
        jacc_class1=jacc_class1_list,
        name=name_list,
        id=run_ids,
    )
)

In [7]:
# Preview the first rows of the assembled table.
runs_df.head()

Unnamed: 0,pred_dir,eval_per_image,iou_thresh,mIoU,jacc_class0,jacc_class1,name,id
0,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.577858,0.968323,0.187394,eval_mutinfo_val_carotid_2023-09-13_01-11-40,qgmq01wp
1,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.482332,0.924374,0.040291,eval_mutinfo_val_carotid_2023-09-13_01-11-40,hgwtxx9v
2,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.53459,0.92112,0.148059,eval_mutinfo_val_carotid_2023-09-12_17-56-15,lbuhs11i
3,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.473247,0.91669,0.029804,eval_mutinfo_val_carotid_2023-09-12_17-56-15,e6pgp6qj
4,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.471869,0.91462,0.029118,eval_mutinfo_val_carotid_2023-09-12_17-56-15,58lz5umy


In [8]:
# Group the runs by prediction directory.
# NOTE(review): `grouped` is not referenced again in the cells visible here — confirm it is still needed.
grouped = runs_df.groupby('pred_dir')

In [9]:
from tabulate import tabulate

In [10]:
# Labels of the Series returned by extract_info, in order.
_INFO_INDEX = ['features', 'n_clusters', 'prediction']

# Root directory shared by the pipeline outputs listed in _KNOWN_RUNS.
_PIPELINE_ROOT = (
    "/home/guests/oleksandra_tmenova/test/project/thesis-codebase/"
    "deep-spectral-segmentation/outputs/pipeline/mutinfo_val_carotid"
)

# (run timestamp, prediction sub-directory, features label) of the prediction
# directories whose path does not spell out the feature type or cluster count.
_KNOWN_RUNS = (
    ("2023-08-14--13-59-23", "crf_segmaps", "dino"),
    ("2023-08-13--20-47-28", "crf_multi_region", "dino"),
    ("2023-08-16--18-13-13", "crf_segmaps", "dino_ssd1"),
    ("2023-08-16--18-13-13", "crf_multi_region", "dino_ssd1"),
    ("2023-08-16--22-45-22", "crf_segmaps", "dino_ssd2"),
    ("2023-08-16--22-45-22", "crf_multi_region", "dino_ssd2"),
)

# Exact prediction directory -> (features, n_clusters, prediction); all known runs use 15 clusters.
_KNOWN_PRED_DIRS = {
    f"{_PIPELINE_ROOT}/{stamp}/semantic_segmentations/laplacian/{prediction}": (features, 15, prediction)
    for stamp, prediction, features in _KNOWN_RUNS
}


def extract_info(pred_dir):
    """Derive the 'features', 'n_clusters' and 'prediction' labels from a prediction directory.

    Directories listed in ``_KNOWN_PRED_DIRS`` are resolved by exact match and
    returned immediately, so the generic parsing below can no longer overwrite
    them. Every other path is parsed by substring:

    * ``n_clusters``: the digits directly following the first ``clusters<digits>``
      occurrence, else ``None``.
    * ``features``: ``"dino"`` (optionally suffixed ``_ssd1`` / ``_ssd2``), or
      ``"cutler"`` suffixed with ``_imagenet``, ``_carotid``, ``_maskcut_init``
      or ``_maskcut``; ``None`` when neither name occurs in the path.
    * ``prediction``: ``"crf_segmaps"`` or ``"crf_multi_region"``, else ``None``.

    NOTE(review): the cutler branch was left unfinished in the previous version
    of this cell; the underscore-separated cutler labels are a best-guess
    completion modelled on the dino labels — confirm the intended names.

    Args:
        pred_dir: Prediction directory path as a string.

    Returns:
        pd.Series indexed by ['features', 'n_clusters', 'prediction'].
    """
    import re  # local import so the cell does not depend on the import cell

    known = _KNOWN_PRED_DIRS.get(pred_dir)
    if known is not None:
        return pd.Series(list(known), index=_INFO_INDEX)

    # A regex avoids the failures of slicing up to the next "_" when the
    # number is followed by something else (e.g. "/" or the end of the path).
    clusters_match = re.search(r"clusters(\d+)", pred_dir)
    n_clusters = int(clusters_match.group(1)) if clusters_match else None

    features = None
    if "dino" in pred_dir:
        features = "dino"
        if "ssd1" in pred_dir:
            features += "_ssd1"
        elif "ssd2" in pred_dir:
            features += "_ssd2"
    elif "cutler" in pred_dir:
        features = "cutler"
        if "imagenet" in pred_dir:
            features += "_imagenet"
        elif "cutler_carotid" in pred_dir:
            features += "_carotid"
        elif "maskcut_init" in pred_dir:
            # Checked before plain "maskcut", which is a substring of it.
            features += "_maskcut_init"
        elif "maskcut" in pred_dir:
            features += "_maskcut"

    prediction_type = None
    if "crf_segmaps" in pred_dir:
        prediction_type = "crf_segmaps"
    elif "crf_multi_region" in pred_dir:
        prediction_type = "crf_multi_region"

    return pd.Series([features, n_clusters, prediction_type], index=_INFO_INDEX)

# Run extract_info on every prediction directory and attach the resulting
# columns to the DataFrame.
info_columns = ['features', 'n_clusters', 'prediction']
runs_df[info_columns] = runs_df['pred_dir'].apply(extract_info)


In [11]:
# Display the full table, including the 'features', 'n_clusters' and 'prediction' columns.
runs_df

Unnamed: 0,pred_dir,eval_per_image,iou_thresh,mIoU,jacc_class0,jacc_class1,name,id,features,n_clusters,prediction
0,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.577858,0.968323,0.187394,eval_mutinfo_val_carotid_2023-09-13_01-11-40,qgmq01wp,dino_ssd2,15.0,crf_segmaps
1,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.482332,0.924374,0.040291,eval_mutinfo_val_carotid_2023-09-13_01-11-40,hgwtxx9v,dino_ssd2,15.0,crf_multi_region
2,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.53459,0.92112,0.148059,eval_mutinfo_val_carotid_2023-09-12_17-56-15,lbuhs11i,dino_ssd1,15.0,crf_segmaps
3,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.473247,0.91669,0.029804,eval_mutinfo_val_carotid_2023-09-12_17-56-15,e6pgp6qj,dino_ssd2,15.0,crf_multi_region
4,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.471869,0.91462,0.029118,eval_mutinfo_val_carotid_2023-09-12_17-56-15,58lz5umy,dino_ssd1,15.0,crf_multi_region
5,/home/guests/oleksandra_tmenova/test/project/t...,False,0.0,0.513261,0.880976,0.145545,eval_mutinfo_val_carotid_2023-09-12_17-56-15,1hhre3zw,dino_ssd2,15.0,crf_segmaps
6,/home/guests/oleksandra_tmenova/test/project/t...,True,0.0,0.277633,0.509845,0.045421,eval_mutinfo_val_carotid_2023-09-09_14-44-24,nh3cr4wl,,,
7,/home/guests/oleksandra_tmenova/test/project/t...,True,0.0,0.600312,0.957502,0.243121,eval_mutinfo_val_carotid_2023-09-09_14-26-07,h4apkbcq,,,
8,/home/guests/oleksandra_tmenova/test/project/t...,True,0.0,0.407995,0.749443,0.066546,eval_mutinfo_val_carotid_2023-09-09_14-02-58,xm0id29p,,,
9,/home/guests/oleksandra_tmenova/test/project/t...,True,0.0,0.277633,0.509845,0.045421,eval_mutinfo_val_carotid_2023-09-09_13-38-34,mz7gzg9g,,,


In [12]:
# Build one sorted view per 'features' label and store it under that label.
sort_keys = ['eval_per_image', 'prediction', 'iou_thresh', 'jacc_class1']
sort_ascending = [False, False, True, False]
shown_columns = ['eval_per_image', 'prediction', 'iou_thresh', 'jacc_class0', 'jacc_class1', 'mIoU', 'n_clusters']

grouped_by_features = runs_df.groupby('features')
# Dictionary to store DataFrames for each group
grouped_df = {}

for feature_name, feature_runs in grouped_by_features:
    print(f"Group: {feature_name}")
    # Order rows by the sort keys (per-key direction in sort_ascending), then
    # keep only the columns of interest.
    ordered_runs = feature_runs.sort_values(by=sort_keys, ascending=sort_ascending)
    grouped_df[feature_name] = ordered_runs[shown_columns]

Group: dino
Group: dino_ssd1
Group: dino_ssd2


In [13]:
# Sorted view of the runs whose 'features' label is 'dino'.
grouped_df['dino']

Unnamed: 0,eval_per_image,prediction,iou_thresh,jacc_class0,jacc_class1,mIoU,n_clusters
25,False,crf_segmaps,0.0,0.397322,0.035176,0.216249,6.0
27,False,crf_multi_region,0.0,0.914907,0.050591,0.482749,6.0


In [14]:
def bold_rows(row, column, sign, value):
    """Return per-cell CSS that bolds a whole row when row[column] compares true against value.

    Written for ``Styler.apply(bold_rows, ..., axis=1)``, which passes one row at a time.

    Args:
        row: The row being styled; must support ``row[column]``, iteration and ``len``.
        column: Label of the entry to compare.
        sign: ``">"`` to bold when ``row[column] > value``, ``"<"`` for ``row[column] < value``.
        value: Threshold to compare against.

    Returns:
        A list with one CSS string per entry of ``row``: all ``'font-weight: bold'``
        when the comparison holds, all ``''`` otherwise.

    Raises:
        ValueError: If ``sign`` is neither ``">"`` nor ``"<"``. Without this the
            function would fall through and return ``None``, which the Styler
            cannot use as a style list.
    """
    if sign == ">":
        is_match = row[column] > value
    elif sign == "<":
        is_match = row[column] < value
    else:
        raise ValueError(f"unsupported sign {sign!r}; expected '>' or '<'")

    css = 'font-weight: bold' if is_match else ''
    return [css] * len(row)

In [15]:
# Define a function to apply bold style to the row with the maximum value in a specified column
def bold_max_row(series):
    """Return CSS that bolds the entry of ``series`` holding its maximum value.

    Written for column-wise ``Styler.apply(bold_max_row, subset=[...])``.

    Args:
        series: pandas Series of comparable values.

    Returns:
        A list with one CSS string per entry of ``series``: ``'font-weight: bold'``
        where the index label equals ``series.idxmax()``, ``''`` elsewhere. An
        empty series yields an empty list.
    """
    if series.empty:
        return []
    max_index = series.idxmax()
    # Iterate the series' own index: the returned list must line up with the
    # data being styled, not with an unrelated global DataFrame.
    return ['font-weight: bold' if idx == max_index else '' for idx in series.index]

In [16]:
# Row-wise styling (axis=1): bold every row of the 'dino' table whose
# 'jacc_class1' is greater than 0.2.
styled_df = grouped_df['dino'].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.2, axis=1)
# styled_df = grouped_df['dino'].style.apply(bold_max_row, subset=['jacc_class1'])


In [17]:
# Render the styled table.
styled_df

Unnamed: 0,eval_per_image,prediction,iou_thresh,jacc_class0,jacc_class1,mIoU,n_clusters
25,False,crf_segmaps,0.0,0.397322,0.035176,0.216249,6.0
27,False,crf_multi_region,0.0,0.914907,0.050591,0.482749,6.0


GROUP BY EVALUATION


In [18]:
# Build one sorted view per evaluation setting, keyed by the
# (eval_per_image, iou_thresh) tuple of the group.
grouped_by_eval = runs_df.groupby(['eval_per_image',  'iou_thresh'])
# Dictionary to store DataFrames for each group
df_by_eval = {}

# The loop variable is called `eval_key` rather than `eval`, so the builtin
# eval() is not shadowed for the rest of the kernel session.
for eval_key, group_data in grouped_by_eval:
    print(f"Group: {eval_key}")
    # Sort by features, then prediction, then 'jacc_class1', all ascending.
    sorted_group = group_data.sort_values(by=['features','prediction','jacc_class1'], ascending=[True, True, True])

    df_by_eval[eval_key] = sorted_group[['eval_per_image', 'iou_thresh','features','n_clusters','prediction','jacc_class0', 'jacc_class1', 'mIoU',]]

    # Shows the type of the group key, which is also the key used in df_by_eval.
    print(type(eval_key))

Group: (False, 0.0)
<class 'tuple'>
Group: (False, 0.5)
<class 'tuple'>
Group: (True, 0.0)
<class 'tuple'>
Group: (True, 0.5)
<class 'tuple'>


In [19]:
# Evaluation setting eval_per_image=False, iou_thresh=0.0.
# NOTE: the bare lookup below is not displayed — a cell only renders its last expression.
df_by_eval[(False, 0.0)]
# Bold the rows whose 'jacc_class1' is greater than 0.2, then render the styled table.
styled_df = df_by_eval[(False, 0.0)].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.2, axis=1)
styled_df


Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
27,False,0.0,dino,6.0,crf_multi_region,0.914907,0.050591,0.482749
25,False,0.0,dino,6.0,crf_segmaps,0.397322,0.035176,0.216249
4,False,0.0,dino_ssd1,15.0,crf_multi_region,0.91462,0.029118,0.471869
22,False,0.0,dino_ssd1,6.0,crf_multi_region,0.899945,0.059585,0.479765
23,False,0.0,dino_ssd1,6.0,crf_segmaps,0.961593,0.127043,0.544318
2,False,0.0,dino_ssd1,15.0,crf_segmaps,0.92112,0.148059,0.53459
3,False,0.0,dino_ssd2,15.0,crf_multi_region,0.91669,0.029804,0.473247
1,False,0.0,dino_ssd2,15.0,crf_multi_region,0.924374,0.040291,0.482332
24,False,0.0,dino_ssd2,6.0,crf_multi_region,0.896431,0.053178,0.474805
26,False,0.0,dino_ssd2,6.0,crf_segmaps,0.914976,0.036402,0.475689


In [55]:
# Evaluation setting eval_per_image=False, iou_thresh=0.5; shown unstyled
# because the styling lines below are commented out.
df_by_eval[(False, 0.5)]
# styled_df = df_by_eval[(False, 0.5)].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.007, axis=1)
# styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
11,False,0.5,dino,15,crf_multi_region,0.576099,0.005496,0.290798
8,False,0.5,dino,15,crf_segmaps,0.522603,0.007284,0.264943
0,False,0.5,dino_ssd1,15,crf_multi_region,0.7001,0.001568,0.350834
2,False,0.5,dino_ssd1,15,crf_segmaps,0.659396,0.004014,0.331705
7,False,0.5,dino_ssd2,15,crf_multi_region,0.752283,0.001053,0.376668
5,False,0.5,dino_ssd2,15,crf_segmaps,0.695406,0.006239,0.350823


In [56]:
# Evaluation setting eval_per_image=True, iou_thresh=0.0.
# NOTE: the bare lookup below is not displayed — a cell only renders its last expression.
df_by_eval[(True, 0.0)]
# Bold the rows whose 'jacc_class1' is greater than 0.59, then render the styled table.
styled_df = df_by_eval[(True, 0.0)].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.59, axis=1)
styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
14,True,0.0,dino,15,crf_multi_region,0.953321,0.236096,0.594708
20,True,0.0,dino,15,crf_segmaps,0.913491,0.165103,0.539297
16,True,0.0,dino_ssd1,15,crf_multi_region,0.983943,0.537348,0.760646
21,True,0.0,dino_ssd1,15,crf_segmaps,0.94985,0.290895,0.620372
15,True,0.0,dino_ssd2,15,crf_multi_region,0.986232,0.590251,0.788242
18,True,0.0,dino_ssd2,15,crf_segmaps,0.909543,0.196248,0.552896


In [57]:
# Evaluation setting eval_per_image=True, iou_thresh=0.5.
# NOTE: the bare lookup below is not displayed — a cell only renders its last expression.
df_by_eval[(True, 0.5)]
# Bold the rows whose 'jacc_class1' is greater than 0.59, then render the styled table.
# NOTE(review): in the output recorded under this cell no 'jacc_class1' value
# reaches 0.59, so nothing would be bolded — confirm the intended threshold.
styled_df = df_by_eval[(True, 0.5)].style.apply(bold_rows, column='jacc_class1', sign=">", value=0.59, axis=1)
styled_df

Unnamed: 0,eval_per_image,iou_thresh,features,n_clusters,prediction,jacc_class0,jacc_class1,mIoU
23,True,0.5,dino,15,crf_multi_region,0.719467,0.031927,0.375697
17,True,0.5,dino,15,crf_segmaps,0.658805,0.028191,0.343498
13,True,0.5,dino_ssd1,15,crf_multi_region,0.9194,0.164296,0.541848
19,True,0.5,dino_ssd1,15,crf_segmaps,0.810194,0.052468,0.431331
12,True,0.5,dino_ssd2,15,crf_multi_region,0.938911,0.218499,0.578705
22,True,0.5,dino_ssd2,15,crf_segmaps,0.762091,0.026661,0.394376
