In [57]:
import os
# Neptune credentials come from the environment; the value is None when the variable is unset.
api_token = os.environ.get('NEPTUNE_API_TOKEN')

In [58]:
import neptune
import numpy as np
import pandas as pd

# "<workspace>/<project>" identifier of the Neptune project queried by this notebook.
project_name = "ACFRmarine/ALDI-DOAD"

# Project-level handle, opened read-only with the token read from the environment.
project = neptune.init_project(project=project_name, api_token=api_token, mode="read-only")

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/


In [59]:
# Fetch every run whose group tags contain "SAOD5" and convert the table to pandas.
runs_table_df = project.fetch_runs_table(query='(`sys/group_tags`:stringSet CONTAINS "SAOD5")').to_pandas()
# Keep only the rows whose 'sys/failed' equals False.
# `.copy()` detaches the filtered rows from the fetched table: later cells add
# columns to this frame, and assigning into a boolean-mask selection would
# otherwise raise pandas' SettingWithCopyWarning.
runs_table_df = runs_table_df[runs_table_df['sys/failed'] == False].copy()
# `df` is an alias (same object) used by the rest of the notebook.
df = runs_table_df

In [74]:
def is_in_run(column_name, run):
    """Tell whether a '/'-separated field path exists in a run's structure.

    Args:
        column_name: Field path such as 'a/b/c'; each '/'-separated part is
            looked up one nesting level deeper.
        run: Object exposing `get_structure()`, whose result is walked with
            `in` and `[]` at every level.

    Returns:
        True if every part of the path is found, False as soon as one is missing.
    """
    path_parts = column_name.split('/')
    node = run.get_structure()
    for key in path_parts:
        if key in node:
            # Descend one level and keep checking the remaining parts.
            node = node[key]
            continue
        return False
    return True

def get_method(group_tags):
    """Map a run's group tags to a human-readable training-method label.

    Args:
        group_tags: Container tested with `in` for the known tag names.
            NOTE(review): presumably the string Neptune exports for
            'sys/group_tags', in which case `in` is a substring test; confirm.

    Returns:
        "Strong Loss", "Weak Loss", "Our method" or "Fully Supervised" for a
        recognised tag, otherwise "Not sure".
    """
    # The more specific "CoSt_str" / "CoSt_weak" tags are tested before the
    # plain "CoSt" tag, which is a prefix of both.
    if "CoSt_str" in group_tags:
        return "Strong Loss"
    elif "CoSt_weak" in group_tags:
        return "Weak Loss"
    elif "CoSt" in group_tags or "CoSt_BOTH" in group_tags:
        return "Our method"
    elif "ORACLE" in group_tags:
        return "Fully Supervised"
    else:
        # The fallback label was previously a bare expression without `return`,
        # so unrecognised tags silently produced None.
        return "Not sure"

def get_dataset(group_tags):
    """Map a run's group tags to a label describing the annotation set used.

    Tags are tested with `in`, in priority order: "ft_sparse", then "ORACLE",
    then "FEW" combined with "OG" or with "CROP".

    Returns:
        "Bbox Sparse", "Bbox All", "Point Sparse",
        "Point Sparse - Cropped images", or "Unknown" when nothing matches.
    """
    if "ft_sparse" in group_tags:
        return "Bbox Sparse"
    if "ORACLE" in group_tags:
        return "Bbox All"
    # Both point-annotation variants require the "FEW" tag.
    if "FEW" in group_tags:
        if "OG" in group_tags:
            return "Point Sparse"
        if "CROP" in group_tags:
            return "Point Sparse - Cropped images"
    return "Unknown"

def get_max_from_run(run_id, column_name, api_token, project_name):
    """Return the maximum logged value of one field of a Neptune run.

    Opens run `run_id` of `project_name` in read-only mode. If `column_name`
    (a '/'-separated field path) is present in the run's structure, its values
    are fetched and the maximum of their 'value' column is returned.

    Args:
        run_id: Identifier passed to `neptune.init_run(with_id=...)`.
        column_name: '/'-separated path of the field to read.
        api_token: Neptune API token.
        project_name: Neptune project the run belongs to.

    Returns:
        The maximum of the fetched 'value' column, or None when the path is
        not present in the run.
    """
    run = neptune.init_run(with_id=run_id, mode="read-only",
                           api_token=api_token,
                           project=project_name)
    try:
        if not is_in_run(column_name, run):
            return None
        return run[column_name].fetch_values()['value'].max()
    finally:
        # This helper is applied once per row of the runs table; without an
        # explicit stop every call would leave its run handle open.
        run.stop()

def get_max_AP50_from_id(run_id):
    """Return the maximum 'training/metrics/bbox/AP50' value of run `run_id`.

    Thin wrapper around `get_max_from_run` that supplies the notebook-level
    `api_token` and `project_name`; the result is None when the field is absent.
    """
    ap50_field = 'training/metrics/bbox/AP50'
    return get_max_from_run(
        run_id,
        ap50_field,
        api_token=api_token,
        project_name=project_name,
    )



In [75]:
# Derived columns used for grouping and reporting below.
# Runs tested on the redcup test set are labelled '2Redcup'; every other run is '1Urchin'.
df['species'] = df['training/config/DATASETS/TEST'].apply(
    lambda test_set: '2Redcup' if test_set == "('squidle_redcup_test',)" else '1Urchin'
)
# Method and dataset labels are both parsed from the run's group tags.
df['method'] = df['sys/group_tags'].apply(get_method)
df['dataset'] = df['sys/group_tags'].apply(get_dataset)
# AP50 as reported in the runs table, plus the training seed under a shorter name.
df['last_AP50'] = df['training/metrics/bbox/AP50']
df['Seed'] = df['training/config/SEED']

In [63]:
# Best AP50 per run: get_max_AP50_from_id is applied to every 'sys/id', and each call
# opens that run through neptune.init_run (one "Neptune initialized" log line per row).
df['max_AP50'] = df['sys/id'].apply(get_max_AP50_from_id)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1339
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1338
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1337
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1336
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1335
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1334
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1333
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/AL-1332
[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/ACFRmarine/ALDI-DOAD/e/

In [76]:
# Per-run summary: keep the reporting columns and order the rows so that runs
# sharing species, dataset and method sit next to each other, sorted by seed.
df_summary = (
    df[['sys/id', 'species', 'dataset', 'method', 'max_AP50', 'last_AP50', 'Seed']]
    .sort_values(by=['species', 'dataset', 'method', 'Seed'])
)

In [80]:

def mean_with_nan(group):
    """Mean of `group`, or NaN if any of its values is missing.

    Unlike a plain `.mean()`, a single missing value makes the whole result NaN.
    """
    if group.isna().any():
        return np.nan
    return group.mean()

def std_with_nan(group):
    """Standard deviation of `group` (via `.std()`), or NaN if any value is missing.

    Unlike a plain `.std()`, a single missing value makes the whole result NaN.
    """
    if group.isna().any():
        return np.nan
    return group.std()


# One output row per (species, dataset, method) combination; dropna=False keeps
# groups whose key contains a missing value.
group_columns = ['species', 'dataset', 'method']
df_grouped = (
    df.groupby(group_columns, dropna=False)
    .agg(
        AP50_1max_mean=('max_AP50', mean_with_nan),
        AP50_1max_std=('max_AP50', std_with_nan),
        AP50_2last_mean=('last_AP50', mean_with_nan),
        AP50_2last_std=('last_AP50', std_with_nan),
        count=('max_AP50', 'size'),  # or use 'count' if you want to exclude NaNs
    )
    .reset_index()
    # Report every mean/std column with one decimal; `count` is left untouched.
    .round(dict.fromkeys(
        ['AP50_1max_mean', 'AP50_1max_std', 'AP50_2last_mean', 'AP50_2last_std'], 1
    ))
)


In [81]:
# Long format: one line per (species, dataset, method) group.
print(df_grouped.to_string(index=False))

# Wide format: rows are (dataset, method); columns are grouped by species first
# and by statistic second, then sorted.
df_melted = (
    df_grouped
    .pivot(
        index=['dataset', 'method'],
        columns='species',
        values=['AP50_1max_mean', 'AP50_1max_std', 'AP50_2last_mean', 'AP50_2last_std'],
    )
    .reorder_levels([1, 0], axis=1)
    .sort_index(axis=1)
)
print(df_melted.to_string())


species                       dataset           method  AP50_1max_mean  AP50_1max_std  AP50_2last_mean  AP50_2last_std  count
1Urchin                      Bbox All Fully Supervised            95.4            0.3             92.7             0.3      3
1Urchin                   Bbox Sparse       Our method            51.8           37.2             50.8            35.7      3
1Urchin                  Point Sparse       Our method             9.9            9.5              2.8             2.4      3
1Urchin                  Point Sparse      Strong Loss             5.6            7.3              3.7             4.0      3
1Urchin                  Point Sparse        Weak Loss             7.2            4.6              2.4             2.3      3
1Urchin Point Sparse - Cropped images       Our method            13.1            9.9              5.5             5.7      3
2Redcup                      Bbox All Fully Supervised            93.5            0.1             91.8             0.5