In [1]:
from reverb.training.utils import DEFAULT_TRAINING_KWARGS, DEFAULT_MODEL_KWARGS, DEFAULT_DATA_KWARGS
import segmentation_models_pytorch as smp

# Ablation grid for the semi-supervised experiments.
#
# Every experiment shares the same epoch budget and run-name prefix and differs
# only in a few training-kwarg overrides, so the grid is generated from a compact
# table. Each experiment name must appear exactly once: a repeated key in a dict
# literal silently overwrites the earlier entry.
_ABLATION_MAX_EPOCHS = 50
_ABLATION_RUN_PREFIX = "ablations/semisupervised"

# experiment name -> training kwargs set in addition to "max_epochs"
_ABLATION_TRAINING_OVERRIDES = {
    "baseline": {},
    "alpha_0.999": {"alpha": 0.999},
    "alpha_0.98": {"alpha": 0.98},
    "alpha_0.9": {"alpha": 0.9},
    # NOTE(review): named "no_ramp_up" but sets consistency_ramp_up=0.1 --
    # confirm this value really disables the ramp-up as intended.
    "no_ramp_up": {"consistency_ramp_up": 0.1, "alpha": 0.9},
    "lambda_0.1": {"consistency_lambda": 0.1, "alpha": 0.9},
    "lambda_2.0": {"consistency_lambda": 2.0, "alpha": 0.9},
}

# experiment name -> config with keys "run_name", "training_kwargs",
# "model_kwargs" and "data_kwargs". Every experiment gets its own
# "training_kwargs" dict, while "model_kwargs"/"data_kwargs" reference the shared
# default objects (they are not copied).
semi_supervised_experiments = {
    name: {
        "run_name": f"{_ABLATION_RUN_PREFIX}/{name}",
        "training_kwargs": {"max_epochs": _ABLATION_MAX_EPOCHS, **overrides},
        "model_kwargs": DEFAULT_MODEL_KWARGS,
        "data_kwargs": DEFAULT_DATA_KWARGS,
    }
    for name, overrides in _ABLATION_TRAINING_OVERRIDES.items()
}

In [None]:
from reverb.training.utils import train, get_eval_dataloaders, compute_results_over_eval_sets, save_evaluation_results
# Build the evaluation dataloaders once up front; the same object is passed to
# compute_results_over_eval_sets for every run in the training loop.
eval_dataloaders = get_eval_dataloaders()


In [None]:
# Train and evaluate three repeats (run-name suffixes _0, _1, _2) of every
# configured experiment, saving the evaluation results after each run.
for experiment, experiment_config in semi_supervised_experiments.items():
    training_kwargs = experiment_config['training_kwargs']
    model_kwargs = experiment_config['model_kwargs']
    data_kwargs = experiment_config['data_kwargs']

    for i in range(3):
        # NOTE(review): repeat 0 of "no_ramp_up" is deliberately skipped --
        # presumably it was already trained in an earlier session; confirm
        # before relying on this cell for a from-scratch run.
        already_handled = (experiment == "no_ramp_up") and (i == 0)
        if not already_handled:
            run_name = f"{experiment_config['run_name']}_{i}"

            # Train the model for this repeat.
            train(
                run_name=run_name,
                mode="semi_supervised",
                model_kwargs=model_kwargs,
                data_kwargs=data_kwargs,
                training_kwargs=training_kwargs,
            )

            # Evaluate the trained run on the evaluation sets and save the results.
            results = compute_results_over_eval_sets(run_name, eval_dataloaders, model_kwargs=model_kwargs)
            save_evaluation_results(run_name, results)


In [2]:
import os
import json
import pandas as pd
experiment_names = semi_supervised_experiments.keys()

# Root directory containing one folder per repeat, named '<experiment>_<repeat index>'
# (e.g. 'baseline_0/', 'baseline_1/').
experiments_root = './checkpoints/ablations/semisupervised'

# Only these metrics are carried into the result tables.
reported_metrics = ('miou', 'precision', 'recall')
group_cols = ['Experiment', 'Dataset', 'Metric']

# List the run folders once (the listing does not depend on the experiment) and
# sort them so the row order of the per-repeat CSV is reproducible; os.listdir
# returns entries in arbitrary order.
run_folders = sorted(
    d for d in os.listdir(experiments_root)
    if os.path.isdir(os.path.join(experiments_root, d))
)

flattened_data = []

for exp_name in experiment_names:
    # A repeat folder is exactly '<exp_name>_<digits>'. Requiring a purely numeric
    # suffix keeps an experiment from absorbing folders that belong to another
    # experiment whose name merely extends this one (e.g. 'foo' vs 'foo_bar_0').
    prefix = exp_name + '_'
    matching_folders = [
        d for d in run_folders
        if d.startswith(prefix) and d[len(prefix):].isdigit()
    ]

    for folder in matching_folders:
        results_path = os.path.join(experiments_root, folder, 'eval_results.json')
        # Repeats without a results file are skipped.
        if not os.path.isfile(results_path):
            continue
        with open(results_path, 'r') as f:
            # Read as a mapping: dataset name -> {metric name -> value}.
            datasets = json.load(f)
        for dataset, metrics in datasets.items():
            for metric, value in metrics.items():
                if metric in reported_metrics:
                    flattened_data.append({
                        'Experiment': exp_name,  # Group repeats under the common experiment name
                        'Repeat': folder,
                        'Dataset': dataset,
                        'Metric': metric,
                        'Value': value
                    })

# Fail with a clear message instead of an opaque KeyError from groupby on an
# empty, column-less DataFrame.
if not flattened_data:
    raise FileNotFoundError(
        f"No eval_results.json with reported metrics found under {experiments_root!r}"
    )

# Convert to DataFrame (one row per experiment / repeat / dataset / metric)
df = pd.DataFrame(flattened_data)

# Compute mean and standard error of the mean over repeats for each experiment
values_by_group = df.groupby(group_cols)['Value']

mean_df = (
    values_by_group
    .mean()
    .reset_index()
    .rename(columns={'Value': 'Mean'})
)

sem_df = (
    values_by_group
    .sem()
    .reset_index()
    .rename(columns={'Value': 'Std_Error'})
)

# Merge summaries into one table with both 'Mean' and 'Std_Error' columns
summary_df = pd.merge(mean_df, sem_df, on=group_cols)

# Save outputs
df.to_csv('individual_repeat_results.csv', index=False)
summary_df.to_csv('semisupervised_experiment_summary.csv', index=False)

print("Saved individual repeat results and summary statistics.")


Saved individual repeat results and summary statistics.


In [None]:
import os
import json
import pandas as pd
# NOTE(review): this cell repeats the aggregation performed by the earlier cell
# and differs only in the summary filename ('experiment_summary.csv'); it also
# rewrites 'individual_repeat_results.csv'. Consider keeping only one of the two.
experiment_names = semi_supervised_experiments.keys()

# Root directory containing one folder per repeat, named '<experiment>_<repeat index>'
# (e.g. 'baseline_0/', 'baseline_1/').
experiments_root = './checkpoints/ablations/semisupervised'

# Only these metrics are carried into the result tables.
reported_metrics = ('miou', 'precision', 'recall')
group_cols = ['Experiment', 'Dataset', 'Metric']

# List the run folders once (the listing does not depend on the experiment) and
# sort them so the row order of the per-repeat CSV is reproducible; os.listdir
# returns entries in arbitrary order.
run_folders = sorted(
    d for d in os.listdir(experiments_root)
    if os.path.isdir(os.path.join(experiments_root, d))
)

flattened_data = []

for exp_name in experiment_names:
    # A repeat folder is exactly '<exp_name>_<digits>'. Requiring a purely numeric
    # suffix keeps an experiment from absorbing folders that belong to another
    # experiment whose name merely extends this one (e.g. 'foo' vs 'foo_bar_0').
    prefix = exp_name + '_'
    matching_folders = [
        d for d in run_folders
        if d.startswith(prefix) and d[len(prefix):].isdigit()
    ]

    for folder in matching_folders:
        results_path = os.path.join(experiments_root, folder, 'eval_results.json')
        # Repeats without a results file are skipped.
        if not os.path.isfile(results_path):
            continue
        with open(results_path, 'r') as f:
            # Read as a mapping: dataset name -> {metric name -> value}.
            datasets = json.load(f)
        for dataset, metrics in datasets.items():
            for metric, value in metrics.items():
                if metric in reported_metrics:
                    flattened_data.append({
                        'Experiment': exp_name,  # Group repeats under the common experiment name
                        'Repeat': folder,
                        'Dataset': dataset,
                        'Metric': metric,
                        'Value': value
                    })

# Fail with a clear message instead of an opaque KeyError from groupby on an
# empty, column-less DataFrame.
if not flattened_data:
    raise FileNotFoundError(
        f"No eval_results.json with reported metrics found under {experiments_root!r}"
    )

# Convert to DataFrame (one row per experiment / repeat / dataset / metric)
df = pd.DataFrame(flattened_data)

# Compute mean and standard error of the mean over repeats for each experiment
values_by_group = df.groupby(group_cols)['Value']

mean_df = (
    values_by_group
    .mean()
    .reset_index()
    .rename(columns={'Value': 'Mean'})
)

sem_df = (
    values_by_group
    .sem()
    .reset_index()
    .rename(columns={'Value': 'Std_Error'})
)

# Merge summaries into one table with both 'Mean' and 'Std_Error' columns
summary_df = pd.merge(mean_df, sem_df, on=group_cols)

# Save outputs
df.to_csv('individual_repeat_results.csv', index=False)
summary_df.to_csv('experiment_summary.csv', index=False)

print("Saved individual repeat results and summary statistics.")


Saved individual repeat results and summary statistics.


In [7]:
# Keep only the mIoU rows of the summary table.
is_miou = summary_df['Metric'] == 'miou'
miou_df = summary_df[is_miou]

# Show one table per dataset, in order of first appearance. The 'Metric' column
# is constant after filtering, so it is dropped from the displayed table.
for dataset in miou_df['Dataset'].drop_duplicates():
    print(f"\n--- Dataset: {dataset} ---")
    rows_for_dataset = miou_df.loc[miou_df['Dataset'] == dataset]
    display(rows_for_dataset.drop(columns=['Metric']))



--- Dataset: rr_eval ---


Unnamed: 0,Experiment,Dataset,Mean,Std_Error
0,alpha_0.9,rr_eval,0.454331,0.010645
9,alpha_0.98,rr_eval,0.500459,0.019855
18,alpha_0.999,rr_eval,0.308634,0.039406
27,baseline,rr_eval,0.495119,0.002127
36,lambda_0.1,rr_eval,0.449794,0.010907
45,lambda_2.0,rr_eval,0.452546,0.00336
54,no_ramp_up,rr_eval,0.458133,0.028303



--- Dataset: up34_eval ---


Unnamed: 0,Experiment,Dataset,Mean,Std_Error
3,alpha_0.9,up34_eval,0.517524,0.001767
12,alpha_0.98,up34_eval,0.472154,0.009551
21,alpha_0.999,up34_eval,0.310512,0.063769
30,baseline,up34_eval,0.461312,0.014873
39,lambda_0.1,up34_eval,0.489716,0.018847
48,lambda_2.0,up34_eval,0.474084,0.007044
57,no_ramp_up,up34_eval,0.471366,0.025263



--- Dataset: valid ---


Unnamed: 0,Experiment,Dataset,Mean,Std_Error
6,alpha_0.9,valid,0.434556,0.001754
15,alpha_0.98,valid,0.431697,0.001525
24,alpha_0.999,valid,0.22071,0.056816
33,baseline,valid,0.430219,0.001
42,lambda_0.1,valid,0.427724,0.005765
51,lambda_2.0,valid,0.431067,0.006183
60,no_ramp_up,valid,0.433757,0.003392
