# Run multiple repetitions of a full segmentation experiment
This notebook is useful for testing variables that may influence the result. In this example, we evaluate the impact of using a variable number of images for training a U-Net.

In [None]:
import os
import utils.experiment
import utils.dirtools
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

In [None]:
from config import config_vars

# Number of times each training-set size is repeated in the main loop.
total_repetitions = 10

# Name of the data partition handed to utils.experiment.run for evaluation.
partition = "validation"

# Experiment identifier: it names both the experiment directory and the
# results CSV under "<root_directory>/experiments/".
experiment_name = "impact_of_augmented_dataset_size"

# Let the project helper update the configuration for this experiment
# (what it changes is defined in utils.dirtools, not in this notebook).
config_vars = utils.dirtools.setup_experiment(config_vars, experiment_name)

# Display the resulting configuration as the cell output.
config_vars

# Main loop
Run repetitions for each number of images

In [None]:
# Run `total_repetitions` full experiments for every training-set size and
# collect one row of segmentation metrics per run. The accumulated table is
# rewritten to "<root_directory>/experiments/<experiment_name>.csv" after every
# run, so partial results survive an interrupted session.
import shutil

# Training-set sizes (number of images) to evaluate.
sample_sizes = [2, 4, 6, 8, 10, 20, 40, 60, 80, 100]

results = pd.DataFrame(columns=["Samples", "Repeat", "Average_F1", "Jaccard", "Missed", "Merges", "Splits"])
idx = 0  # running row index across all (size, repetition) pairs

for max_samples in sample_sizes:
    for repetition in range(total_repetitions):
        print("Experiment", idx, "- max_samples:", max_samples, "- repetition:", repetition)

        # Modify settings
        config_vars["max_training_images"] = max_samples

        # Reconfigure variables and data partitions so the new setting is applied
        config_vars = utils.dirtools.setup_experiment(config_vars, experiment_name)
        data_partitions = utils.dirtools.read_data_partitions(config_vars)

        # Run experiment
        output = utils.experiment.run(config_vars, data_partitions, experiment_name, partition, GPU="0")

        # Collect outputs
        # NOTE(review): "Missed" is the only metric reduced with .sum(); presumably
        # it is returned per category while the others are scalars - confirm
        # against utils.experiment.run.
        results.loc[idx] = {
            "Samples": max_samples,
            "Repeat": repetition,
            "Average_F1": output["Average_F1"],
            "Jaccard": output["Jaccard"],
            "Missed": output["Missed"].sum(),
            "Merges": output["Merges"],
            "Splits": output["Splits"],
        }
        idx += 1

        # Clean up directories so the next repetition starts from scratch.
        # shutil.rmtree avoids building a shell command from the path (which
        # breaks on spaces/metacharacters); ignore_errors=True keeps the
        # best-effort behaviour of "rm -f".
        experiment_dir = config_vars["root_directory"] + "/experiments/" + experiment_name
        if os.path.exists(experiment_dir):
            shutil.rmtree(experiment_dir, ignore_errors=True)

        # Save results (the CSV sits next to the experiment directory, not inside it)
        results.to_csv(config_vars["root_directory"] + "/experiments/" + experiment_name + ".csv")

# Visualize results
After all repetitions are done, load the results and visualize them.

In [None]:
# Load all the results and repetitions for analysis, then summarize every
# metric per training-set size as mean and standard error of the mean.

# The CSV is written with the DataFrame index as its first column; read it back
# as the index so it is not mistaken for a metric (otherwise it shows up in
# the summary as "Unnamed: 0" / "Unnamed: 0_se").
results = pd.read_csv(
    config_vars["root_directory"] + "/experiments/" + experiment_name + ".csv",
    index_col=0,
)

by_samples = results.groupby("Samples")
mean = by_samples.mean().reset_index()
sem = by_samples.sem().reset_index()

# Suffix the standard-error columns so they can sit next to the means.
sem.columns = [c + "_se" for c in sem.columns]

# "Samples_se" duplicates the group key and the repetition counter is not a metric.
data = pd.concat([mean, sem], axis=1).drop(["Samples_se", "Repeat", "Repeat_se"], axis=1)
data

In [None]:
# Pick the metric to plot; any metric column of `data` that has a matching
# "<name>_se" column works (e.g. "Jaccard").
results_column = "Average_F1"
error_column = results_column + "_se"

# Mean of the chosen metric against the number of training images, with the
# standard error as error bars and a logarithmic x axis.
plt.figure(figsize=(8, 8))
plt.errorbar(data["Samples"], data[results_column], yerr=data[error_column])
plt.xscale("log")