# Experiment

> Utilities to set up experiment folders, track experiment parameters and metrics in a CSV file, and convert notebooks to HTML or PDF

In [16]:
#| default_exp experiment

In [17]:
#| export
#| hide
import os
import csv
import pandas as pd
import subprocess
import shutil
from typing import Dict, Any, List, Optional, Tuple

from orbit_generation.constants import ORBIT_CLASS_DF

In [18]:
#| hide
from fastcore.test import test_eq

In [19]:
#| export
def setup_new_experiment(params: Dict[str, Any],              # Dictionary of parameters for the new experiment.
                         experiments_folder: str,             # Path to the folder containing all experiments.
                         csv_file: Optional[str] = None       # Optional path to the CSV file tracking experiment parameters.
                        ) -> str:                             # The path to the newly created experiment folder.
    """
    Sets up a new experiment by creating a new folder and updating the CSV file with experiment parameters.

    Behaviour:
    - If a CSV row already holds exactly these parameters (compared as strings), the folder
      `experiment_<id>` of that row is returned; when that folder is missing it is recreated
      under the tracked id and no duplicate row is written.
    - Otherwise a folder `experiment_<n>` is created, where `n` is the smallest positive integer
      used neither as an id in the CSV nor as the numeric suffix of an existing `experiment*`
      folder, and a row `id=n` plus the parameters is added to the CSV.
    - Values are written under their column name. Parameters that are not yet columns extend the
      header (the file is rewritten, older rows get empty cells for the new columns).

    Raises `KeyError` / `ValueError` if an existing CSV row has no integer `id` value.
    """
    # Ensure the experiments folder exists
    os.makedirs(experiments_folder, exist_ok=True)

    # Default CSV file to 'experiments.csv' in the experiments_folder if not provided
    if csv_file is None:
        csv_file = os.path.join(experiments_folder, 'experiments.csv')

    # Load what is already tracked, remembering the column order of the file
    fieldnames = []
    tracked_rows = []
    if os.path.isfile(csv_file):
        with open(csv_file, mode='r', newline='') as file:
            reader = csv.DictReader(file)
            fieldnames = list(reader.fieldnames or [])  # None for an empty file
            tracked_rows = list(reader)

    # Look for an experiment that already uses these parameters
    existing_experiment_ids = set()
    missing_folder = None
    for row in tracked_rows:
        existing_experiment_ids.add(int(row['id']))
        # row.get: a parameter that is not a column yet simply cannot match (no KeyError)
        if all(row.get(key) == str(value) for key, value in params.items()):
            candidate_folder = os.path.join(experiments_folder, f"experiment_{row['id']}")
            if os.path.exists(candidate_folder):
                print(f'Parameters already exist for experiment: {candidate_folder}')
                return candidate_folder
            missing_folder = candidate_folder

    # Tracked in the CSV but the folder is gone: recreate it under the tracked id.
    # Appending another row here would register the same parameters under a second id
    # that has no folder of its own.
    if missing_folder is not None:
        os.makedirs(missing_folder, exist_ok=True)
        print(f'Recreated missing folder for existing experiment: {missing_folder}')
        return missing_folder

    # Collect the numbers already used by experiment folders on disk
    existing_experiment_numbers = set()
    for entry in os.listdir(experiments_folder):
        if not entry.startswith('experiment'):
            continue
        if not os.path.isdir(os.path.join(experiments_folder, entry)):
            continue
        try:
            existing_experiment_numbers.add(int(entry.split('_')[-1]))
        except ValueError:
            # e.g. 'experiment_backup': not a numbered experiment folder, nothing to reserve
            continue

    # Determine the next experiment number, avoiding ids in the CSV and folders on disk
    next_experiment_number = 1
    while next_experiment_number in existing_experiment_ids or next_experiment_number in existing_experiment_numbers:
        next_experiment_number += 1

    # Create a new folder for the next experiment
    new_experiment_folder = os.path.join(experiments_folder, f'experiment_{next_experiment_number}')
    os.makedirs(new_experiment_folder, exist_ok=True)

    # Build the header: keep existing columns in place, add 'id' / new parameters at the end
    header = list(fieldnames)
    header_extended = False
    for key in ('id', *params):
        if key not in header:
            header.append(key)
            header_extended = True
    new_row = {**params, 'id': next_experiment_number}

    if header_extended:
        # New file, empty file, or new columns: (re)write everything under the full header.
        # extrasaction='ignore' skips cells of malformed rows that had no column name.
        with open(csv_file, mode='w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=header, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(tracked_rows)
            writer.writerow(new_row)
    else:
        # Header already covers every parameter: append one row, aligned by column name
        with open(csv_file, mode='a', newline='') as file:
            csv.DictWriter(file, fieldnames=header).writerow(new_row)

    print(f'New experiment setup complete: {new_experiment_folder}')
    print(f'Parameters saved to {csv_file}.')

    return new_experiment_folder

In [20]:
# Test function
def test_setup_new_experiment():
    """
    Checks that `setup_new_experiment` creates the experiment folder and writes one CSV row
    holding the given parameters.

    Works inside the relative folder 'test_experiments', which is removed before and after
    the checks (also when an assertion fails).
    """
    experiments_folder = 'test_experiments'
    csv_file = os.path.join(experiments_folder, 'experiments.csv')

    # Start clean: leftovers of an interrupted earlier run would change the row count
    shutil.rmtree(experiments_folder, ignore_errors=True)

    try:
        params = {'param1': 'value1', 'param2': 'value2'}
        new_folder = setup_new_experiment(params, experiments_folder)

        # Assertions
        assert os.path.exists(new_folder)
        assert os.path.exists(csv_file)

        with open(csv_file, mode='r', newline='') as file:
            rows = list(csv.DictReader(file))

        test_eq(len(rows), 1)
        test_eq(rows[0]['param1'], 'value1')
        test_eq(rows[0]['param2'], 'value2')
    finally:
        # Clean up even when a check above fails; rmtree also handles non-empty subfolders
        shutil.rmtree(experiments_folder, ignore_errors=True)

# Run the test right away so a failing check raises when this cell is executed
test_setup_new_experiment()

New experiment setup complete: test_experiments/experiment_1
Parameters saved to test_experiments/experiments.csv.


In [21]:
#| export
def add_experiment_metrics(experiments_folder: str,                    # Path to the folder containing all experiments.
                           params: Optional[Dict[str, Any]] = None,    # Optional dictionary of parameters identifying the experiment.
                           experiment_id: Optional[int] = None,        # Optional ID to identify the experiment.
                           metrics: Optional[Dict[str, Any]] = None,   # Optional dictionary of metrics to be added to the experiment.
                           csv_file: Optional[str] = None              # Optional path to the CSV file tracking experiment parameters and metrics.
                          ) -> None:
    """
    Adds metrics to an existing experiment in the CSV file based on the given parameters or ID.

    Identification:
    - When `experiment_id` is given, only the row(s) whose `id` equals it are updated
      (`params` is then not used for matching).
    - Otherwise a row matches when every parameter that is a column of the CSV equals the
      row's value (compared as strings). Parameters that are not columns are ignored, but at
      least one parameter must be a column, so unknown-only or empty `params` match nothing.

    The CSV is rewritten with its original column order; metric names that are not columns yet
    are appended to the header in the order given by `metrics`.

    Raises `FileNotFoundError` if the folder or CSV file is missing, and `ValueError` if neither
    identifier is given or no row matches.
    """
    # Ensure the experiments folder exists
    if not os.path.exists(experiments_folder):
        raise FileNotFoundError(f"The experiments folder '{experiments_folder}' does not exist.")

    # Default CSV file to 'experiments.csv' in the experiments_folder if not provided
    if csv_file is None:
        csv_file = os.path.join(experiments_folder, 'experiments.csv')

    if not os.path.isfile(csv_file):
        raise FileNotFoundError(f"The CSV file '{csv_file}' does not exist.")

    if params is None and experiment_id is None:
        raise ValueError("Either 'params' or 'experiment_id' must be provided to identify the experiment.")

    if metrics is None:
        metrics = {}

    # Read the whole CSV, keeping the column order of the file
    with open(csv_file, mode='r', newline='') as file:
        reader = csv.DictReader(file)
        header = list(reader.fieldnames or [])  # None for an empty file
        rows = list(reader)

    match_by_id = experiment_id is not None
    if not match_by_id:
        # Only parameters that are actual columns can be compared
        comparable = {key: str(value) for key, value in params.items() if key in header}

    # Update the matching row(s); ids are recorded before the update in case metrics touch 'id'
    matched_ids = []
    for row in rows:
        if match_by_id:
            is_match = int(row['id']) == experiment_id
        else:
            is_match = bool(comparable) and all(row.get(key) == value for key, value in comparable.items())
        if is_match:
            matched_ids.append(row['id'])
            row.update(metrics)

    if not matched_ids:
        if match_by_id:
            raise ValueError(f"Experiment with the specified ID {experiment_id} does not exist.")
        else:
            raise ValueError("Experiment with the specified parameters does not exist.")

    # Keep existing columns where they are and append new metric columns in a stable order
    # (a set here would shuffle the columns on every run)
    header = header + [key for key in metrics if key not in header]

    # Write the updated rows back to the CSV file
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=header)
        writer.writeheader()
        writer.writerows(rows)

    reported_id = experiment_id if match_by_id else matched_ids[0]
    print(f'Metrics added to experiment with ID {reported_id} in {csv_file}.')

In [22]:
# Test function
def test_add_experiment_metrics():
    """
    Checks that `add_experiment_metrics` stores the given metrics in the CSV row of an
    experiment created with `setup_new_experiment`, leaving its parameters unchanged.

    Works inside the relative folder 'test_experiments', which is removed before and after
    the checks (also when an assertion fails).
    """
    experiments_folder = 'test_experiments'
    csv_file = os.path.join(experiments_folder, 'experiments.csv')

    # Start clean: leftovers of an interrupted earlier run would change the row count
    shutil.rmtree(experiments_folder, ignore_errors=True)

    try:
        params = {'param1': 'value1', 'param2': 'value2'}
        metrics = {'metric1': '0.95', 'metric2': '0.85'}

        # Setup initial experiment
        setup_new_experiment(params, experiments_folder)

        # Add metrics
        add_experiment_metrics(params=params, metrics=metrics, experiments_folder=experiments_folder)

        # Assertions
        with open(csv_file, mode='r', newline='') as file:
            rows = list(csv.DictReader(file))

        test_eq(len(rows), 1)
        test_eq(rows[0]['param1'], 'value1')
        test_eq(rows[0]['param2'], 'value2')
        test_eq(rows[0]['metric1'], '0.95')
        test_eq(rows[0]['metric2'], '0.85')
    finally:
        # Clean up even when a check above fails; rmtree also handles non-empty subfolders
        shutil.rmtree(experiments_folder, ignore_errors=True)

# Run the test right away so a failing check raises when this cell is executed
test_add_experiment_metrics()

New experiment setup complete: test_experiments/experiment_1
Parameters saved to test_experiments/experiments.csv.
Metrics added to experiment with ID 1 in test_experiments/experiments.csv.


In [23]:
#| export
def convert_notebook(notebook_path: str,                # The path to the notebook to convert.
                     output_folder: str,                # The folder to save the converted file.
                     output_filename: str,              # The name of the output file.
                     format: str = 'html'               # The format to convert the notebook to ('html' or 'pdf').
                    ) -> None:                          # This function does not return a value.
    """
    Convert the specified Jupyter notebook to HTML or PDF.

    Creates `output_folder` if needed and runs `jupyter nbconvert` as a subprocess. The arguments
    are passed as a list (no shell), so paths containing spaces, quotes or shell metacharacters
    are handed over literally.

    :param notebook_path: The path to the notebook to convert.
    :param output_folder: The folder to save the converted file.
    :param output_filename: The name of the output file.
    :param format: The format to convert the notebook to ('html' or 'pdf').
    :raises RuntimeError: If `format` is 'pdf' and no `pandoc` executable is found.
    :raises subprocess.CalledProcessError: If the conversion command fails or the `jupyter`
        executable cannot be started.
    """
    if format == 'pdf' and shutil.which('pandoc') is None:
        raise RuntimeError("Pandoc is required for PDF conversion but was not found. Please install Pandoc: https://pandoc.org/installing.html")

    # Create the full path for the output file
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, f"{output_filename}.{format}")

    # Convert the notebook using nbconvert; one list item per argument, no shell parsing involved
    command = ["jupyter", "nbconvert", "--to", format, notebook_path, "--output", output_path]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
        print(f"Notebook converted to {format.upper()} and saved at {output_path}")
    except FileNotFoundError as e:
        # Without a shell a missing executable surfaces as FileNotFoundError; report it as the
        # CalledProcessError (exit status 127) that callers received when a shell ran the command.
        print(f"An error occurred while converting the notebook to {format.upper()}:")
        print(e)
        raise subprocess.CalledProcessError(127, command, stderr=str(e)) from e
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while converting the notebook to {format.upper()}:")
        print(e.stderr)
        raise

In [24]:
#| hide
# Trigger nbdev's export step from inside the notebook
import nbdev
nbdev.nbdev_export()