# Example: Checkpointing and Resume Functionality

## 1. Notebook setup

### 1.1. Imports

In [None]:
import pickle
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy import stats

sys.path.append('..')

from hill_climber import HillClimber

### 1.2. Setup data and objective function

In [None]:
# Build the starting dataset: two independent Gaussian columns
n = 5000
np.random.seed(42)  # fixed seed keeps the example reproducible

# 'x' is drawn before 'y'; swapping the order would change the seeded values
data = pd.DataFrame({
    column: np.random.normal(loc=10.0, scale=2.0, size=n)
    for column in ('x', 'y')
})

# Floor every value at 0.1 so both columns stay strictly positive
data = data.clip(lower=0.1)

# Summary statistics of the starting point (sample std, ddof=1)
print(f"Initial mean X: {np.mean(data['x']):.4f}, mean Y: {np.mean(data['y']):.4f}")
print(f"Initial std X: {np.std(data['x'], ddof=1):.4f}, std Y: {np.std(data['y'], ddof=1):.4f}")

In [None]:
def objective_same_mean_std_different_distributions(x, y):
    '''Score how differently two samples are distributed while agreeing in mean and std.

    The score is the two-sample KS statistic minus half of the combined
    relative mismatch in mean and sample standard deviation (ddof=1), so a
    larger value is better.

    Args:
        x: First sample of numeric values.
        y: Second sample of numeric values.

    Returns:
        Tuple ``(metrics, objective)``: a dict of the intermediate quantities
        keyed by display name, and the scalar objective value.
    '''
    penalty_weight = 0.5

    # First two moments of each sample
    mu_x, mu_y = np.mean(x), np.mean(y)
    sd_x, sd_y = np.std(x, ddof=1), np.std(y, ddof=1)

    # KS statistic measures how far apart the two empirical distributions are
    ks_value = stats.ks_2samp(x, y).statistic

    # Relative mismatches; the +0.1 offset keeps the denominators non-zero
    mean_gap = abs(mu_x - mu_y) / ((abs(mu_x) + abs(mu_y)) / 2 + 0.1)
    std_gap = abs(sd_x - sd_y) / ((sd_x + sd_y) / 2 + 0.1)
    combined_gap = mean_gap + std_gap

    # Reward distribution difference, pay for moment mismatch
    score = ks_value - (penalty_weight * combined_gap)

    report = {
        'Mean X': mu_x,
        'Mean Y': mu_y,
        'Std X': sd_x,
        'Std Y': sd_y,
        'KS Statistic': ks_value,
        'Mean Penalty': mean_gap,
        'Std Penalty': std_gap,
        'Total Penalty': combined_gap,
    }
    return report, score

## 2. Example 1: Single run with checkpointing

In [None]:
# Single optimization run that periodically writes its state to a checkpoint file
checkpoint_file = '../data/single_run_checkpoint.pkl'

# Gather the run configuration in one place before building the climber
single_run_settings = dict(
    data=data,
    objective_func=objective_same_mean_std_different_distributions,
    max_time=2,  # 2 minutes
    step_size=2.0,
    perturb_fraction=0.3,
    temperature=1000.0,
    cooling_rate=0.999,
    mode='maximize',
    checkpoint_file=checkpoint_file,
    save_interval=30,  # Save every 30 seconds
)
climber = HillClimber(**single_run_settings)

print("Starting optimization with periodic checkpointing...")
best_data, steps_df = climber.climb()

# Report the last recorded objective value and how many steps were logged
print("\nOptimization completed!")
final_objective = steps_df['Objective value'].iloc[-1]
print(f"Final objective: {final_objective:.4f}")
print(f"Total steps: {len(steps_df)}")

## 3. Example 2: Resume from checkpoint

In [None]:
# Rebuild a climber from the saved single-run checkpoint and give it extra time
print("Resuming optimization from checkpoint...")

resume_settings = {
    'checkpoint_file': checkpoint_file,
    'objective_func': objective_same_mean_std_different_distributions,
    'new_max_time': 1,  # Run for 1 more minute
    'new_checkpoint_file': '../data/resumed_checkpoint.pkl',
}
resumed_climber = HillClimber.resume_from_checkpoint(**resume_settings)

# Carry on optimizing from the restored state
resumed_best_data, resumed_steps_df = resumed_climber.climb()

# Report the last recorded objective value and the number of logged steps
print("\nResumed optimization completed!")
resumed_final_objective = resumed_steps_df['Objective value'].iloc[-1]
print(f"Final objective: {resumed_final_objective:.4f}")
print(f"Total steps after resume: {len(resumed_steps_df)}")

## 4. Example 3: Parallel runs with checkpointing

In [None]:
# Several replicates run in parallel, each checkpointed into a shared directory
checkpoint_dir = '../data/parallel_checkpoints'

# Same climber configuration as the single run, without a per-run checkpoint file
parallel_settings = dict(
    data=data,
    objective_func=objective_same_mean_std_different_distributions,
    max_time=1,  # 1 minute per replicate
    step_size=2.0,
    perturb_fraction=0.3,
    temperature=1000.0,
    cooling_rate=0.999,
    mode='maximize',
    save_interval=30,  # Save every 30 seconds
)
parallel_climber = HillClimber(**parallel_settings)

print("Starting parallel optimization with checkpointing...")
results = parallel_climber.climb_parallel(
    replicates=4,
    initial_noise=1.0,
    output_file='../data/parallel_results_with_checkpoints.pkl',
    checkpoint_dir=checkpoint_dir,
)

print("\nParallel optimization completed!")
print(f"Number of replicates: {len(results)}")

# Each result is unpacked as a (best data, step history) pair
for replicate_idx, (replicate_best, replicate_steps) in enumerate(results):
    last_objective = replicate_steps['Objective value'].iloc[-1]
    print(f"Replicate {replicate_idx}: Final objective = {last_objective:.4f}, Steps = {len(replicate_steps)}")

## 5. Check checkpoint files

In [None]:
# Report which checkpoint artifacts are present on disk
print("Checkpoint files created:")
print(f"Single run: {os.path.exists(checkpoint_file)}")
print(f"Resumed run: {os.path.exists('../data/resumed_checkpoint.pkl')}")

# Handle the missing-directory case first, then list the entries by name
if not os.path.exists(checkpoint_dir):
    print("Parallel checkpoint directory not found")
else:
    parallel_checkpoints = os.listdir(checkpoint_dir)
    print(f"Parallel checkpoints: {len(parallel_checkpoints)} files")
    for entry_name in sorted(parallel_checkpoints):
        print(f"  - {entry_name}")

## 6. Load and inspect a checkpoint

In [None]:
# Open the single-run checkpoint and summarize what it stores
if not os.path.exists(checkpoint_file):
    print("Checkpoint file not found")
else:
    # NOTE: unpickling can execute arbitrary code; only load checkpoint
    # files that come from a source you trust.
    with open(checkpoint_file, 'rb') as checkpoint_handle:
        checkpoint_data = pickle.load(checkpoint_handle)

    # Each field is read at print time, in the same order it is displayed
    print("Checkpoint contents:")
    print(f"- Step: {checkpoint_data['step']}")
    print(f"- Best objective: {checkpoint_data['best_objective']:.4f}")
    print(f"- Current temperature: {checkpoint_data['temp']:.2f}")
    print(f"- Elapsed time: {checkpoint_data['elapsed_time']:.1f} seconds")
    print(f"- Hyperparameters: {list(checkpoint_data['hyperparameters'].keys())}")
    print(f"- Steps recorded: {len(checkpoint_data['steps']['Step'])}")