In [None]:
# Install optuna if needed (use %pip so the packages land in this kernel's environment)
# %pip install optuna loguru

In [None]:
# Import libraries
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from optuna.visualization import (
    plot_optimization_history,
    plot_param_importances,
    plot_parallel_coordinate,
    plot_slice
)

# Silence every warning so library notices do not clutter the cell output.
# NOTE(review): this blanket filter also hides potentially useful warnings —
# narrow or remove it when debugging.
warnings.filterwarnings('ignore')

# Add the parent directory to the import path so the `src` package resolves
# (assumes the notebook is run from a direct subdirectory of the project root).
# Guarded so that re-running this cell does not append duplicate entries.
import sys
if '..' not in sys.path:
    sys.path.append('..')

from src.config import (
    SEQUENCES_DIR, LOGS_DIR, OPTUNA_DB_PATH,
    OUTPUT_SEQ_LEN, OPTUNA_SEARCH_SPACE, OPTUNA_N_TRIALS, OPTUNA_TIMEOUT,
    DEVICE, RANDOM_SEED, set_seed
)
from src.optuna_optimization import run_optuna_optimization
from src.utils import load_json, save_json

# Apply the project's seed helper with the configured seed
# (presumably seeds the RNGs for reproducibility — see src.config.set_seed).
set_seed(RANDOM_SEED)

# Confirm the setup cell finished and show the configured device
print("Libraries imported successfully!", f"Device: {DEVICE}", sep="\n")

## 6a.1 Load Data

In [None]:
# Load the train/validation arrays stored as .npy files under SEQUENCES_DIR.
# The files are read in the same order as the names on the left-hand side.
X_train, y_train, X_val, y_val = (
    np.load(os.path.join(SEQUENCES_DIR, filename))
    for filename in ('X_train.npy', 'y_train.npy', 'X_val.npy', 'y_val.npy')
)

# Report the array shapes as a quick sanity check
print(
    f"X_train: {X_train.shape}",
    f"y_train: {y_train.shape}",
    f"X_val: {X_val.shape}",
    f"y_val: {y_val.shape}",
    sep="\n",
)

## 6a.2 Define Search Space

In [None]:
# Print search space from config
print("Optuna Search Space:")
print("=" * 50)
for param, values in OPTUNA_SEARCH_SPACE.items():
    # Lists are reported as categorical choices; anything else (e.g. a tuple) as a range.
    space_kind = "categorical" if isinstance(values, list) else "range"
    print(f"  {param}: {space_kind} {values}")
print("=" * 50)
print(f"\nNumber of trials: {OPTUNA_N_TRIALS}")
# A timeout of None (Optuna's "no time limit") cannot be divided by 3600,
# so report it explicitly instead of raising TypeError.
if OPTUNA_TIMEOUT is None:
    print("Timeout: none (no time limit)")
else:
    print(f"Timeout: {OPTUNA_TIMEOUT/3600:.1f} hours")

In [None]:
# Optional: an alternative search space that can replace the one from config.
# Lists hold categorical choices; tuples are presumably (low, high) bounds —
# confirm against how run_optuna_optimization interprets them.
custom_search_space = dict(
    hidden_size=[64, 128, 256],
    num_layers=[1, 2, 3],
    dropout=(0.1, 0.5),
    learning_rate=(1e-4, 1e-2),
    batch_size=[32, 64, 128],
    weight_decay=(1e-6, 1e-3),
    teacher_forcing_ratio=(0.3, 0.7),
)

# Choose the search space handed to the optimizer. The config default is active;
# assign custom_search_space here instead to override it.
search_space = OPTUNA_SEARCH_SPACE

## 6a.3 Run Optimization

In [None]:
# Make sure the log directory (passed below as log_dir) exists
os.makedirs(LOGS_DIR, exist_ok=True)

# Collect the optimizer arguments in one place, then launch the search.
# Note: this will take a while depending on n_trials and timeout;
# reduce n_trials for faster testing.
optimization_kwargs = dict(
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    output_seq_len=OUTPUT_SEQ_LEN,
    search_space=search_space,
    n_trials=OPTUNA_N_TRIALS,
    timeout=OPTUNA_TIMEOUT,
    device=DEVICE,
    log_dir=LOGS_DIR,
    db_path=OPTUNA_DB_PATH,
)
results = run_optuna_optimization(**optimization_kwargs)

## 6a.4 Results Analysis

In [None]:
# Summarise the optimization run: trial counts, duration, best trial and its hyperparameters
rule = "=" * 60
print(f"\n{rule}")
print("OPTIMIZATION RESULTS")
print(rule)
print(f"\nCompleted trials: {results['n_completed_trials']}")
print(f"Pruned trials: {results['n_pruned_trials']}")
print(f"Duration: {results['duration_minutes']:.1f} minutes")
print(f"\nBest trial: #{results['best_trial_number']}")
print(f"Best validation loss: {results['best_value']:.6f}")
print("\nBest hyperparameters:")
# One indented "name: value" line per tuned hyperparameter
for name in results['best_params']:
    print(f"  {name}: {results['best_params'][name]}")
print(rule)

In [None]:
# Write the best hyperparameters to best_params.json inside LOGS_DIR
# using the project's save_json helper, then report where they went.
best_params_path = os.path.join(LOGS_DIR, 'best_params.json')
save_json(results['best_params'], best_params_path)
print("Best parameters saved to: " + str(best_params_path))

## 6a.5 Visualize Optimization

In [None]:
# Get study object
# The results dict returned by run_optuna_optimization carries the study under
# the 'study' key; the visualization cells below all read from this object.
study = results['study']

In [None]:
# 1. Optimization history
# update_layout returns the same figure, so the title is applied in the same expression.
fig = plot_optimization_history(study).update_layout(title="Optimization History")
fig.show()

In [None]:
# 2. Parameter importance
# This plot is best-effort: importance estimation can fail (for example when
# too few trials have completed). Catch Exception rather than using a bare
# `except:` so KeyboardInterrupt/SystemExit still propagate, and report the
# actual error instead of assuming its cause.
try:
    fig = plot_param_importances(study)
    fig.update_layout(title="Hyperparameter Importance")
    fig.show()
except Exception as exc:
    print(
        "Parameter importance unavailable (often caused by too few completed trials): "
        f"{type(exc).__name__}: {exc}"
    )

In [None]:
# 3. Parallel coordinate plot
# update_layout returns the same figure, so the title is applied in the same expression.
fig = plot_parallel_coordinate(study).update_layout(title="Parallel Coordinate Plot")
fig.show()

In [None]:
# 4. Slice plot for each parameter
# Unlike the plots above, no custom title is set here; the figure is shown
# with whatever layout plot_slice produces by default.
fig = plot_slice(study)
fig.show()

In [None]:
# 5. Manual visualization - Trials dataframe
# study.trials_dataframe() returns a table with one row per trial. pandas is
# not referenced directly in this cell, so no local import is needed here.
trials_df = study.trials_dataframe()

# Hyperparameter columns are the ones whose names start with 'params_'.
param_columns = [col for col in trials_df.columns if col.startswith('params_')]

# Show the 10 trials with the smallest objective value, alongside their state
# and sampled hyperparameters.
print("Top 10 trials:")
display(trials_df.nsmallest(10, 'value')[['number', 'value', 'state'] + param_columns])

## 6a.6 Continue from Previous Study (Optional)

In [None]:
# Load existing study to continue optimization
# study = optuna.load_study(
#     study_name="seq2seq_optimization",
#     storage=f"sqlite:///{OPTUNA_DB_PATH}"
# )
# print(f"Loaded study with {len(study.trials)} trials")
# print(f"Best value so far: {study.best_value}")

## Summary

**Optuna Optimization completed:**
1. ✅ Loaded training and validation data
2. ✅ Defined hyperparameter search space
3. ✅ Ran optimization with logging
4. ✅ Saved best parameters
5. ✅ Visualized optimization results

**Next step:** 
- Use best parameters in `06_Model_Training.ipynb`
- Or load from `logs/best_params.json`