## 1️⃣ Setup Environment

In [None]:
# Check GPU
# Runs the `nvidia-smi` shell command and shows its output in the cell.
# NOTE(review): presumably used to confirm that a GPU runtime is attached
# before the training cells run — confirm the expected hardware.
!nvidia-smi

In [None]:
# Clone repository
!git clone https://github.com/decodingai-magazine/second-brain-ai-assistant-course.git
%cd second-brain-ai-assistant-course
!git fetch origin
!git checkout -b feature/module4-training-v2 origin/feature/module4-training-v2

In [None]:
# Install dependencies
!pip install -q -r module4.5/requirements.txt
!pip install -q -r module4/requirements.txt

## 2️⃣ Verify Dataset

In [None]:
import json


def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of `path` is parsed with `json.loads`; blank lines
    (for example an extra newline at the end of the file) are skipped.
    The file is read as UTF-8.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


# Check dataset
train_path = 'module3/data/generated/train_v2.jsonl'
val_path = 'module3/data/generated/val_v2.jsonl'
test_path = 'module3/data/generated/test_v2.jsonl'

train_data = load_jsonl(train_path)
val_data = load_jsonl(val_path)
test_data = load_jsonl(test_path)

print("✅ Dataset loaded:")
print(f"   Train: {len(train_data)} samples")
print(f"   Val: {len(val_data)} samples")
print(f"   Test: {len(test_data)} samples")
print(f"   Total: {len(train_data) + len(val_data) + len(test_data)} samples")
print()

# Fail with an explicit message instead of a bare IndexError when the
# training split has no records.
if not train_data:
    raise ValueError(f"No training samples found in {train_path}")

print("Sample:")
# Show only the first 500 characters of the pretty-printed first record.
print(json.dumps(train_data[0], indent=2, ensure_ascii=False)[:500])

## 3️⃣ Run Optimization

This will run 20 trials of Bayesian optimization. Each trial:
1. Trains the model for 1 epoch with that trial's hyperparameters
2. Evaluates the validation loss
3. Reports the result to Ax
4. Ax then suggests the next trial based on all previous results

In [None]:
# IMPORTANT: Update config to use correct dataset paths
import yaml

config_path = 'module4.5/configs/optimization_config.yaml'

with open(config_path, 'r', encoding='utf-8') as f:
    # An empty YAML document parses to None; fall back to an empty mapping
    # so the assignment below cannot fail on it.
    config = yaml.safe_load(f) or {}

# Update dataset path to match Colab structure.
# A missing or null `dataset` section is created rather than raising.
# NOTE(review): the optimizer is launched from inside module4.5/ in a later
# cell — confirm the script resolves this path relative to the repository root.
dataset_cfg = config.get('dataset') or {}
dataset_cfg['path'] = 'module3/data/generated/train_v2.jsonl'
config['dataset'] = dataset_cfg

with open(config_path, 'w', encoding='utf-8') as f:
    # sort_keys=False keeps the keys in their original order instead of
    # re-sorting them alphabetically. Comments in the YAML file are not
    # preserved by a load/dump round trip.
    yaml.dump(config, f, default_flow_style=False, sort_keys=False)

print("✅ Config updated for Colab environment")

In [None]:
# Run optimization!
%cd module4.5
!python ax_optimization.py --trials 20 --output configs/

## 4️⃣ View Results

In [None]:
# Show the best configuration and a per-trial summary
import json
import pandas as pd
import yaml


def _print_banner(title, leading_newline=False):
    """Print `title` framed above and below by a 70-character '=' rule."""
    rule = "=" * 70
    print("\n" + rule if leading_newline else rule)
    print(title)
    print(rule)


# Best config
_print_banner("🏆 BEST CONFIGURATION")
with open('configs/best_config.yaml', 'r') as config_file:
    best_config = yaml.safe_load(config_file)
print(yaml.dump(best_config, default_flow_style=False))

# Full results
_print_banner("📊 ALL TRIALS", leading_newline=True)
with open('configs/optimization_results.json', 'r') as results_file:
    results = json.load(results_file)

print("Best Loss: {:.4f}".format(results['best_loss']))
print("Baseline Loss: {:.4f}".format(results['baseline_loss']))
print("Improvement: {:.1f}%".format(results['improvement_percent']))
print()

# Trial history as DataFrame, printed as plain text with three columns
df = pd.DataFrame(results['all_trials'])
summary_columns = ['trial', 'eval_loss', 'parameters']
print(df[summary_columns].to_string())

In [None]:
# Plot optimization trace
# The cell imports everything it uses so it also runs on its own after a
# kernel restart (paths are relative to the module4.5 working directory).
import json

import matplotlib.pyplot as plt
import pandas as pd

# Named `trial_history` (not `df`) so the trials table built from the results
# JSON is not overwritten by different data.
trial_history = pd.read_csv('configs/trial_history.csv')

# Read the baseline from the results file instead of repeating a literal, so
# the reference line always matches the reported "Baseline Loss".
with open('configs/optimization_results.json', 'r') as f:
    baseline_loss = json.load(f)['baseline_loss']

fig, (ax_loss, ax_best) = plt.subplots(1, 2, figsize=(12, 5))

# Loss over trials
ax_loss.plot(trial_history['trial'], trial_history['eval_loss'], 'o-', alpha=0.7)
ax_loss.axhline(y=baseline_loss, color='r', linestyle='--',
                label=f'Baseline ({baseline_loss:.4f})')
ax_loss.set_xlabel('Trial')
ax_loss.set_ylabel('Validation Loss')
ax_loss.set_title('Optimization Progress')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Best loss so far (running minimum of the validation loss)
best_so_far = trial_history['eval_loss'].cummin()
ax_best.plot(trial_history['trial'], best_so_far, 'g-', linewidth=2)
ax_best.axhline(y=baseline_loss, color='r', linestyle='--', label='Baseline')
ax_best.set_xlabel('Trial')
ax_best.set_ylabel('Best Loss So Far')
ax_best.set_title('Best Loss Evolution')
ax_best.legend()
ax_best.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig('configs/optimization_progress.png', dpi=150, bbox_inches='tight')
plt.show()

print("📊 Plot saved to: configs/optimization_progress.png")

## 5️⃣ Download Results

Download these files back to your local repository:
- `configs/best_config.yaml`
- `configs/optimization_results.json`
- `configs/trial_history.csv`
- `configs/optimization_progress.png`

In [None]:
# Zip results for download
!zip -r optimization_results.zip configs/

print("✅ Results zipped!")
print("📥 Download: optimization_results.zip")
print()
print("To integrate back to your repository:")
print("1. Download optimization_results.zip")
print("2. Extract to module4.5/configs/")
print("3. Commit changes")

---

## 🎉 Next Steps

After downloading results:

1. **Document Results**
   - Create `OPTIMIZATION_RESULTS.md`
   - Analyze what worked and why
   - Add recommendations

2. **Retrain Model** (Optional)
   - Use best config for full 3-epoch training
   - Compare with baseline (0.6097)
   - Update `TRAINING_SUMMARY.md`

3. **Continue to Module 5**
   - Production RAG system
   - Vector database integration
   - Semantic search