In [None]:
# ==================== SETUP PATH & IMPORTS ====================
import sys
import os
import json
import random
import numpy as np
import pandas as pd
import torch
from pathlib import Path

# Seed every random number generator this notebook uses (Python, NumPy,
# PyTorch) so that a fresh "Restart & Run All" starts from the same state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


def find_project_root(start: Path) -> Path:
    """Return the directory that holds the project's ``src`` and ``configs`` packages.

    The notebook imports from ``src.*`` and ``configs.*``, so the project root
    is the first directory, walking upward from ``start``, that contains both
    sub-directories. This keeps the imports working whether the kernel's
    working directory is ``notebooks/`` or the project root itself.

    Args:
        start: Directory to begin the upward search from.

    Returns:
        The first matching ancestor (or ``start`` itself). When nothing
        matches, ``start.parent`` is returned, which is the original
        "notebook lives in notebooks/" assumption.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src").is_dir() and (candidate / "configs").is_dir():
            return candidate
    return start.parent


# Add project root to path so the project packages can be imported.
PROJECT_ROOT = find_project_root(Path.cwd())
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print(f"‚úì Project Root: {PROJECT_ROOT}")
print(f"‚úì Random seed: {SEED}")

In [None]:
# ==================== IMPORT PROJECT MODULES ====================
# Project-local imports. They resolve only because the setup cell has already
# inserted PROJECT_ROOT into sys.path, so run that cell first.
from src.agents import DDPGAgent  # agent class instantiated and trained below
from src.environments import HVACEnvironment  # environment class stepped by the training loop
from src.visualization import plot_training_progress  # called after training to plot the results
from configs.config_manager import get_train_config  # builds the config object used throughout

print("‚úì All project modules imported successfully!")

## 1. Configuration

Chọn chế độ training và các hyperparameters.

In [None]:
# ==================== CONFIGURATION ====================
# Mode switch: True = with forecast (state_dim=15),
# False = without forecast (state_dim=14).
USE_FORECAST = False

# Base settings come from config_manager for the selected mode.
config = get_train_config(use_forecast=USE_FORECAST)

# Notebook-level overrides (adjust as needed).
config.NUM_EPISODES, config.SAVE_FREQ, config.BATCH_SIZE = 50, 2, 512

# Input file locations, anchored at the project root (change the paths if needed).
config.FMU_PATH = str(PROJECT_ROOT.joinpath("HVAC.fmu"))
config.WEATHER_CSV = str(PROJECT_ROOT.joinpath("data", "weather_data.csv"))

# Let the config create its directories, then show the effective settings.
config.create_directories()
config.display()

# Echo the paths this run reads from and writes to.
print(
    f"\nüìÅ Paths:",
    f"   FMU:         {config.FMU_PATH}",
    f"   Weather:     {config.WEATHER_CSV}",
    f"   Checkpoints: {config.CHECKPOINT_PATH}",
    f"   Results:     {config.RESULTS_PATH}",
    sep="\n",
)

## 2. Initialize Environment & Agent

In [None]:
# ==================== INITIALIZE ====================
print("=" * 70, "üîß INITIALIZING ENVIRONMENT & AGENT", "=" * 70 + "\n", sep="\n")

# Both objects are built from the same config: the environment comes from
# src.environments, the DDPG agent from src.agents.
env = HVACEnvironment(config, verbose=False)
agent = DDPGAgent(config)

# Report what was created and the dimensions/device taken from the config.
print(
    f"\n‚úÖ Environment: {type(env).__name__}",
    f"‚úÖ Agent: {type(agent).__name__}",
    f"   State dim:  {config.STATE_DIM}",
    f"   Action dim: {config.ACTION_DIM}",
    f"   Device:     {config.DEVICE}",
    sep="\n",
)

## 3. Training Loop

In [None]:
# ==================== TRAINING LOOP ====================
# Runs config.NUM_EPISODES episodes. Within an episode the agent picks an
# action (with noise enabled), the environment is stepped, the transition is
# stored, and agent.train() is called once per step until the environment
# reports `done`. After each episode the statistics are recorded and the
# best / periodic checkpoints are written under config.CHECKPOINT_PATH.
print("\n" + "="*70)
print("üöÄ STARTING DDPG TRAINING")
print("="*70 + "\n")

# Tracking: per-episode history, read again by the saving, summary and
# plotting cells further down.
episode_rewards = []
episode_stats = []
best_reward = -np.inf  # sentinel: the first finite episode reward becomes the initial best

VERBOSE_STEP = False  # Set True to print details for individual steps

for episode in range(1, config.NUM_EPISODES + 1):
    print(f"\n{'='*70}")
    print(f"üìä Episode {episode}/{config.NUM_EPISODES}")
    
    # Show the current learning rates and exploration settings
    lrs = agent.get_current_lr()
    print(f"   LR: Actor={lrs['actor_lr']:.2e}  Critic={lrs['critic_lr']:.2e}")
    print(f"   Exploration: Œµ={agent.epsilon:.3f}  œÉ={agent.noise.get_sigma():.3f}")
    print(f"{'='*70}")
    
    # Reset the environment and the agent's noise object at the start of each episode
    state = env.reset(episode=episode)
    agent.noise.reset()
    
    episode_reward = 0.0
    step_count = 0
    force_explore = episode <= 3  # True for episodes 1-3 only; forwarded to select_action
    done = False
    
    while not done:
        # Select action (noise enabled during training)
        action = agent.select_action(state, add_noise=True, force_explore=force_explore)
        
        # Execute one environment step
        next_state, reward, done, info = env.step(action)
        
        # Store the transition for later training
        agent.store_transition(state, action, reward, next_state, done)
        
        # Train the agent once per environment step; the returned losses are
        # unpacked but not used anywhere else in this cell
        actor_loss, critic_loss = agent.train()
        
        episode_reward += reward
        step_count += 1
        state = next_state
        
        # Verbose output: every 100th step, only when enabled and `info` is truthy.
        # P_total is divided by 1000 and labelled kW in the message.
        if VERBOSE_STEP and step_count % 100 == 0 and info:
            print(f"  Step {step_count:04d} | T={info['T_zone']:.2f}¬∞C | "
                  f"RH={info['RH_zone']:.3f} | P={info['P_total']/1000:.2f}kW | r={reward:.3f}")
    
    # Hand the episode reward to the agent's learning-rate update
    # (presumably a reward-driven schedule; confirm in src.agents)
    agent.update_learning_rate(episode_reward)
    
    # Collect the environment's statistics for the finished episode
    stats = env.get_episode_stats()
    episode_rewards.append(episode_reward)
    episode_stats.append(stats)
    
    # Print episode summary (comfort ratios are shown as percentages)
    print(f"\nüìà Episode {episode} Summary:")
    print(f"   Total Reward:     {episode_reward:.2f}")
    print(f"   Steps:            {step_count}")
    print(f"   Avg Temperature:  {stats['avg_T']:.2f}¬∞C ¬± {stats['std_T']:.2f}¬∞C")
    print(f"   Avg Humidity:     {stats['avg_RH']:.3f} ¬± {stats['std_RH']:.3f}")
    print(f"   T Comfort:        {stats['T_comfort_ratio']*100:.1f}%")
    print(f"   RH Comfort:       {stats['RH_comfort_ratio']*100:.1f}%")
    print(f"   Action Diversity: {stats['action_diversity']:.4f}")
    print(f"   Buffer Size:      {agent.replay_buffer.size()}")
    
    # Save best model: overwrite best_model.pth whenever the episode reward
    # strictly exceeds the best seen so far
    if episode_reward > best_reward:
        best_reward = episode_reward
        best_path = config.CHECKPOINT_PATH / "best_model.pth"
        agent.save(best_path)
        print(f"   ‚≠ê NEW BEST MODEL! Saved to {best_path}")
    
    # Periodic save: one numbered checkpoint every config.SAVE_FREQ episodes
    if episode % config.SAVE_FREQ == 0:
        checkpoint_path = config.CHECKPOINT_PATH / f"model_ep{episode}.pth"
        agent.save(checkpoint_path)
        print(f"   üíæ Checkpoint saved: {checkpoint_path}")

print("\n‚úÖ Training loop completed!")

## 4. Save Final Model & Results

In [None]:
# ==================== SAVE FINAL MODEL ====================
def _json_default(value):
    """Convert values the ``json`` encoder cannot serialise on its own.

    ``json.dump`` calls this hook only for objects it does not know how to
    encode. The per-episode stats come from ``env.get_episode_stats()`` and the
    config values from ``config``; their value types are not visible in this
    notebook, so NumPy scalars/arrays and ``Path`` objects are converted here
    rather than letting the dump fail after a full training run.

    Args:
        value: The object the encoder could not handle.

    Returns:
        A built-in Python equivalent (number, bool, list, or str).

    Raises:
        TypeError: For any other unsupported type, matching the encoder's
            normal behaviour.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, Path):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Save the agent as it stands after the last episode.
final_path = config.CHECKPOINT_PATH / "final_model.pth"
agent.save(final_path)
print(f"‚úì Final model saved: {final_path}")

# Training history: rewards, per-episode stats, and the settings used.
history = {
    'episode_rewards': [float(r) for r in episode_rewards],
    'episode_stats': episode_stats,
    'config': {
        'use_forecast': config.USE_FORECAST,
        'state_dim': config.STATE_DIM,
        'action_dim': config.ACTION_DIM,
        'num_episodes': config.NUM_EPISODES,
        'lr_actor': config.LR_ACTOR,
        'lr_critic': config.LR_CRITIC,
        'gamma': config.GAMMA,
        'tau': config.TAU
    }
}

history_path = config.RESULTS_PATH / "training_history.json"
# Explicit encoding keeps the file identical across platforms; `default`
# handles NumPy/Path values nested anywhere inside `history`.
with open(history_path, 'w', encoding='utf-8') as f:
    json.dump(history, f, indent=2, default=_json_default)
print(f"‚úì Training history saved: {history_path}")

# Save stats to CSV: one row per episode, with the episode number (1-based)
# and the total reward added as extra columns.
stats_df = pd.DataFrame(episode_stats)
stats_df['episode'] = range(1, len(episode_stats) + 1)
stats_df['total_reward'] = episode_rewards
stats_csv = config.RESULTS_PATH / "training_stats.csv"
stats_df.to_csv(stats_csv, index=False)
print(f"‚úì Training stats saved: {stats_csv}")

## 5. Training Summary

In [None]:
# ==================== TRAINING SUMMARY ====================
# One consolidated report: run mode, reward figures (best, last, mean of the
# last five episodes), the final episode's comfort statistics, and the
# output locations.
print(
    "\n" + "=" * 70,
    "üìã TRAINING SUMMARY",
    "=" * 70,
    f"  Mode:              {'WITH Forecast' if config.USE_FORECAST else 'NO Forecast'}",
    f"  Total Episodes:    {len(episode_rewards)}",
    f"  Best Reward:       {best_reward:.2f}",
    f"  Final Reward:      {episode_rewards[-1]:.2f}",
    f"  Avg Last 5 Ep:     {np.mean(episode_rewards[-5:]):.2f}",
    f"\n  Final Stats:",
    f"    Avg Temperature: {episode_stats[-1]['avg_T']:.2f}¬∞C",
    f"    T Comfort:       {episode_stats[-1]['T_comfort_ratio']*100:.1f}%",
    f"    Avg Humidity:    {episode_stats[-1]['avg_RH']:.3f}",
    f"    RH Comfort:      {episode_stats[-1]['RH_comfort_ratio']*100:.1f}%",
    f"\nüìÅ Checkpoints: {config.CHECKPOINT_PATH}",
    f"üìÅ Results:     {config.RESULTS_PATH}",
    "=" * 70,
    sep="\n",
)

## 6. Plot Training Progress

Sử dụng `plot_training_progress` từ `src.visualization`.

In [None]:
# ==================== PLOT TRAINING PROGRESS ====================
# Use the plotting function from src.visualization. It receives the reward
# history, the per-episode stats, and the results directory.
plot_training_progress(episode_rewards, episode_stats, config.RESULTS_PATH)

print(f"\n‚úÖ Training curves saved to: {config.RESULTS_PATH}")

In [None]:
# ==================== DISPLAY TRAINING PLOT ====================
from IPython.display import Image, display

# Show the saved training figure inline, if it is present in the results directory.
plot_path = config.RESULTS_PATH / "training_progress.png"
if plot_path.exists():
    print("üìä Training Progress:")
    display(Image(filename=str(plot_path), width=900))
else:
    # Say so explicitly instead of producing an empty cell: the final summary
    # lists this file as an output, so its absence should be visible.
    print(f"Training plot not found: {plot_path} (run the plotting cell first)")

## 7. Quick Test Best Model (Optional)

Test nhanh best model để kiểm tra action diversity.

In [None]:
# ==================== QUICK TEST ====================
print("\n" + "=" * 70, "üß™ QUICK TEST - Best Model (100 steps)", "=" * 70 + "\n", sep="\n")

# A fresh agent restored from the best checkpoint written during training.
test_agent = DDPGAgent(config)
test_agent.load(config.CHECKPOINT_PATH / "best_model.pth")
print(f"‚úì Loaded best model")

# Roll the policy forward for at most 100 steps with noise disabled,
# recording a copy of every action; stop early if the episode ends.
test_state = env.reset()
test_actions = []

for i in range(100):
    action = test_agent.select_action(test_state, add_noise=False)
    test_actions.append(action.copy())
    next_state, _, done, _ = env.step(action)
    test_state = next_state
    if done:
        break

# Diversity = mean over action components of each component's std across steps.
test_actions = np.array(test_actions)
action_diversity = test_actions.std(axis=0).mean()

print(f"\nüîç Action Analysis (100 steps, no noise):")
print(f"   Diversity (std):  {action_diversity:.4f}")

# Per-component mean/std; each label carries its own padding so the output
# columns line up.
for column, label in enumerate(("uFan:    ", "uOA:     ", "uChiller: ", "uHeater: ", "uFanEA:  ")):
    component = test_actions[:, column]
    print(f"   {label}mean={np.mean(component):.3f}, std={np.std(component):.3f}")

# A near-constant action vector suggests the policy has collapsed.
print(
    "\n‚ö†Ô∏è  WARNING: Very low diversity - model might be STUCK!"
    if action_diversity < 0.01
    else f"\n‚úÖ Action diversity is healthy!"
)

## 8. Final Summary

In [None]:
# ==================== FINAL SUMMARY ====================
# Closing banner listing the output file locations for this run.
print(
    "\n" + "=" * 70,
    "‚úÖ TRAINING COMPLETED!",
    "=" * 70,
    f"\nüìÅ Output Files:",
    f"   Best Model:    {config.CHECKPOINT_PATH / 'best_model.pth'}",
    f"   Final Model:   {config.CHECKPOINT_PATH / 'final_model.pth'}",
    f"   History JSON:  {config.RESULTS_PATH / 'training_history.json'}",
    f"   Stats CSV:     {config.RESULTS_PATH / 'training_stats.csv'}",
    f"   Plot:          {config.RESULTS_PATH / 'training_progress.png'}",
    "\n" + "=" * 70,
    sep="\n",
)