In [15]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import warnings
import os

warnings.filterwarnings('ignore')

# Add src to path
import sys
sys.path.append('..')

from src.config import (
    SEQUENCES_DIR, BEST_MODEL_PATH, CHECKPOINTS_DIR, TRAINING_FIGURES_DIR,
    LOGS_DIR, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN,
    ENCODER_HIDDEN_SIZE, ENCODER_NUM_LAYERS, ENCODER_DROPOUT, ENCODER_BIDIRECTIONAL,
    DECODER_HIDDEN_SIZE, DECODER_NUM_LAYERS, DECODER_DROPOUT,
    BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY, NUM_EPOCHS,
    EARLY_STOPPING_PATIENCE, GRADIENT_CLIP, TEACHER_FORCING_RATIO,
    LR_SCHEDULER, DEVICE, RANDOM_SEED, set_seed
)
from src.dataset import create_dataloaders
from src.model import build_model
from src.train import train
from src.utils import print_gpu_info, save_figure, load_json

# Set random seed
set_seed(RANDOM_SEED)

# Create logs directory
os.makedirs(LOGS_DIR, exist_ok=True)

print("Libraries imported successfully!")
print_gpu_info()

Libraries imported successfully!
GPU: NVIDIA GeForce RTX 3060
CUDA Version: 11.8
Memory Allocated: 24.25 MB
Memory Cached: 172.00 MB


## 6.1 Load Sequences

In [16]:
# Load sequences
X_train = np.load(os.path.join(SEQUENCES_DIR, 'X_train.npy'))
y_train = np.load(os.path.join(SEQUENCES_DIR, 'y_train.npy'))
X_val = np.load(os.path.join(SEQUENCES_DIR, 'X_val.npy'))
y_val = np.load(os.path.join(SEQUENCES_DIR, 'y_val.npy'))
X_test = np.load(os.path.join(SEQUENCES_DIR, 'X_test.npy'))
y_test = np.load(os.path.join(SEQUENCES_DIR, 'y_test.npy'))

# Load metadata
metadata = load_json(os.path.join(SEQUENCES_DIR, 'metadata.json'))

print("Data loaded:")
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")
print(f"X_val: {X_val.shape}")
print(f"y_val: {y_val.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_test: {y_test.shape}")

Data loaded:
X_train: (19394, 24, 22)
y_train: (19394, 5)
X_val: (4156, 24, 22)
y_val: (4156, 5)
X_test: (4157, 24, 22)
y_test: (4157, 5)


In [17]:
# Get dimensions
n_features = X_train.shape[2]
print(f"\nNumber of input features: {n_features}")
print(f"Input sequence length: {INPUT_SEQ_LEN}")
print(f"Output sequence length: {OUTPUT_SEQ_LEN}")


Number of input features: 22
Input sequence length: 24
Output sequence length: 5


## 6.2 Create DataLoaders

In [18]:
# Create DataLoaders
train_loader, val_loader, test_loader = create_dataloaders(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    batch_size=BATCH_SIZE,
    num_workers=0
)

print(f"\nTrain batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

Created DataLoaders with batch_size=64

Train batches: 304
Val batches: 65
Test batches: 65


## 6.3 Build Model

In [19]:
# Build Encoder-Decoder model
model = build_model(
    input_size=n_features,
    hidden_size=ENCODER_HIDDEN_SIZE,
    num_layers=ENCODER_NUM_LAYERS,
    dropout=ENCODER_DROPOUT,
    bidirectional=ENCODER_BIDIRECTIONAL,
    output_seq_len=OUTPUT_SEQ_LEN,
    device=DEVICE
)

Model built on cuda
Total parameters: 816,001
Trainable parameters: 816,001


In [20]:
# Model architecture summary
print("\nModel Architecture:")
print("=" * 60)
print(model)


Model Architecture:
Seq2Seq(
  (encoder): Encoder(
    (lstm): LSTM(22, 128, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (fc_hidden): Linear(in_features=256, out_features=128, bias=True)
    (fc_cell): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Decoder(
    (lstm): LSTM(1, 128, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=1, bias=True)
  )
)


## 6.4 Training Configuration

In [21]:
# Print training configuration
print("Training Configuration:")
print("=" * 60)
print(f"Device: {DEVICE}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Weight decay: {WEIGHT_DECAY}")
print(f"Epochs: {NUM_EPOCHS}")
print(f"Early stopping patience: {EARLY_STOPPING_PATIENCE}")
print(f"Gradient clipping: {GRADIENT_CLIP}")
print(f"Teacher forcing ratio: {TEACHER_FORCING_RATIO}")
print(f"LR scheduler: {LR_SCHEDULER}")
print("=" * 60)

Training Configuration:
Device: cuda
Batch size: 64
Learning rate: 0.001
Weight decay: 1e-05
Epochs: 100
Early stopping patience: 15
Gradient clipping: 1.0
Teacher forcing ratio: 0.5
LR scheduler: ReduceLROnPlateau


## 6.4a Load Optuna Best Params (Optional)

Run this cell if you have already run Optuna optimization and want to use the best parameters.

In [22]:
# Load best parameters from Optuna (if available)
best_params_path = os.path.join(LOGS_DIR, 'best_params.json')

USE_OPTUNA_PARAMS = True  # Set to True to use Optuna parameters

if USE_OPTUNA_PARAMS and os.path.exists(best_params_path):
    optuna_params = load_json(best_params_path)
    print("Loaded Optuna best parameters:")
    for k, v in optuna_params.items():
        print(f"  {k}: {v}")
    
    # Override config values
    ENCODER_HIDDEN_SIZE = optuna_params.get('hidden_size', ENCODER_HIDDEN_SIZE)
    ENCODER_NUM_LAYERS = optuna_params.get('num_layers', ENCODER_NUM_LAYERS)
    ENCODER_DROPOUT = optuna_params.get('dropout', ENCODER_DROPOUT)
    LEARNING_RATE = optuna_params.get('learning_rate', LEARNING_RATE)
    BATCH_SIZE = optuna_params.get('batch_size', BATCH_SIZE)
    WEIGHT_DECAY = optuna_params.get('weight_decay', WEIGHT_DECAY)
    TEACHER_FORCING_RATIO = optuna_params.get('teacher_forcing_ratio', TEACHER_FORCING_RATIO)
    
    print("\nConfig values updated!")
else:
    print("Using default config parameters")

Loaded Optuna best parameters:
  hidden_size: 64
  num_layers: 2
  dropout: 0.24822872742761123
  learning_rate: 0.0015517954134162649
  batch_size: 64
  weight_decay: 0.00037944994831423065
  teacher_forcing_ratio: 0.648343601345059

Config values updated!


## 6.5 Train Model

In [23]:
# Train model with logging
history = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    teacher_forcing_ratio=TEACHER_FORCING_RATIO,
    gradient_clip=GRADIENT_CLIP,
    early_stopping_patience=EARLY_STOPPING_PATIENCE,
    lr_scheduler_type=LR_SCHEDULER,
    checkpoint_dir=CHECKPOINTS_DIR,
    best_model_path=BEST_MODEL_PATH,
    device=DEVICE,
    log_dir=LOGS_DIR  # Enable logging
)

[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36msetup_logger[0m:[36m67[0m | [1mLog file: d:\DeepLearning_final\logs\training.log[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.train[0m:[36mtrain[0m:[36m134[0m | [1mTraining on cuda[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_hyperparams[0m:[36m141[0m | [1mHyperparameters:[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_hyperparams[0m:[36m143[0m | [1m  num_epochs: 100[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_hyperparams[0m:[36m143[0m | [1m  learning_rate: 0.0015517954134162649[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_hyperparams[0m:[36m143[0m | [1m  weight_decay: 0.00037944994831423065[0m
[32m2025-12-05 04:03:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_hyperparams[0m:[36m143[0m | [1m  teacher_for

                                                            

[32m2025-12-05 04:03:11[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 1 with val_loss=0.006403[0m
[32m2025-12-05 04:03:11[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:11[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   1 | Train Loss: 0.014411 | Val Loss: 0.006403 | LR: 1.55e-03 | Time: 2.7s[0m


                                                            

[32m2025-12-05 04:03:14[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   2 | Train Loss: 0.003758 | Val Loss: 0.007568 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:16[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 3 with val_loss=0.003764[0m
[32m2025-12-05 04:03:16[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:16[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   3 | Train Loss: 0.003030 | Val Loss: 0.003764 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:19[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   4 | Train Loss: 0.002648 | Val Loss: 0.004449 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:21[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   5 | Train Loss: 0.002356 | Val Loss: 0.009058 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:24[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   6 | Train Loss: 0.002390 | Val Loss: 0.005607 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:26[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   7 | Train Loss: 0.002161 | Val Loss: 0.004045 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:29[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 8 with val_loss=0.003663[0m
[32m2025-12-05 04:03:29[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:29[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   8 | Train Loss: 0.001943 | Val Loss: 0.003663 | LR: 1.55e-03 | Time: 2.6s[0m


                                                            

[32m2025-12-05 04:03:32[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 9 with val_loss=0.003288[0m
[32m2025-12-05 04:03:32[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:32[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch   9 | Train Loss: 0.002008 | Val Loss: 0.003288 | LR: 1.55e-03 | Time: 2.7s[0m


                                                            

[32m2025-12-05 04:03:34[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 10 with val_loss=0.003255[0m
[32m2025-12-05 04:03:34[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:34[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  10 | Train Loss: 0.001897 | Val Loss: 0.003255 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:03:36[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  11 | Train Loss: 0.001874 | Val Loss: 0.003786 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:39[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 12 with val_loss=0.003198[0m
[32m2025-12-05 04:03:39[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:39[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  12 | Train Loss: 0.001822 | Val Loss: 0.003198 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:41[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  13 | Train Loss: 0.001639 | Val Loss: 0.003330 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:44[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 14 with val_loss=0.003189[0m
[32m2025-12-05 04:03:44[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:44[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  14 | Train Loss: 0.001689 | Val Loss: 0.003189 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:46[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  15 | Train Loss: 0.001799 | Val Loss: 0.003521 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:48[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 16 with val_loss=0.003173[0m
[32m2025-12-05 04:03:48[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:48[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  16 | Train Loss: 0.001729 | Val Loss: 0.003173 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:51[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  17 | Train Loss: 0.001740 | Val Loss: 0.003206 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:53[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  18 | Train Loss: 0.001624 | Val Loss: 0.003912 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:56[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  19 | Train Loss: 0.001557 | Val Loss: 0.003383 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:03:58[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 20 with val_loss=0.003107[0m
[32m2025-12-05 04:03:58[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:03:58[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  20 | Train Loss: 0.001431 | Val Loss: 0.003107 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:00[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m123[0m | [32m[1mNew best model at epoch 21 with val_loss=0.002864[0m
[32m2025-12-05 04:04:00[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_best_model[0m:[36m124[0m | [1mModel saved to: d:\DeepLearning_final\models\best_model.pth[0m
[32m2025-12-05 04:04:00[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  21 | Train Loss: 0.001600 | Val Loss: 0.002864 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:03[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  22 | Train Loss: 0.001412 | Val Loss: 0.003077 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:05[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  23 | Train Loss: 0.001450 | Val Loss: 0.003092 | LR: 1.55e-03 | Time: 2.5s[0m


                                                            

[32m2025-12-05 04:04:08[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  24 | Train Loss: 0.001375 | Val Loss: 0.003116 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:10[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  25 | Train Loss: 0.001430 | Val Loss: 0.003791 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:12[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  26 | Train Loss: 0.001557 | Val Loss: 0.002958 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:15[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  27 | Train Loss: 0.001494 | Val Loss: 0.003417 | LR: 1.55e-03 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:17[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  28 | Train Loss: 0.001179 | Val Loss: 0.002970 | LR: 7.76e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:20[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  29 | Train Loss: 0.001119 | Val Loss: 0.003213 | LR: 7.76e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:22[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  30 | Train Loss: 0.001107 | Val Loss: 0.003056 | LR: 7.76e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:24[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  31 | Train Loss: 0.001094 | Val Loss: 0.003201 | LR: 7.76e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:27[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  32 | Train Loss: 0.001092 | Val Loss: 0.003220 | LR: 7.76e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:30[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  33 | Train Loss: 0.001046 | Val Loss: 0.003149 | LR: 7.76e-04 | Time: 3.0s[0m


                                                            

[32m2025-12-05 04:04:32[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  34 | Train Loss: 0.000952 | Val Loss: 0.003244 | LR: 3.88e-04 | Time: 2.6s[0m


                                                            

[32m2025-12-05 04:04:35[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  35 | Train Loss: 0.000892 | Val Loss: 0.003276 | LR: 3.88e-04 | Time: 2.4s[0m


                                                            

[32m2025-12-05 04:04:37[0m | [1mINFO    [0m | [36msrc.logger[0m:[36mlog_epoch[0m:[36m113[0m | [1mEpoch  36 | Train Loss: 0.000917 | Val Loss: 0.003115 | LR: 3.88e-04 | Time: 2.4s[0m
[32m2025-12-05 04:04:37[0m | [32m[1mSUCCESS [0m | [36msrc.logger[0m:[36mlog_training_complete[0m:[36m132[0m | [32m[1mTraining completed in 1.5 minutes | Best epoch: 21 | Best val_loss: 0.002864[0m
[32m2025-12-05 04:04:37[0m | [1mINFO    [0m | [36msrc.train[0m:[36mtrain[0m:[36m250[0m | [1mLoaded best model from epoch 21[0m




## 6.6 Training Visualization

In [27]:
# Plot learning curves
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss curves
epochs = range(1, len(history['train_loss']) + 1)
axes[0].plot(epochs, history['train_loss'], 'b-', label='Train Loss')
axes[0].plot(epochs, history['val_loss'], 'r-', label='Validation Loss')
axes[0].axvline(x=history['best_epoch'] + 1, color='green', linestyle='--', label=f'Best Epoch ({history["best_epoch"]+1})')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss (MSE)')
axes[0].set_title('Training and Validation Loss')
axes[0].legend()
axes[0].grid(True)

# Learning rate
axes[1].plot(epochs, history['learning_rate'], 'g-')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Learning Rate')
axes[1].set_title('Learning Rate Schedule')
axes[1].set_yscale('log')
axes[1].grid(True)

plt.tight_layout()
os.makedirs(TRAINING_FIGURES_DIR, exist_ok=True)
save_figure(fig, os.path.join(TRAINING_FIGURES_DIR, 'learning_curves.png'))
plt.show()

Saved: d:\DeepLearning_final\results\figures\training\learning_curves.png


In [28]:
# Training summary
print("\n" + "=" * 60)
print("TRAINING SUMMARY")
print("=" * 60)
print(f"Total epochs trained: {len(history['train_loss'])}")
print(f"Best epoch: {history['best_epoch'] + 1}")
print(f"Best validation loss: {history['best_val_loss']:.6f}")
print(f"Final training loss: {history['train_loss'][-1]:.6f}")
print(f"Final validation loss: {history['val_loss'][-1]:.6f}")
print(f"Model saved to: {BEST_MODEL_PATH}")
print("=" * 60)


TRAINING SUMMARY
Total epochs trained: 36
Best epoch: 21
Best validation loss: 0.002864
Final training loss: 0.000917
Final validation loss: 0.003115
Model saved to: d:\DeepLearning_final\models\best_model.pth


## 6.7 Save Training History

In [29]:
# Save training history
from src.utils import save_json

history_to_save = {
    'train_loss': [float(x) for x in history['train_loss']],
    'val_loss': [float(x) for x in history['val_loss']],
    'learning_rate': [float(x) for x in history['learning_rate']],
    'best_epoch': int(history['best_epoch']),
    'best_val_loss': float(history['best_val_loss'])
}

save_json(history_to_save, os.path.join(TRAINING_FIGURES_DIR, 'training_history.json'))
print("Training history saved!")

Saved: d:\DeepLearning_final\results\figures\training\training_history.json
Training history saved!


## Summary

**Model Training completed:**
1. ✅ Loaded sequence data
2. ✅ Created DataLoaders
3. ✅ Built Encoder-Decoder model
4. ✅ Trained with GPU
5. ✅ Early stopping applied
6. ✅ Saved best model
7. ✅ Visualized learning curves

**Next step:** Evaluation (07_Evaluation.ipynb)