In [1]:
import sys
from pathlib import Path
from importlib import reload

# Repository root, resolved relative to the kernel's working directory
# (assumes the notebook is started from a folder two levels below the root).
project_root = Path.cwd().parent.parent

# Keep exactly one sys.path entry for the repo root, at the front.
# A bare insert(0, ...) appends another duplicate every time this cell is
# re-run; dropping stale copies first makes the cell idempotent while still
# giving the local `src` package import priority.
_root_entry = str(project_root)
sys.path[:] = [entry for entry in sys.path if entry != _root_entry]
sys.path.insert(0, _root_entry)

import pandas as pd
import src.model
import src.trainer

# Re-import the local modules so edits made since the kernel first loaded
# them are picked up without restarting.
reload(src.model)
reload(src.trainer)

from src.model import Decoder
from src.trainer import TimeSeriesDataset, TanaForecastTrainer

# Both splits live in the same folder; build that path once.
delhi_dir = project_root / 'src' / 'datasets' / 'delhi'
train_df = pd.read_csv(delhi_dir / 'DailyDelhiClimateTrain.csv')
test_df = pd.read_csv(delhi_dir / 'DailyDelhiClimateTest.csv')

# Rich preview of the first rows (last expression of the cell).
train_df.head()

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.0,84.5,0.0,1015.666667
1,2013-01-02,7.4,92.0,2.98,1017.8
2,2013-01-03,7.166667,87.0,4.633333,1018.666667
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667
4,2013-01-05,6.0,86.833333,3.7,1016.5


In [2]:
import torch

# Columns fed to the model as input channels, and the subset it must forecast.
feature_cols = ['meantemp', 'humidity', 'wind_speed', 'meanpressure']
target_cols = ['meantemp']

# Build the train and validation windows from one shared configuration so the
# two datasets cannot drift apart (train is constructed first, then val).
# NOTE(review): each split is created with normalize=True on its own frame;
# whether the validation split reuses the training statistics depends on
# TimeSeriesDataset -- confirm.
train_dataset, val_dataset = (
    TimeSeriesDataset(
        df=frame,
        context_window=90,
        prediction_length=7,
        feature_columns=feature_cols,
        target_columns=target_cols,
        stride=1,
        normalize=True,
    )
    for frame in (train_df, test_df)
)

# Report how many sliding windows each split yields.
print(f"Train dataset size: {len(train_dataset)}")
print(f"Val dataset size: {len(val_dataset)}")

# Peek at one sample to confirm the tensor layout.
context, target = train_dataset[0]
print(f"Context shape: {context.shape}")
print(f"Target shape: {target.shape}")


Train dataset size: 1366
Val dataset size: 18
Context shape: torch.Size([4, 90])
Target shape: torch.Size([1, 7])


In [3]:
# Seed torch's global RNG before the model is built so that weight
# initialisation -- and any torch-driven shuffling the trainer may do --
# repeats on Restart & Run All. Without this the logged losses are not
# reproducible.
SEED = 42
torch.manual_seed(SEED)

# Window sizes must match the ones used to build the datasets.
model = Decoder(
    context_window=90,
    prediction_length=7,
    d_model=len(feature_cols)
)

# NOTE(review): the saved output of this cell contains warning fragments
# pointing at F.mse_loss, and the train loss stays flat around 0.95.
# PyTorch warns there when the prediction and target sizes differ and get
# broadcast, so confirm the model's output shape matches the
# (1, prediction_length) target before trusting these numbers.
trainer = TanaForecastTrainer(
    model=model,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    batch_size=64,
    learning_rate=1e-3,
    num_epochs=50,
    checkpoint_dir=str(project_root / 'checkpoints' / 'delhi'),
    early_stopping_patience=15
)

# Run the training loop; the returned history is indexed below by
# 'train_loss', 'val_loss' and 'learning_rates'.
history = trainer.train()


Training on cpu
Total epochs: 50
Batch size: 64
Train batches: 22
Val batches: 1
------------------------------------------------------------
Epoch 1/50 | Train Loss: 0.958672 | Val Loss: 1.801806 | LR: 9.99e-04 | Time: 0.12s
  → New best model saved (Val Loss: 1.801806)


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2/50 | Train Loss: 0.946497 | Val Loss: 2.276402 | LR: 9.96e-04 | Time: 0.11s
Epoch 3/50 | Train Loss: 0.959686 | Val Loss: 2.028968 | LR: 9.91e-04 | Time: 0.11s
Epoch 4/50 | Train Loss: 0.947351 | Val Loss: 1.606601 | LR: 9.84e-04 | Time: 0.11s
  → New best model saved (Val Loss: 1.606601)
Epoch 5/50 | Train Loss: 0.949965 | Val Loss: 1.598361 | LR: 9.76e-04 | Time: 0.11s
  → New best model saved (Val Loss: 1.598361)
Epoch 6/50 | Train Loss: 0.943691 | Val Loss: 1.855033 | LR: 9.65e-04 | Time: 0.11s
Epoch 7/50 | Train Loss: 0.961923 | Val Loss: 1.968703 | LR: 9.53e-04 | Time: 0.10s
Epoch 8/50 | Train Loss: 0.947226 | Val Loss: 2.055658 | LR: 9.39e-04 | Time: 0.10s
Epoch 9/50 | Train Loss: 0.945059 | Val Loss: 1.857809 | LR: 9.23e-04 | Time: 0.10s
Epoch 10/50 | Train Loss: 0.951721 | Val Loss: 1.696071 | LR: 9.05e-04 | Time: 0.10s
Epoch 11/50 | Train Loss: 0.951917 | Val Loss: 1.714333 | LR: 8.86e-04 | Time: 0.10s
Epoch 12/50 | Train Loss: 0.953149 | Val Loss: 1.706296 | LR: 8.66

In [4]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two side-by-side panels: loss curves on the left, LR schedule on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Training History', 'Learning Rate Schedule'),
)

# Add all three curves in one call; rows/cols pair up with the traces in order.
fig.add_traces(
    [
        go.Scatter(y=history['train_loss'], name='Train Loss', mode='lines'),
        go.Scatter(y=history['val_loss'], name='Val Loss', mode='lines'),
        go.Scatter(
            y=history['learning_rates'],
            name='Learning Rate',
            mode='lines',
            showlegend=False,
        ),
    ],
    rows=[1, 1, 1],
    cols=[1, 1, 2],
)

# Both panels share the same x-axis label, so set it for every subplot at once;
# the y-axis labels differ per panel.
(
    fig.update_xaxes(title_text="Epoch")
    .update_yaxes(title_text="Loss (MSE)", row=1, col=1)
    .update_yaxes(title_text="Learning Rate", row=1, col=2)
    .update_layout(height=500, width=1200, showlegend=True)
)
fig.show()

# Headline numbers: best validation epoch and where training ended up.
print(f"\nBest Validation Loss: {min(history['val_loss']):.6f}")
print(f"Final Train Loss: {history['train_loss'][-1]:.6f}")



Best Validation Loss: 1.598361
Final Train Loss: 0.937129


In [5]:
import plotly.graph_objects as go

# Pick one validation window and forecast it.
context, target = val_dataset[10]

# predict() is given a single-sample batch (leading axis added here).
prediction = trainer.predict(context.unsqueeze(0))

# Map everything back to original units for plotting and error metrics.
context_denorm = val_dataset.denormalize(context, is_target=False)
target_denorm = val_dataset.denormalize(target, is_target=True)
# squeeze(0) removes the batch axis added above -- presumably predict()
# keeps that leading axis; verify against the trainer.
prediction_denorm = val_dataset.denormalize(prediction.squeeze(0), is_target=True)

# Derive the x-axis positions from the tensors instead of hard-coding 90 / 97,
# so the plot stays correct if the window sizes are changed where the datasets
# are built. Assumes time is the last axis, as the sample shapes printed for
# the train dataset ([4, 90] and [1, 7]) indicate.
context_len = context.shape[-1]
horizon = target.shape[-1]
history_steps = list(range(context_len))
future_steps = list(range(context_len, context_len + horizon))

fig = go.Figure()

# Row 0 of the context is 'meantemp' (first entry of feature_cols).
fig.add_trace(go.Scatter(
    x=history_steps,
    y=context_denorm[0].cpu().numpy(),
    mode='lines',
    name='Historical Temperature',
    line=dict(width=2)
))

fig.add_trace(go.Scatter(
    x=future_steps,
    y=target_denorm[0].cpu().numpy(),
    mode='lines+markers',
    name='Actual Future',
    line=dict(width=2),
    marker=dict(size=8)
))

fig.add_trace(go.Scatter(
    x=future_steps,
    y=prediction_denorm[0].cpu().numpy(),
    mode='lines+markers',
    name='Predicted Future',
    line=dict(width=2, dash='dash'),
    marker=dict(size=8, symbol='square')
))

# Mark where the observed history ends and the forecast begins.
fig.add_vline(x=context_len, line_dash="dot", line_color="red", opacity=0.5, annotation_text="Prediction Start")

fig.update_layout(
    title=f'Delhi Temperature Forecast ({horizon}-day ahead)',
    xaxis_title='Time Steps',
    yaxis_title='Temperature (°C)',
    height=500,
    width=1000,
    showlegend=True
)

fig.show()

# Per-sample error in original units; compute the residual once.
forecast_error = target_denorm[0] - prediction_denorm[0]
mse = (forecast_error ** 2).mean().item()
mae = forecast_error.abs().mean().item()
print(f"Sample MSE: {mse:.4f}")
print(f"Sample MAE: {mae:.4f}°C")


Sample MSE: 76.3343
Sample MAE: 8.6646°C
