In [19]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import resample # We need this for bootstrapping
from torch.utils.data import TensorDataset, DataLoader 
import sys
import os
import pickle
sys.path.append('../')

In [20]:
from simulation.data_loader import get_weather_data

In [21]:
# --- Configuration ---
# Number of independently trained LSTM models that make up the ensemble.
NUM_MODELS_IN_ENSEMBLE = 5
# Number of full passes over the (bootstrapped) training set for each model.
# NOTE(review): an earlier comment here said "100 epochs"; the value actually used is 200 — confirm which is intended.
EPOCHS_PER_MODEL = 200
# Number of consecutive past time steps in each input window.
LOOK_BACK = 30
# Number of consecutive future time steps each model predicts.
LOOK_FORWARD = 7
# Mini-batch size used by the training DataLoader.
BATCH_SIZE = 64

In [22]:
# Output directory for the fitted scaler and the trained model checkpoints
# (relative to the notebook's working directory).
models_dir = "../models"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(models_dir, exist_ok=True)

In [23]:

#### Data Preparation ---
print("--- Step 1: Fetching and Preparing Weather Data ---")
# Fetch the weather history for the fixed location and date range used for training.
full_weather_data = get_weather_data(
    latitude=30.9010, longitude=75.8573,
    start_date="2010-01-01", end_date="2022-12-31"
)

if full_weather_data is None:
    # RuntimeError is more specific than a bare Exception and is still caught
    # by any existing `except Exception` handler.
    raise RuntimeError("Failed to fetch weather data.")

# Keep only the two modelled features, in a fixed column order, as float32.
weather_values = full_weather_data[['temperature', 'rainfall']].values.astype(np.float32)

# MinMaxScaler ignores NaNs while fitting and passes them through on transform,
# which would silently turn every training loss into NaN. Fail fast instead.
if np.isnan(weather_values).any():
    raise ValueError(
        "Weather data contains missing values (NaN) in 'temperature'/'rainfall'; "
        "clean or impute them before training."
    )

# We fit the scaler once on all the data and save it.
# All models in the ensemble will use the same scaler.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_weather_data = scaler.fit_transform(weather_values)
scaler_path = os.path.join(models_dir, "weather_data_scaler.pkl")
# NOTE: the scaler is stored with pickle; only unpickle this file from a trusted location.
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"Data scaler has been fit and saved to: {scaler_path}")

--- Step 1: Fetching and Preparing Weather Data ---
Fetching real weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Sucessfully fetched and processed real weather data from Open-Meteo.
Data scaler has been fit and saved to: ../models/weather_data_scaler.pkl


In [24]:
#### Create the Full Set of Sequences ---
def create_sequences(data, look_back=30, look_forward=7):
    """Slice a time series into sliding (input window, target window) pairs.

    Window ``i`` uses ``data[i : i + look_back]`` as the input and the
    ``look_forward`` steps that immediately follow it as the target.

    Args:
        data: Array-like whose first axis is time; any trailing axes
            (e.g. features) are carried through unchanged.
        look_back: Number of time steps in each input window.
        look_forward: Number of time steps in each target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n_windows, look_back, *trailing)`` and
        ``(n_windows, look_forward, *trailing)``, where
        ``n_windows = len(data) - look_back - look_forward + 1``.
        When the series is too short for a single window, correctly shaped
        empty arrays (``n_windows == 0``) with the input dtype are returned
        instead of shapeless ``(0,)`` arrays.
    """
    data = np.asarray(data)
    n_windows = len(data) - look_back - look_forward + 1

    if n_windows <= 0:
        # Preserve rank and dtype so downstream code can still rely on the
        # window and feature dimensions being present.
        trailing_shape = data.shape[1:]
        return (
            np.empty((0, look_back) + trailing_shape, dtype=data.dtype),
            np.empty((0, look_forward) + trailing_shape, dtype=data.dtype),
        )

    X = np.stack([data[start:start + look_back] for start in range(n_windows)])
    y = np.stack([
        data[start + look_back:start + look_back + look_forward]
        for start in range(n_windows)
    ])
    return X, y

# Build every (input window, target window) pair from the scaled series;
# each ensemble member later draws its bootstrap sample from these arrays.
X_full, y_full = create_sequences(scaled_weather_data, LOOK_BACK, LOOK_FORWARD)


In [25]:
#### Define the LSTM Model Architecture (Same as before) ---
class WeatherLSTM(nn.Module):
    """Stacked LSTM that maps an input window to a multi-step forecast.

    The hidden state at the last time step of the top LSTM layer is projected
    by a single linear layer to ``look_forward * output_size`` values, which
    are reshaped into one ``output_size``-dimensional prediction per step.

    Args:
        input_size: Number of features per input time step.
        hidden_layer_size: Hidden units in each LSTM layer.
        output_size: Number of features predicted per forecast step.
        num_layers: Number of stacked LSTM layers.
        look_forward: Number of future time steps to predict.
    """

    def __init__(self, input_size=2, hidden_layer_size=100, output_size=2, num_layers=2, look_forward=7):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.num_layers = num_layers
        self.look_forward = look_forward
        # Kept so forward() can reshape with the configured feature count
        # instead of a hard-coded 2.
        self.output_size = output_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size * look_forward)

    def forward(self, input_seq):
        """Forecast ``look_forward`` steps from a batch of input windows.

        Args:
            input_seq: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, look_forward, output_size)``.
        """
        # nn.LSTM initialises (h_0, c_0) to zeros when no state is passed,
        # which matches an explicit zero initial state on the input's device.
        lstm_out, _ = self.lstm(input_seq)
        last_timestep_out = lstm_out[:, -1, :]
        predictions = self.linear(last_timestep_out)
        return predictions.view(input_seq.size(0), self.look_forward, self.output_size)

In [None]:
#### The Ensemble Training Loop ---
print(f"\n--- Step 2: Starting Ensemble Training for {NUM_MODELS_IN_ENSEMBLE} Models ---")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device: {device}")

# Base seed for reproducibility. Each ensemble member gets its own derived
# seed, so the members still differ from one another but a re-run of the
# notebook draws the same bootstrap samples, initial weights and batch order.
# (Some GPU kernels may still be non-deterministic.)
ENSEMBLE_BASE_SEED = 42

# Without any training windows the per-epoch average below would divide by zero.
if len(X_full) == 0:
    raise ValueError(
        "No training sequences available: the weather series is shorter than "
        "LOOK_BACK + LOOK_FORWARD."
    )

for model_idx in range(1, NUM_MODELS_IN_ENSEMBLE + 1):
    print(f"\n--- Training Model {model_idx}/{NUM_MODELS_IN_ENSEMBLE} ---")

    # Seeds torch's global RNG, which drives weight initialisation and the
    # DataLoader's shuffling for this model.
    model_seed = ENSEMBLE_BASE_SEED + model_idx
    torch.manual_seed(model_seed)

    # --- Bootstrapping: Create a slightly different dataset for each model ---
    # We resample the full dataset with replacement. This means some data points
    # will be selected multiple times, and some will be left out for this model.
    # This variation is what makes the ensemble powerful.
    X_sample, y_sample = resample(
        X_full, y_full,
        replace=True, n_samples=len(X_full), random_state=model_seed
    )

    X_tensor = torch.from_numpy(X_sample)
    y_tensor = torch.from_numpy(y_sample)

    train_dataset = TensorDataset(X_tensor, y_tensor)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # --- Initialize a new model for this training run ---
    model = WeatherLSTM(look_forward=LOOK_FORWARD).to(device)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # --- Train this specific model ---
    for epoch in range(EPOCHS_PER_MODEL):
        epoch_loss = 0.0
        for sequences, labels in train_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            y_pred = model(sequences)
            loss = loss_function(y_pred, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        avg_epoch_loss = epoch_loss / len(train_loader)
        print(f'  Epoch {epoch+1}/{EPOCHS_PER_MODEL} Loss: {avg_epoch_loss:.6f}')

    # --- Save this specific model ---
    model_path = os.path.join(models_dir, f"lstm_weather_forecaster_{model_idx}.pth")
    # Move weights to CPU first so the checkpoint loads on machines without a GPU.
    model.to('cpu')
    torch.save(model.state_dict(), model_path)
    print(f"Successfully trained and saved model {model_idx} to: {model_path}")

print("\n--- Ensemble Training Complete! ---")