In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader 
import sys
import os
import pickle
sys.path.append('../')

In [2]:

from simulation.data_loader import get_weather_data 

In [3]:

#### configuration: uNCOMMENT the locatoin you want to train

#### Ludhiana, Punjab (for Wheat) 
LOCATION_NAME = "Ludhiana"
LATITUDE = 30.9010
LONGITUDE = 75.8573

##### Aurangabad, Bihar (for Rice) 
# LOCATION_NAME = "Aurangabad"
# LATITUDE = 24.75
# LONGITUDE = 84.37

#### Kolkata, West Bengal (for Rice)
# LOCATION_NAME = "Kolkata"
# LATITUDE = 22.5726
# LONGITUDE = 88.3639

#### Lucknow, Uttar Pradesh (for Sugarcane) 
# LOCATION_NAME = "Lucknow"
# LATITUDE = 26.8467
# LONGITUDE = 80.9462


In [4]:

#### Training Parameters 
NUM_MODELS_IN_ENSEMBLE = 5
EPOCHS_PER_MODEL = 100
LOOK_BACK = 30
LOOK_FORWARD = 7
BATCH_SIZE = 64
NUM_FEATURES = 4 


In [5]:

#### Dynamic File Naming
models_dir = "../models"
os.makedirs(models_dir, exist_ok=True)
# The saved files will now include the location name for easy identification
SCALER_PATH = os.path.join(models_dir, f"advanced_weather_data_scaler_{LOCATION_NAME.lower()}.pkl")
MODEL_SAVE_PREFIX = f"lstm_advanced_weather_forecaster_{LOCATION_NAME.lower()}"



In [6]:
#### Data Preparation 
print(f"--- Step 1: Fetching and Preparing Data for {LOCATION_NAME} ---")
full_weather_data = get_weather_data(latitude=LATITUDE, longitude=LONGITUDE, start_date="2010-01-01", end_date="2022-12-31")
if full_weather_data is None: raise Exception("Failed to fetch advanced weather data.")
weather_values = full_weather_data[['temperature', 'rainfall', 'humidity', 'wind_speed']].values.astype(np.float32)

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_weather_data = scaler.fit_transform(weather_values)
with open(SCALER_PATH, 'wb') as f: pickle.dump(scaler, f)
print(f"Data scaler for {LOCATION_NAME} has been fit and saved to: {SCALER_PATH}")


--- Step 1: Fetching and Preparing Data for Ludhiana ---
Fetching ADVANCED weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Successfully fetched and processed advanced weather data.
Data scaler for Ludhiana has been fit and saved to: ../models/advanced_weather_data_scaler_ludhiana.pkl


In [7]:
#### Create Sequences 
def create_sequences(data, look_back=30, look_forward=7):
    X, y = [], []
    for i in range(len(data) - look_back - look_forward + 1):
        X.append(data[i:(i + look_back)])
        y.append(data[(i + look_back):(i + look_back + look_forward)])
    return np.array(X), np.array(y)

X_full, y_full = create_sequences(scaled_weather_data, LOOK_BACK, LOOK_FORWARD)


In [8]:

#### Define LSTM Model Architecture
class AdvancedWeatherLSTM(nn.Module):
    def __init__(self, input_size=NUM_FEATURES, hidden_layer_size=128, output_size=NUM_FEATURES, num_layers=2, look_forward=7):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers, batch_first=True, dropout=0.1)
        self.linear = nn.Linear(hidden_layer_size, output_size * look_forward)
        self.look_forward = look_forward

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions.view(-1, self.look_forward, NUM_FEATURES)


In [9]:

#### The Ensemble Training Loop 
print(f"\n--- Step 2: Starting Ensemble Training for {NUM_MODELS_IN_ENSEMBLE} Models for {LOCATION_NAME} ---")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device: {device}")

saved_model_paths = []
for model_idx in range(1, NUM_MODELS_IN_ENSEMBLE + 1):
    print(f"\n--- Training Model {model_idx}/{NUM_MODELS_IN_ENSEMBLE} ---")
    
    X_sample, y_sample = resample(X_full, y_full, replace=True, n_samples=len(X_full))
    X_train, X_val, y_train, y_val = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)
    
    train_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    val_dataset = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    model = AdvancedWeatherLSTM().to(device)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5, verbose=False)
    
    best_val_loss = float('inf')
    patience_counter = 0
    early_stopping_patience = 10
    
    for epoch in range(EPOCHS_PER_MODEL):
        model.train()
        train_loss = 0.0
        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device)
            optimizer.zero_grad()
            y_pred = model(sequences)
            loss = loss_function(y_pred, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for sequences, labels in val_loader:
                sequences, labels = sequences.to(device), labels.to(device)
                y_pred = model(sequences)
                loss = loss_function(y_pred, labels)
                val_loss += loss.item()
        
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        
        if (epoch + 1) % 10 == 0:
            print(f'  Model {model_idx} - Epoch {epoch+1}/{EPOCHS_PER_MODEL} | Train Loss: {avg_train_loss:.6f} | Val Loss: {avg_val_loss:.6f}')
        
        scheduler.step(avg_val_loss)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_path = os.path.join(models_dir, f"{MODEL_SAVE_PREFIX}_{model_idx}.pth")
            torch.save(model.state_dict(), best_model_path)
            patience_counter = 0
        else:
            patience_counter += 1
        
        if patience_counter >= early_stopping_patience:
            print(f"--- Early stopping triggered at epoch {epoch+1} ---")
            break
            
    print(f"Successfully trained and saved model {model_idx} to: {best_model_path}")
    saved_model_paths.append(best_model_path)

print(f"\n--- Ensemble Training for {LOCATION_NAME} Complete! ---")


--- Step 2: Starting Ensemble Training for 5 Models for Ludhiana ---
Training on device: cpu

--- Training Model 1/5 ---




  Model 1 - Epoch 10/100 | Train Loss: 0.006853 | Val Loss: 0.006526
  Model 1 - Epoch 20/100 | Train Loss: 0.006323 | Val Loss: 0.005989
  Model 1 - Epoch 30/100 | Train Loss: 0.006202 | Val Loss: 0.005862
  Model 1 - Epoch 40/100 | Train Loss: 0.005905 | Val Loss: 0.005635
  Model 1 - Epoch 50/100 | Train Loss: 0.005827 | Val Loss: 0.005571
  Model 1 - Epoch 60/100 | Train Loss: 0.005763 | Val Loss: 0.005501
  Model 1 - Epoch 70/100 | Train Loss: 0.005756 | Val Loss: 0.005522
  Model 1 - Epoch 80/100 | Train Loss: 0.005721 | Val Loss: 0.005659
  Model 1 - Epoch 90/100 | Train Loss: 0.005662 | Val Loss: 0.005380
  Model 1 - Epoch 100/100 | Train Loss: 0.005652 | Val Loss: 0.005411
Successfully trained and saved model 1 to: ../models/lstm_advanced_weather_forecaster_ludhiana_1.pth

--- Training Model 2/5 ---
  Model 2 - Epoch 10/100 | Train Loss: 0.006706 | Val Loss: 0.006778
  Model 2 - Epoch 20/100 | Train Loss: 0.006254 | Val Loss: 0.006131
  Model 2 - Epoch 30/100 | Train Loss: 0.0