# First Iteration

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use the GPU when one is available; every tensor below is kept on this device.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load tensors.
# map_location deserializes the tensors directly onto `device`, so a file that
# was saved from a CUDA tensor still loads on a CPU-only machine and no
# intermediate copy on the original device is created.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt', map_location=device)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt', map_location=device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Index 5 along dim 2 of the targets is the t2m_f channel used as the
# prediction target throughout this cell.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Temporary Train/Test split: the first 90% of entries along dim 0 are used
# for training, the remaining 10% are held out.
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs = train_inputs_tensor[:train_size]
temp_train_targets = train_targets_tensor[:train_size]
temp_test_inputs = train_inputs_tensor[train_size:]
temp_test_targets = train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# The four split tensors are slices of tensors that already live on `device`,
# so no additional host-to-device transfer is required here.

# Metric store keyed by (k, loss_name), and the neighbourhood sizes to sweep.
results = {}
k_values = [4, 8, 24]

# Node coordinates of the regular lat/lon grid (built once, reused for every k).
# 101 latitudes x 237 longitudes = 23937 nodes.
num_nodes = 23937
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
# One (lat, lon) row per node in row-major order: latitude varies slowest,
# longitude fastest.
coords = np.column_stack((
    np.repeat(lat_subset, lon_subset.size),
    np.tile(lon_subset, lat_subset.size),
))

# Model and loss functions (identical for every k value tested below)
class WeatherGNN(t.nn.Module):
    """Two-layer GCN regressor with a scaled linear skip connection.

    Graph branch: GCNConv(num_features -> 256) -> ReLU -> Dropout(0.3)
    -> GCNConv(256 -> num_outputs). A linear projection of the raw node
    features, multiplied by the learnable scalar ``res_weight`` (initialised
    to 2.0), is added to the graph branch output.
    """

    def __init__(self, num_features=15, num_outputs=1):
        super().__init__()
        hidden_channels = 256
        # Attribute names are the state_dict keys, so they are kept as-is.
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_outputs)
        self.dropout = nn.Dropout(p=0.3)
        self.residual = nn.Linear(num_features, num_outputs)
        self.res_weight = nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return graph-branch prediction plus the weighted linear skip term."""
        skip = self.residual(x) * self.res_weight
        hidden = t.relu(self.conv1(x, edge_index))
        hidden = self.dropout(hidden)
        return self.conv2(hidden, edge_index) + skip

# Candidate training criteria, keyed by the name used in logs and file names.
# Each maps (prediction, target) to a scalar mean loss.
loss_fns = dict(
    L1=lambda pred, target: (pred - target).abs().mean(),
    L2=lambda pred, target: (pred - target).pow(2).mean(),
    Huber=nn.SmoothL1Loss(reduction='mean'),
)

# Hard-coded t2m_f statistics (°F) used to map normalized values back to
# physical units: value_F = value_norm * std + mean.
t2m_f_mean, t2m_f_std = 42.36, 21.75

# Sweep over graph connectivities: for every k, build a k-nearest-neighbour
# graph on the grid, then train and evaluate one fresh model per loss function.
for k in k_values:
    print(f"\nTesting with k={k} neighbors...")

    # Compute edge index for current k.
    # k+1 neighbours are requested and column 0 is dropped (assumed to be the
    # query node itself). Row 0 of edge_index holds each node id repeated k
    # times, row 1 the ids of its k nearest neighbours.
    nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
    _, indices = nbrs.kneighbors(coords)
    edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)
    print(f"Edge index computed for k={k}, shape: {edge_index.shape}")

    # Training configuration shared by all loss functions for this k.
    num_epochs = 10
    # NOTE: validation uses every second step of the held-out split, and the
    # final evaluation below uses that same split, so the reported test
    # metrics are not independent of model selection.
    val_steps = list(range(0, temp_test_inputs.shape[0] - 1, 2))
    patience = 5

    for loss_name, criterion in loss_fns.items():
        print(f"\nTraining with {loss_name} loss for t2m_f prediction (k={k})...")
        t.cuda.empty_cache()
        gc.collect()
        model = WeatherGNN(num_features=15, num_outputs=1).to(device)
        optimizer = t.optim.Adam(model.parameters(), lr=0.01)
        scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
        train_losses, val_losses = [], []
        val_epochs = []  # 1-based epoch number of each validation run (plot x-axis)

        # Early-stopping state belongs to this model only. Values of different
        # loss functions are not comparable, and a snapshot taken from another
        # model must never be loaded into this one.
        best_val_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        for epoch in range(num_epochs):
            epoch_start = time.time()
            print(f"Starting Epoch {epoch+1}/{num_epochs}")
            model.train()
            total_loss = 0
            # One optimizer step per time step (full-graph batch).
            for t_step in range(temp_train_inputs.shape[0] - 1):
                input_x = temp_train_inputs[t_step].reshape(num_nodes, -1)
                target_y = temp_train_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                optimizer.zero_grad()
                out = model(input_x, edge_index)
                loss = criterion(out, target_y)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            scheduler.step()
            avg_loss = total_loss / (temp_train_inputs.shape[0] - 1)
            train_losses.append(avg_loss)
            epoch_time = time.time() - epoch_start
            print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

            # Validate on the first epoch, every 5th epoch after it, and the last epoch.
            if epoch % 5 == 0 or epoch == num_epochs - 1:
                val_start = time.time()
                print(f"Starting Validation for Epoch {epoch+1}")
                model.eval()
                val_loss = 0
                with t.no_grad():
                    for t_step in val_steps:
                        input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                        target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                        out = model(input_x, edge_index)
                        val_loss += criterion(out, target_y).item()
                # max(..., 1) avoids a ZeroDivisionError when the held-out split is too short.
                val_loss = val_loss / max(len(val_steps), 1)
                val_losses.append(val_loss)
                val_epochs.append(epoch + 1)
                val_time = time.time() - val_start
                print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # state_dict() returns references to the live parameters;
                    # clone them so later optimizer steps cannot alter the snapshot.
                    best_model_state = {name: value.detach().clone() for name, value in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                        break

        if best_model_state is not None:
            model.load_state_dict(best_model_state)
            print("Loaded best model state based on validation loss.")

        # Test evaluation
        test_start = time.time()
        print(f"Starting Final Test Evaluation for {loss_name} loss (k={k})")
        model.eval()
        test_preds, test_trues = [], []
        with t.no_grad():
            for t_step in range(temp_test_inputs.shape[0] - 1):
                input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                out = model(input_x, edge_index)
                test_preds.append(out.cpu())
                test_trues.append(target_y.cpu())
        test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)
        test_time = time.time() - test_start
        print(f"Finished Final Test Evaluation for {loss_name} loss, Total runtime: {test_time:.2f}s")

        # Denormalize and evaluate t2m_f
        preds_t2m = test_preds * t2m_f_std + t2m_f_mean
        trues_t2m = test_trues * t2m_f_std + t2m_f_mean
        mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
        rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
        print(f"t2m_f L1 norm (°F) (k={k}): {mae_t2m:.2f}")
        print(f"t2m_f RMSE (°F) (k={k}): {rmse_t2m:.2f}")

        # Store results
        results[(k, loss_name)] = {'MAE': mae_t2m, 'RMSE': rmse_t2m}

        # Sample outputs: first 5 time steps at node 0
        sample_preds = preds_t2m[:5, 0, 0].numpy()
        sample_targets = trues_t2m[:5, 0, 0].numpy()
        print(f"Sample t2m_f preds (°F) (k={k}): {sample_preds}")
        print(f"Sample t2m_f targets (°F) (k={k}): {sample_targets}")

        # Loss Plot
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(train_losses) + 1), train_losses, label=f'Train Loss ({loss_name})', color='blue')
        # Validation points are placed at the epochs where validation actually ran.
        plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
        plt.xlabel('Epoch')
        plt.ylabel(f'{loss_name} Loss (Normalized)')
        plt.title(f'Training and Validation Loss for t2m_f ({loss_name}, k={k})')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'loss_plot_t2m_f_{loss_name}_k{k}.png')
        plt.show()

        # Time Series Plot for t2m_f (node 0 across the held-out steps)
        plot_start = time.time()
        print(f"Starting Time Series Plotting for t2m_f (k={k})")
        plt.figure(figsize=(10, 6))
        plt.plot(preds_t2m[:, 0, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
        plt.plot(trues_t2m[:, 0, 0].numpy(), label='True t2m_f', color='green')
        plt.xlabel('Time Step')
        plt.ylabel('Temperature (°F)')
        plt.title(f'Predicted vs Actual t2m_f (k={k})')
        plt.legend()
        plt.grid(True)
        # Include the loss name so the three runs per k do not overwrite each other.
        plt.savefig(f't2m_f_timeseries_{loss_name}_k{k}.png')
        print(f"Finished Time Series Plotting, Total runtime: {time.time() - plot_start:.2f}s")
        plt.show()

# Print summary of results
print("\nSummary of Results:")
for (k, loss_name), metrics in results.items():
    print(f"k={k}, Loss={loss_name}: MAE={metrics['MAE']:.2f}°F, RMSE={metrics['RMSE']:.2f}°F") 

# Second Iteration: Adding a third GCN Convolution layer to the model

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use the GPU when one is available; every tensor below is kept on this device.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load tensors.
# map_location deserializes the tensors directly onto `device`, so a file that
# was saved from a CUDA tensor still loads on a CPU-only machine and no
# intermediate copy on the original device is created.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt', map_location=device)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt', map_location=device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Index 5 along dim 2 of the targets is the t2m_f channel used as the
# prediction target throughout this cell.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Temporary Train/Test split: the first 90% of entries along dim 0 are used
# for training, the remaining 10% are held out.
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs = train_inputs_tensor[:train_size]
temp_train_targets = train_targets_tensor[:train_size]
temp_test_inputs = train_inputs_tensor[train_size:]
temp_test_targets = train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# The four split tensors are slices of tensors that already live on `device`,
# so no additional host-to-device transfer is required here.

# Metric store keyed by (k, loss_name), and the neighbourhood sizes to sweep.
results = {}
k_values = [4, 8, 24]

# Node coordinates of the regular lat/lon grid (built once, reused for every k).
# 101 latitudes x 237 longitudes = 23937 nodes.
num_nodes = 23937
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
# One (lat, lon) row per node in row-major order: latitude varies slowest,
# longitude fastest.
coords = np.column_stack((
    np.repeat(lat_subset, lon_subset.size),
    np.tile(lon_subset, lat_subset.size),
))

# Updated model with three GCNConv layers
class WeatherGNN(t.nn.Module):
    """Three-layer GCN regressor with a scaled linear skip connection.

    Graph branch: GCNConv(num_features -> 128) -> ReLU -> Dropout(0.3)
    -> GCNConv(128 -> 128) -> ReLU -> Dropout(0.3) -> GCNConv(128 -> num_outputs).
    A linear projection of the raw node features, multiplied by the learnable
    scalar ``res_weight`` (initialised to 2.0), is added to the graph output.
    """

    def __init__(self, num_features=15, num_outputs=1):
        super().__init__()
        hidden_channels = 128
        # Attribute names are the state_dict keys, so they are kept as-is.
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, num_outputs)
        self.dropout = nn.Dropout(p=0.3)
        self.residual = nn.Linear(num_features, num_outputs)
        self.res_weight = nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return graph-branch prediction plus the weighted linear skip term."""
        skip = self.residual(x) * self.res_weight
        hidden = self.dropout(t.relu(self.conv1(x, edge_index)))
        hidden = self.dropout(t.relu(self.conv2(hidden, edge_index)))
        return self.conv3(hidden, edge_index) + skip

# Candidate training criteria, keyed by the name used in logs and file names.
# Each maps (prediction, target) to a scalar mean loss.
loss_fns = dict(
    L1=lambda pred, target: (pred - target).abs().mean(),
    L2=lambda pred, target: (pred - target).pow(2).mean(),
    Huber=nn.SmoothL1Loss(reduction='mean'),
)

# Hard-coded t2m_f statistics (°F) used to map normalized values back to
# physical units: value_F = value_norm * std + mean.
t2m_f_mean, t2m_f_std = 42.36, 21.75

# Sweep over graph connectivities: for every k, build a k-nearest-neighbour
# graph on the grid, then train and evaluate one fresh model per loss function.
for k in k_values:
    print(f"\nTesting with k={k} neighbors...")

    # Compute edge index for current k.
    # k+1 neighbours are requested and column 0 is dropped (assumed to be the
    # query node itself). Row 0 of edge_index holds each node id repeated k
    # times, row 1 the ids of its k nearest neighbours.
    nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
    _, indices = nbrs.kneighbors(coords)
    edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)
    print(f"Edge index computed for k={k}, shape: {edge_index.shape}")

    # Training configuration shared by all loss functions for this k.
    num_epochs = 10
    # NOTE: validation uses every second step of the held-out split, and the
    # final evaluation below uses that same split, so the reported test
    # metrics are not independent of model selection.
    val_steps = list(range(0, temp_test_inputs.shape[0] - 1, 2))
    patience = 5

    for loss_name, criterion in loss_fns.items():
        print(f"\nTraining with {loss_name} loss for t2m_f prediction (k={k})...")
        t.cuda.empty_cache()
        gc.collect()
        model = WeatherGNN(num_features=15, num_outputs=1).to(device)
        optimizer = t.optim.Adam(model.parameters(), lr=0.01)
        scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
        train_losses, val_losses = [], []
        val_epochs = []  # 1-based epoch number of each validation run (plot x-axis)

        # Early-stopping state belongs to this model only. Values of different
        # loss functions are not comparable, and a snapshot taken from another
        # model must never be loaded into this one.
        best_val_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        for epoch in range(num_epochs):
            epoch_start = time.time()
            print(f"Starting Epoch {epoch+1}/{num_epochs}")
            model.train()
            total_loss = 0
            # One optimizer step per time step (full-graph batch).
            for t_step in range(temp_train_inputs.shape[0] - 1):
                input_x = temp_train_inputs[t_step].reshape(num_nodes, -1)
                target_y = temp_train_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                optimizer.zero_grad()
                out = model(input_x, edge_index)
                loss = criterion(out, target_y)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            scheduler.step()
            avg_loss = total_loss / (temp_train_inputs.shape[0] - 1)
            train_losses.append(avg_loss)
            epoch_time = time.time() - epoch_start
            print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

            # Validate on every second epoch (starting with the first) and on the last epoch.
            if epoch % 2 == 0 or epoch == num_epochs - 1:
                val_start = time.time()
                print(f"Starting Validation for Epoch {epoch+1}")
                model.eval()
                val_loss = 0
                with t.no_grad():
                    for t_step in val_steps:
                        input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                        target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                        out = model(input_x, edge_index)
                        val_loss += criterion(out, target_y).item()
                # max(..., 1) avoids a ZeroDivisionError when the held-out split is too short.
                val_loss = val_loss / max(len(val_steps), 1)
                val_losses.append(val_loss)
                val_epochs.append(epoch + 1)
                val_time = time.time() - val_start
                print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # state_dict() returns references to the live parameters;
                    # clone them so later optimizer steps cannot alter the snapshot.
                    best_model_state = {name: value.detach().clone() for name, value in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                        break

        if best_model_state is not None:
            model.load_state_dict(best_model_state)
            print("Loaded best model state based on validation loss.")

        # Test evaluation
        test_start = time.time()
        print(f"Starting Final Test Evaluation for {loss_name} loss (k={k})")
        model.eval()
        test_preds, test_trues = [], []
        with t.no_grad():
            for t_step in range(temp_test_inputs.shape[0] - 1):
                input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                out = model(input_x, edge_index)
                test_preds.append(out.cpu())
                test_trues.append(target_y.cpu())
        test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)
        test_time = time.time() - test_start
        print(f"Finished Final Test Evaluation for {loss_name} loss, Total runtime: {test_time:.2f}s")

        # Denormalize and evaluate t2m_f
        preds_t2m = test_preds * t2m_f_std + t2m_f_mean
        trues_t2m = test_trues * t2m_f_std + t2m_f_mean
        mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
        rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
        print(f"t2m_f L1 norm (°F) (k={k}): {mae_t2m:.2f}")
        print(f"t2m_f RMSE (°F) (k={k}): {rmse_t2m:.2f}")

        # Store results
        results[(k, loss_name)] = {'MAE': mae_t2m, 'RMSE': rmse_t2m}

        # Sample outputs: first 5 time steps at node 0
        sample_preds = preds_t2m[:5, 0, 0].numpy()
        sample_targets = trues_t2m[:5, 0, 0].numpy()
        print(f"Sample t2m_f preds (°F) (k={k}): {sample_preds}")
        print(f"Sample t2m_f targets (°F) (k={k}): {sample_targets}")

        # Loss Plot
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(train_losses) + 1), train_losses, label=f'Train Loss ({loss_name})', color='blue')
        # Validation points are placed at the epochs where validation actually ran.
        plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
        plt.xlabel('Epoch')
        plt.ylabel(f'{loss_name} Loss (Normalized)')
        plt.title(f'Training and Validation Loss for t2m_f ({loss_name}, k={k})')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'loss_plot_t2m_f_{loss_name}_k{k}.png')
        plt.show()

        # Time Series Plot for t2m_f (node 0 across the held-out steps)
        plot_start = time.time()
        print(f"Starting Time Series Plotting for t2m_f (k={k})")
        plt.figure(figsize=(10, 6))
        plt.plot(preds_t2m[:, 0, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
        plt.plot(trues_t2m[:, 0, 0].numpy(), label='True t2m_f', color='green')
        plt.xlabel('Time Step')
        plt.ylabel('Temperature (°F)')
        plt.title(f'Predicted vs Actual t2m_f (k={k})')
        plt.legend()
        plt.grid(True)
        # Include the loss name so the three runs per k do not overwrite each other.
        plt.savefig(f't2m_f_timeseries_{loss_name}_k{k}.png')
        print(f"Finished Time Series Plotting, Total runtime: {time.time() - plot_start:.2f}s")
        plt.show()

# Print summary of results
print("\nSummary of Results:")
for (k, loss_name), metrics in results.items():
    print(f"k={k}, Loss={loss_name}: MAE={metrics['MAE']:.2f}°F, RMSE={metrics['RMSE']:.2f}°F")

# Third Iteration: Replacing the third GCN layer with a GRU layer 

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os
from torch.amp import GradScaler, autocast

# Use the GPU when one is available; every tensor below is kept on this device.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load tensors and squeeze last dimension.
# map_location deserializes the tensors directly onto `device`, so a file that
# was saved from a CUDA tensor still loads on a CPU-only machine and no
# intermediate copy on the original device is created.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt', map_location=device).squeeze(-1)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt', map_location=device).squeeze(-1)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Index 5 along dim 2 of the targets is the t2m_f channel used as the
# prediction target throughout this cell.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5].mean().item():.2f}, std: {train_targets_tensor[:, :, 5].std().item():.2f}")

# Temporary Train/Test split (90/10) along dim 0
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs = train_inputs_tensor[:train_size]
temp_train_targets = train_targets_tensor[:train_size]
temp_test_inputs = train_inputs_tensor[train_size:]
temp_test_targets = train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Limit training data to first 100 time steps
temp_train_inputs = temp_train_inputs[:100]
temp_train_targets = temp_train_targets[:100]
print(f"Limited training data to first 100 time steps: train shape {temp_train_inputs.shape}")

# The split tensors are slices of tensors that already live on `device`,
# so no additional host-to-device transfer is required here.

# Prepare data for batching
batch_size = 8  # number of sliding windows per optimizer step
seq_len = 5     # time steps per window fed to the GRU
num_nodes = 23937
# The last time step of each split is left unused.
num_train_steps = temp_train_inputs.shape[0] - 1
num_batches = (num_train_steps - seq_len) // batch_size
num_test_steps = temp_test_inputs.shape[0] - 1
# Validation uses every second window start in the held-out split.
val_steps = list(range(0, max(1, num_test_steps - seq_len + 1), 2))
print(f"Batching: batch_size={batch_size}, seq_len={seq_len}, num_batches={num_batches}, val_steps={val_steps}")

# Mixed precision training setup (one gradient scaler shared by all runs)
scaler = GradScaler('cuda')

# Metric store keyed by (k, loss_name), and the neighbourhood sizes to sweep.
results = {}
k_values = [4, 8, 24]

# Node coordinates of the regular lat/lon grid (built once, reused for every k):
# 101 latitudes x 237 longitudes.
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
# One (lat, lon) row per node in row-major order: latitude varies slowest,
# longitude fastest.
coords = np.column_stack((
    np.repeat(lat_subset, lon_subset.size),
    np.tile(lon_subset, lat_subset.size),
))

# WeatherGNNGRU model
class WeatherGNNGRU(t.nn.Module):
    """GCN encoder followed by a GRU over time, plus a per-node linear skip term.

    For every time step the node features pass through two GCNConv layers and
    are averaged over all nodes; the resulting sequence is fed to a GRU. The
    GRU output at the last time step is projected by ``fc`` and broadcast to
    every node, then added to a linear projection of the last step's raw node
    features scaled by the learnable scalar ``res_weight`` (initialised to 2.0).
    Consequently the only node-specific part of the prediction is the skip term.
    """

    def __init__(self, num_features=15, hidden_dim=128, gru_hidden=16, num_outputs=1):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.gru = nn.GRU(hidden_dim, gru_hidden, num_layers=1, batch_first=True)
        self.fc = nn.Linear(gru_hidden, num_outputs)
        self.dropout = nn.Dropout(0.3)
        self.residual = nn.Linear(num_features, num_outputs)
        self.res_weight = nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index, batch_size, seq_len, num_nodes):
        """Predict one value per node for the last step of each input window.

        Args:
            x: tensor of shape (batch, seq_len, num_nodes, num_features).
            edge_index: graph connectivity shared by all samples and steps.
            batch_size, seq_len, num_nodes: accepted for call compatibility
                only; the actual sizes are always read from ``x.shape``.

        Returns:
            Tensor of shape (batch, num_nodes, num_outputs).
        """
        batch_size, seq_len, num_nodes, num_features = x.shape
        # reshape (not view) so non-contiguous inputs, e.g. permuted unfold
        # windows, are accepted as well.
        x = x.reshape(batch_size * seq_len, num_nodes, num_features)
        residual = self.residual(x).reshape(batch_size, seq_len, num_nodes, -1) * self.res_weight
        # The same edge_index is applied to every (sample, step) slice;
        # presumably GCNConv treats the second-to-last dim as the node axis - TODO confirm.
        x = self.conv1(x, edge_index).relu()
        x = self.dropout(x)
        x = self.conv2(x, edge_index).relu()
        x = x.view(batch_size, seq_len, num_nodes, -1)
        # Average over nodes: one hidden vector per time step.
        x = x.mean(dim=2)
        x, _ = self.gru(x)
        out = self.fc(x)
        # Last time step, broadcast to all nodes: (B, O) -> (B, 1, O) -> (B, N, O).
        out = out[:, -1, :].unsqueeze(1).expand(-1, num_nodes, -1)
        residual = residual[:, -1, :, :]
        return out + residual

# Candidate training criteria, keyed by the name used in logs and file names.
# Each maps (prediction, target) to a scalar mean loss.
loss_fns = dict(
    L1=lambda pred, target: (pred - target).abs().mean(),
    L2=lambda pred, target: (pred - target).pow(2).mean(),
    Huber=nn.SmoothL1Loss(reduction='mean'),
)

# Hard-coded t2m_f statistics (°F) used to map normalized values back to
# physical units: value_F = value_norm * std + mean.
t2m_f_mean, t2m_f_std = 42.36, 21.75

# Sweep over graph connectivities: for every k, build a k-nearest-neighbour
# graph on the grid, then train and evaluate one fresh GCN+GRU model per loss.
for k in k_values:
    print(f"\nTesting with k={k} neighbors...")

    # Compute edge index for current k.
    # k+1 neighbours are requested and column 0 is dropped (assumed to be the
    # query node itself). Row 0 of edge_index holds each node id repeated k
    # times, row 1 the ids of its k nearest neighbours.
    nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
    _, indices = nbrs.kneighbors(coords)
    edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)
    print(f"Edge index computed for k={k}, shape: {edge_index.shape}")

    # Training configuration shared by all loss functions for this k.
    # NOTE: validation and the final evaluation both use the held-out split,
    # so the reported test metrics are not independent of model selection.
    num_epochs = 10
    patience = 5

    for loss_name, criterion in loss_fns.items():
        print(f"\nTraining with {loss_name} loss for t2m_f prediction (k={k})...")
        t.cuda.empty_cache()
        gc.collect()
        model = WeatherGNNGRU(num_features=15, hidden_dim=128, gru_hidden=16, num_outputs=1).to(device)
        optimizer = t.optim.Adam(model.parameters(), lr=0.01)
        scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
        train_losses, val_losses = [], []
        val_epochs = []  # 1-based epoch number of each validation run (plot x-axis)

        # Early-stopping state belongs to this model only. Values of different
        # loss functions are not comparable, and a snapshot taken from another
        # model must never be loaded into this one.
        best_val_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        for epoch in range(num_epochs):
            epoch_start = time.time()
            print(f"Starting Epoch {epoch+1}/{num_epochs}")
            model.train()
            total_loss = 0
            num_valid_batches = 0

            for batch_idx in range(num_batches):
                start_idx = batch_idx * batch_size
                end_idx = start_idx + batch_size
                if end_idx + seq_len > num_train_steps:
                    continue
                # batch_size windows starting at start_idx .. end_idx - 1 need
                # batch_size + seq_len - 1 consecutive time steps. One step more
                # would add an extra window that the next batch repeats.
                stop_idx = end_idx + seq_len - 1
                # unfold appends the window axis last: (B, N, F, S). Move it next
                # to the batch axis to get (B, S, N, F), the same layout the
                # validation and test loops feed to the model. A plain reshape
                # would scramble the node and time axes.
                input_x = temp_train_inputs[start_idx:stop_idx].unfold(0, seq_len, 1).permute(0, 3, 1, 2).contiguous()
                # (B, N, S) -> (B, S, N, 1)
                target_y = temp_train_targets[start_idx:stop_idx, :, 5].unfold(0, seq_len, 1).permute(0, 2, 1).unsqueeze(-1)
                optimizer.zero_grad()
                with autocast('cuda'):
                    out = model(input_x, edge_index, input_x.shape[0], seq_len, num_nodes)
                    # Supervise only the last step of each window.
                    loss = criterion(out, target_y[:, -1, :, :])
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                total_loss += loss.item()
                num_valid_batches += 1

            scheduler.step()
            avg_loss = total_loss / max(num_valid_batches, 1)
            train_losses.append(avg_loss)
            epoch_time = time.time() - epoch_start
            print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

            # Validate on every second epoch (starting with the first) and on the last epoch.
            if epoch % 2 == 0 or epoch == num_epochs - 1:
                val_start = time.time()
                print(f"Starting Validation for Epoch {epoch+1}")
                model.eval()
                val_loss = 0
                num_valid_val_batches = 0
                with t.no_grad():
                    for t_step in val_steps:
                        if t_step + seq_len > num_test_steps:
                            continue
                        input_x = temp_test_inputs[t_step:t_step + seq_len].unsqueeze(0)
                        target_y = temp_test_targets[t_step:t_step + seq_len, :, 5].unsqueeze(0).unsqueeze(-1)
                        with autocast('cuda'):
                            out = model(input_x, edge_index, 1, seq_len, num_nodes)
                            val_loss += criterion(out, target_y[:, -1, :, :]).item()
                        num_valid_val_batches += 1
                val_loss = val_loss / max(num_valid_val_batches, 1)
                val_losses.append(val_loss)
                val_epochs.append(epoch + 1)
                val_time = time.time() - val_start
                print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # state_dict() returns references to the live parameters;
                    # clone them so later optimizer steps cannot alter the snapshot.
                    best_model_state = {name: value.detach().clone() for name, value in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                        break

        if best_model_state is not None:
            model.load_state_dict(best_model_state)
            print("Loaded best model state based on validation loss.")

        # Test evaluation: one sliding window per start step, predicting the
        # last step of the window.
        test_start = time.time()
        print(f"Starting Final Test Evaluation for {loss_name} loss (k={k})")
        model.eval()
        test_preds, test_trues = [], []
        with t.no_grad():
            for t_step in range(num_test_steps - seq_len + 1):
                input_x = temp_test_inputs[t_step:t_step + seq_len].unsqueeze(0)
                target_y = temp_test_targets[t_step:t_step + seq_len, :, 5].unsqueeze(0).unsqueeze(-1)
                with autocast('cuda'):
                    out = model(input_x, edge_index, 1, seq_len, num_nodes)
                # Autocast may yield half-precision outputs; cast to float32 so
                # denormalization and the metrics are not computed in fp16.
                test_preds.append(out[:, :, 0].float().cpu())
                test_trues.append(target_y[:, -1, :, 0].float().cpu())
        test_preds = t.cat(test_preds, dim=0)
        test_trues = t.cat(test_trues, dim=0)
        test_time = time.time() - test_start
        print(f"Finished Final Test Evaluation for {loss_name} loss, Total runtime: {test_time:.2f}s")

        # Denormalize and evaluate t2m_f
        preds_t2m = test_preds * t2m_f_std + t2m_f_mean
        trues_t2m = test_trues * t2m_f_std + t2m_f_mean
        mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
        rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
        print(f"t2m_f L1 norm (°F) (k={k}): {mae_t2m:.2f}")
        print(f"t2m_f RMSE (°F) (k={k}): {rmse_t2m:.2f}")

        # Store results
        results[(k, loss_name)] = {'MAE': mae_t2m, 'RMSE': rmse_t2m}

        # Sample outputs: first 5 windows at node 0
        sample_preds = preds_t2m[:5, 0].numpy()
        sample_targets = trues_t2m[:5, 0].numpy()
        print(f"Sample t2m_f preds (°F) (k={k}): {sample_preds}")
        print(f"Sample t2m_f targets (°F) (k={k}): {sample_targets}")

        # Loss Plot
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(train_losses) + 1), train_losses, label=f'Train Loss ({loss_name})', color='blue')
        # Validation points are placed at the epochs where validation actually ran.
        plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
        plt.xlabel('Epoch')
        plt.ylabel(f'{loss_name} Loss (Normalized)')
        plt.title(f'Training and Validation Loss for t2m_f ({loss_name}, k={k})')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'loss_plot_t2m_f_{loss_name}_k{k}.png')
        plt.show()

        # Time Series Plot for t2m_f (node 0 across the held-out windows)
        plot_start = time.time()
        print(f"Starting Time Series Plotting for t2m_f (k={k})")
        plt.figure(figsize=(10, 6))
        plt.plot(preds_t2m[:, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
        plt.plot(trues_t2m[:, 0].numpy(), label='True t2m_f', color='green')
        plt.xlabel('Time Step')
        plt.ylabel('Temperature (°F)')
        plt.title(f'Predicted vs Actual t2m_f (k={k})')
        plt.legend()
        plt.grid(True)
        # Include the loss name so the three runs per k do not overwrite each other.
        plt.savefig(f't2m_f_timeseries_{loss_name}_k{k}.png')
        plt.show()

# Print summary of results
print("\nSummary of Results:")
for (k, loss_name), metrics in results.items():
    print(f"k={k}, Loss={loss_name}: MAE={metrics['MAE']:.2f}°F, RMSE={metrics['RMSE']:.2f}°F")

## Cut batch size in half to fix memory allocation error for k=24

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os
from torch.amp import GradScaler, autocast

# Request expandable allocator segments to reduce CUDA memory fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Use the GPU when one is available, otherwise run on the CPU
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
print(f"Using device: {device}")

# Load the normalized tensors, drop the trailing axis and place them on `device`
train_inputs_tensor = (
    t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt')
    .squeeze(-1)
    .to(device)
)
train_targets_tensor = (
    t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt')
    .squeeze(-1)
    .to(device)
)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Sanity check on target channel 5 (the t2m_f channel used throughout this cell)
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5].mean().item():.2f}, std: {train_targets_tensor[:, :, 5].std().item():.2f}")

# Temporary chronological split: first 90% of time steps for training, rest for testing
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs, temp_test_inputs = train_inputs_tensor[:train_size], train_inputs_tensor[train_size:]
temp_train_targets, temp_test_targets = train_targets_tensor[:train_size], train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Keep only the first 100 training time steps
temp_train_inputs, temp_train_targets = temp_train_inputs[:100], temp_train_targets[:100]
print(f"Limited training data to first 100 time steps: train shape {temp_train_inputs.shape}")

# Make sure every split lives on `device` (slices of tensors already there are unaffected)
temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets = (
    split.to(device)
    for split in (temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets)
)

# Batching configuration
batch_size, seq_len = 4, 5  # Reduced batch size / reduced sequence length
num_nodes = 23937           # 101 latitudes x 237 longitudes (see grid below)
num_train_steps = temp_train_inputs.shape[0] - 1
num_test_steps = temp_test_inputs.shape[0] - 1
num_batches = (num_train_steps - seq_len) // batch_size
# Validate on every second window start of the test split
val_steps = list(range(0, max(1, num_test_steps - seq_len + 1), 2))
print(f"Batching: batch_size={batch_size}, seq_len={seq_len}, num_batches={num_batches}, val_steps={val_steps}")

# Gradient scaler for mixed precision training
scaler = GradScaler('cuda')

# Neighbour counts to compare, and the per-experiment metric store
k_values = [4, 8, 24]
results = {}  # (k, loss_name) -> {'MAE': ..., 'RMSE': ...}

# Node coordinates (lat, lon) for the edge index, computed once:
# row-major over the grid, latitude varying slowest
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
coords = np.stack(
    np.meshgrid(lat_subset, lon_subset, indexing='ij'),
    axis=2,
).reshape(lat_subset.size * lon_subset.size, 2)

# WeatherGNNGRU model
class WeatherGNNGRU(nn.Module):
    """Two GCN layers per time step, node-mean pooling, a GRU over time, plus a linear skip path.

    The prediction for each node is the GRU read-out of the last time step
    (identical for every node, because the graph features are averaged over
    nodes before the GRU) added to a per-node linear projection of the last
    input frame scaled by the learnable ``res_weight``.
    """

    def __init__(self, num_features=15, hidden_dim=64, gru_hidden=16, num_outputs=1):
        super().__init__()
        # Spatial encoder
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        # Temporal encoder over the pooled graph embedding
        self.gru = nn.GRU(input_size=hidden_dim, hidden_size=gru_hidden, num_layers=1, batch_first=True)
        self.fc = nn.Linear(in_features=gru_hidden, out_features=num_outputs)
        self.dropout = nn.Dropout(p=0.3)
        # Per-node skip connection from raw features, scaled by a learnable weight
        self.residual = nn.Linear(in_features=num_features, out_features=num_outputs)
        self.res_weight = nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index, batch_size, seq_len, num_nodes):
        """Return a ``[batch, num_nodes, 1]`` prediction for the last step of each window.

        ``x`` is unpacked as ``[batch, seq_len, num_nodes, num_features]``; the
        ``batch_size``, ``seq_len`` and ``num_nodes`` arguments are overwritten
        by the values read from ``x.shape``. The skip-path reshape fixes the
        output width at 1.
        """
        batch_size, seq_len, num_nodes, num_features = x.shape

        # Treat every (window, time step) pair as one graph
        frames = x.view(batch_size * seq_len, num_nodes, num_features)
        skip = self.residual(frames).view(batch_size, seq_len, num_nodes, 1) * self.res_weight

        hidden = t.relu(self.conv1(frames, edge_index))
        hidden = self.dropout(hidden)
        hidden = t.relu(self.conv2(hidden, edge_index))

        # Average over nodes -> one embedding per time step, then run the GRU
        pooled = hidden.view(batch_size, seq_len, num_nodes, -1).mean(dim=2)
        gru_out, _ = self.gru(pooled)

        # Last-step read-out, broadcast to every node
        last_step = self.fc(gru_out)[:, -1, :]
        broadcast = last_step.unsqueeze(-1).expand(-1, num_nodes, -1)
        return broadcast + skip[:, -1, :, :]

# Candidate training criteria, keyed by the name used in logs and file names.
# Each maps (prediction, target) tensors to a scalar mean loss.
loss_fns = {
    'L1': lambda x, y: (x - y).abs().mean(),
    'L2': lambda x, y: (x - y).pow(2).mean(),
    'Huber': nn.SmoothL1Loss(reduction='mean'),
}

# t2m_f statistics (°F) used to denormalize predictions and targets
t2m_f_mean, t2m_f_std = 42.36, 21.75

# Loop over k values: for every neighbour count, train one WeatherGNNGRU per loss
# function, evaluate it on the held-out split and save loss / time-series plots.
for k in k_values:
    print(f"\nTesting with k={k} neighbors...")

    # Build the k-nearest-neighbour graph over the lat/lon grid. k+1 neighbours are
    # requested and column 0 of `indices` is dropped, leaving k neighbours per node;
    # row 0 of edge_index repeats each node id k times, row 1 holds its neighbours.
    nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
    _, indices = nbrs.kneighbors(coords)
    edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)
    print(f"Edge index computed for k={k}, shape: {edge_index.shape}")

    # Training configuration shared by every loss function
    num_epochs = 10
    patience = 5

    for loss_name, criterion in loss_fns.items():
        print(f"\nTraining with {loss_name} loss for t2m_f prediction (k={k})...")
        t.cuda.empty_cache()
        gc.collect()
        model = WeatherGNNGRU(num_features=15, hidden_dim=64, gru_hidden=16, num_outputs=1).to(device)
        optimizer = t.optim.Adam(model.parameters(), lr=0.01)
        scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
        train_losses, val_losses = [], []
        val_epochs = []  # 1-based epochs at which validation ran (x-axis of the loss plot)

        # Early-stopping state is per model: it must be reset for every loss function,
        # otherwise the best loss / weights of a previous model (trained with a
        # different criterion) would be compared against and loaded into this one.
        best_val_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        for epoch in range(num_epochs):
            epoch_start = time.time()
            print(f"Starting Epoch {epoch+1}/{num_epochs}")
            model.train()
            total_loss = 0
            num_valid_batches = 0

            for batch_idx in range(num_batches):
                start_idx = batch_idx * batch_size
                end_idx = start_idx + batch_size
                if end_idx + seq_len > num_train_steps:
                    continue
                # Exactly `batch_size` sliding windows: window j covers time steps
                # start_idx + j ... start_idx + j + seq_len - 1.
                window_stop = end_idx + seq_len - 1
                # unfold appends the window axis last -> [windows, nodes, features, seq_len];
                # move it to position 1 so the model receives [windows, seq_len, nodes, features].
                input_x = (
                    temp_train_inputs[start_idx:window_stop]
                    .unfold(0, seq_len, 1)
                    .permute(0, 3, 1, 2)
                    .contiguous()
                )
                # Target: channel 5 at the last time step of each window -> [windows, nodes, 1]
                target_y = temp_train_targets[start_idx + seq_len - 1:window_stop, :, 5].unsqueeze(-1)
                optimizer.zero_grad()
                with autocast('cuda'):
                    out = model(input_x, edge_index, input_x.shape[0], seq_len, num_nodes)
                    loss = criterion(out, target_y)
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                total_loss += loss.item()
                num_valid_batches += 1

            scheduler.step()
            avg_loss = total_loss / max(num_valid_batches, 1)
            train_losses.append(avg_loss)
            epoch_time = time.time() - epoch_start
            print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

            # Validate on every second epoch and always on the final one
            if epoch % 2 == 0 or epoch == num_epochs - 1:
                val_start = time.time()
                print(f"Starting Validation for Epoch {epoch+1}")
                model.eval()
                val_loss = 0
                num_valid_val_batches = 0
                with t.no_grad():
                    for t_step in val_steps:
                        if t_step + seq_len > num_test_steps:
                            continue
                        input_x = temp_test_inputs[t_step:t_step + seq_len].unsqueeze(0)
                        target_y = temp_test_targets[t_step:t_step + seq_len, :, 5].unsqueeze(0).unsqueeze(-1)
                        with autocast('cuda'):
                            out = model(input_x, edge_index, 1, seq_len, num_nodes)
                            val_loss += criterion(out, target_y[:, -1, :, :]).item()
                        num_valid_val_batches += 1
                val_loss = val_loss / max(num_valid_val_batches, 1)
                val_losses.append(val_loss)
                val_epochs.append(epoch + 1)
                val_time = time.time() - val_start
                print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # state_dict() returns references to the live parameters, so clone
                    # them; otherwise later optimizer steps overwrite the "best" snapshot.
                    best_model_state = {
                        name: tensor.detach().clone()
                        for name, tensor in model.state_dict().items()
                    }
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                        break

        if best_model_state is not None:
            model.load_state_dict(best_model_state)
            print("Loaded best model state based on validation loss.")

        # Test evaluation: one prediction per sliding window of the test split
        test_start = time.time()
        print(f"Starting Final Test Evaluation for {loss_name} loss (k={k})")
        model.eval()
        test_preds, test_trues = [], []
        with t.no_grad():
            for t_step in range(num_test_steps - seq_len + 1):
                input_x = temp_test_inputs[t_step:t_step + seq_len].unsqueeze(0)
                target_y = temp_test_targets[t_step:t_step + seq_len, :, 5].unsqueeze(0).unsqueeze(-1)
                with autocast('cuda'):
                    out = model(input_x, edge_index, 1, seq_len, num_nodes)
                # autocast may return half precision; promote before denormalizing
                test_preds.append(out[:, :, 0].float().cpu())
                test_trues.append(target_y[:, -1, :, 0].cpu())
        test_preds = t.cat(test_preds, dim=0)
        test_trues = t.cat(test_trues, dim=0)
        test_time = time.time() - test_start
        print(f"Finished Final Test Evaluation for {loss_name} loss, Total runtime: {test_time:.2f}s")

        # Denormalize and evaluate t2m_f
        preds_t2m = test_preds * t2m_f_std + t2m_f_mean
        trues_t2m = test_trues * t2m_f_std + t2m_f_mean
        mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
        rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
        print(f"t2m_f L1 norm (°F) (k={k}): {mae_t2m:.2f}")
        print(f"t2m_f RMSE (°F) (k={k}): {rmse_t2m:.2f}")

        # Store results
        results[(k, loss_name)] = {'MAE': mae_t2m, 'RMSE': rmse_t2m}

        # Sample denormalized outputs: node 0 for the first five test windows
        sample_preds = preds_t2m[:5, 0].numpy()
        sample_targets = trues_t2m[:5, 0].numpy()
        print(f"Sample t2m_f preds (°F) (k={k}): {sample_preds}")
        print(f"Sample t2m_f targets (°F) (k={k}): {sample_targets}")

        # Loss Plot (validation points are placed at the epochs where validation ran)
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(train_losses) + 1), train_losses, label=f'Train Loss ({loss_name})', color='blue')
        plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
        plt.xlabel('Epoch')
        plt.ylabel(f'{loss_name} Loss (Normalized)')
        plt.title(f'Training and Validation Loss for t2m_f ({loss_name}, k={k})')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'loss_plot_t2m_f_{loss_name}_k{k}.png')
        plt.show()

        # Time Series Plot for t2m_f at node 0
        plot_start = time.time()
        print(f"Starting Time Series Plotting for t2m_f (k={k})")
        plt.figure(figsize=(10, 6))
        plt.plot(preds_t2m[:, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
        plt.plot(trues_t2m[:, 0].numpy(), label='True t2m_f', color='green')
        plt.xlabel('Time Step')
        plt.ylabel('Temperature (°F)')
        plt.title(f'Predicted vs Actual t2m_f ({loss_name}, k={k})')
        plt.legend()
        plt.grid(True)
        # Include the loss name so the three runs per k do not overwrite each other
        plt.savefig(f't2m_f_timeseries_{loss_name}_k{k}.png')
        print(f"Finished Time Series Plotting, Total runtime: {time.time() - plot_start:.2f}s")
        plt.show()

# Final report: one line of denormalized error metrics per stored experiment,
# in the insertion order of the `results` dict (keys are (k, loss_name) tuples).
print("\nSummary of Results:")
for (k, loss_name) in results:
    metrics = results[(k, loss_name)]
    print(f"k={k}, Loss={loss_name}: MAE={metrics['MAE']:.2f}°F, RMSE={metrics['RMSE']:.2f}°F")

# Fourth Iteration: Replacing the GRU Layer with a Temporal Convolution Layer

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use the GPU when one is available, otherwise run on the CPU
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
print(f"Using device: {device}")

# Load the stored tensors (assumed to be pre-normalized) and place them on `device`
train_inputs_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt'
).to(device)
train_targets_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt'
).to(device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")

# Temporary split: training uses only the first 100 time steps, testing uses
# everything after the 90% mark of the time axis
train_size = int(0.9 * train_inputs_tensor.shape[0])
temp_train_inputs, temp_train_targets = train_inputs_tensor[:100], train_targets_tensor[:100]
temp_test_inputs, temp_test_targets = train_inputs_tensor[train_size:], train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Make sure every split lives on `device` (slices of tensors already there are unaffected)
temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets = (
    split.to(device)
    for split in (temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets)
)

# Output locations for plots and model checkpoints; each is created if missing
loss_plot_dir = r'F:/weather_forecasting/notebooks/final project/png/model construction & training/1dConv/loss plots'
os.makedirs(loss_plot_dir, exist_ok=True)
pred_plot_dir = r'F:/weather_forecasting/notebooks/final project/png/model construction & training/1dConv/pred vs true'
os.makedirs(pred_plot_dir, exist_ok=True)
model_save_dir = r'F:/weather_forecasting/notebooks/final project/models/1dConv'
os.makedirs(model_save_dir, exist_ok=True)

# Function to create windowed batches on-the-fly (memory-efficient)
def windowed_batch_generator(inputs_tensor, targets_tensor, window_size, batch_size, num_nodes, device):
    """Lazily yield ``(windows, targets)`` batches of sliding input windows.

    For every time step ``i`` from ``window_size - 1`` up to (but excluding) the
    last step, the sample is the ``window_size`` input frames ending at ``i``
    (trailing axis squeezed) paired with target channel 5 at step ``i + 1``.
    Samples are grouped into batches of at most ``batch_size``; the final batch
    may be smaller. Nothing is yielded when no full window with a following
    target step exists.

    ``num_nodes`` is accepted for interface compatibility but not used.

    Yields:
        Tuple of the stacked windows (``[batch, window_size, ...]``) and the
        stacked targets with a trailing singleton axis appended, both moved to
        ``device``.
    """
    final_step = inputs_tensor.shape[0] - 1
    for batch_start in range(window_size - 1, final_step, batch_size):
        steps = range(batch_start, min(batch_start + batch_size, final_step))
        input_windows = [
            inputs_tensor[step - window_size + 1:step + 1].squeeze(-1).to(device)
            for step in steps
        ]
        next_step_targets = [
            targets_tensor[step + 1, :, 5, :].squeeze(-1).to(device)
            for step in steps
        ]
        yield t.stack(input_windows), t.stack(next_step_targets).unsqueeze(-1)

class WeatherGNNWindow(nn.Module):
    """Temporal 1-D convolution per node followed by a two-layer GCN with a linear skip path.

    The Conv1d kernel spans the whole window (``kernel_size=window_size``), so
    each node's window collapses to a single ``new_features`` vector before the
    graph layers are applied.
    """

    def __init__(self, num_features=15, window_size=1, new_features=64, hidden_dim=128, num_outputs=1):
        super().__init__()
        self.num_outputs = num_outputs
        # Collapses the time axis of each node's window into `new_features` channels
        self.temporal = nn.Conv1d(in_channels=num_features, out_channels=new_features, kernel_size=window_size)
        self.gcn1 = GCNConv(new_features, hidden_dim)
        self.gcn2 = GCNConv(hidden_dim, num_outputs)
        # Skip connection from the temporal features straight to the output
        self.residual = nn.Linear(in_features=new_features, out_features=num_outputs)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x, edge_index, batch_size, num_nodes):
        """Return ``[batch, num_nodes, num_outputs]`` predictions.

        ``x`` is unpacked as ``[batch, window, num_nodes, num_features]``; the
        ``batch_size`` and ``num_nodes`` arguments are overwritten by the values
        read from ``x.shape``.
        """
        batch_size, window_size, num_nodes, num_features = x.shape

        # One Conv1d sample per (batch item, node): [batch * nodes, features, window]
        per_node = x.permute(0, 2, 3, 1).reshape(batch_size * num_nodes, num_features, window_size)
        temporal_out = self.temporal(per_node).squeeze(-1)
        skip = self.residual(temporal_out)

        # Replicate the graph once per batch item, shifting node ids so the
        # copies stay disconnected from one another
        shifted_edges = [edge_index + copy * num_nodes for copy in range(batch_size)]
        batched_edge_index = t.cat(shifted_edges, dim=1)

        hidden = t.relu(self.gcn1(temporal_out, batched_edge_index))
        hidden = self.dropout(hidden)
        hidden = self.gcn2(hidden, batched_edge_index)

        return (hidden + skip).view(batch_size, num_nodes, self.num_outputs)

# Experiment parameters

# Graph nodes: a 101 x 237 lat/lon grid flattened row-major (latitude varies slowest)
num_nodes = 23937
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
coords = np.stack(
    np.meshgrid(lat_subset, lon_subset, indexing='ij'),
    axis=2,
).reshape(lat_subset.size * lon_subset.size, 2)

# Hyper-parameter grid: temporal window lengths and neighbour counts
window_sizes = [1, 3, 6, 12]
k_values = [4, 8, 24]

# Candidate training criteria, keyed by the name used in logs and file names
loss_fns = {
    'L1': lambda x, y: (x - y).abs().mean(),
    'L2': lambda x, y: (x - y).pow(2).mean(),
    'Huber': nn.SmoothL1Loss(reduction='mean'),
}

# t2m_f statistics (°F) used to denormalize predictions and targets
t2m_f_mean, t2m_f_std = 42.36, 21.75

# Training schedule
num_epochs, batch_size = 10, 32

# Main loop over window sizes: for every (window_size, k, loss) combination train a
# WeatherGNNWindow, save it, evaluate it on the test split and write individual
# plots; after all k / loss runs of one window size, write two combined figures.
for window_size in window_sizes:
    print(f"\n--- Experiment with window_size={window_size} ---")
    loss_curves = {}  # (k, loss_name) -> train / val losses and validated epochs
    pred_data = {}    # (k, loss_name) -> node-0 predictions and targets in °F

    for k in k_values:
        print(f"\nTesting with k={k} neighbors...")
        # Build the k-nearest-neighbour graph. k+1 neighbours are requested and
        # column 0 of `indices` is dropped, leaving k neighbours per node.
        nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
        _, indices = nbrs.kneighbors(coords)
        edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)

        for loss_name, criterion in loss_fns.items():
            print(f"\nTraining with {loss_name} loss (k={k})...")
            t.cuda.empty_cache()
            gc.collect()
            model = WeatherGNNWindow(num_features=15, window_size=window_size, num_outputs=1).to(device)
            optimizer = t.optim.Adam(model.parameters(), lr=0.01)
            scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)
            train_losses, val_losses = [], []
            val_epochs = []

            # Training loop
            for epoch in range(num_epochs):
                epoch_start = time.time()
                model.train()
                total_loss = 0
                batch_count = 0
                for batch_inputs, batch_targets in windowed_batch_generator(temp_train_inputs, temp_train_targets, window_size, batch_size, num_nodes, device):
                    optimizer.zero_grad()
                    out = model(batch_inputs, edge_index, batch_size=batch_inputs.shape[0], num_nodes=num_nodes)
                    loss = criterion(out, batch_targets)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()
                    batch_count += 1
                # Advance the StepLR schedule once per epoch; without this call the
                # configured decay (x0.9 every 2 epochs) never takes effect.
                scheduler.step()
                # max(..., 1) guards against a split too short to yield any batch
                avg_loss = total_loss / max(batch_count, 1)
                train_losses.append(avg_loss)
                epoch_time = time.time() - epoch_start
                print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, Time: {epoch_time:.2f}s")

                # Validate on every second epoch and always on the final one
                if epoch % 2 == 0 or epoch == num_epochs - 1:
                    val_start = time.time()
                    model.eval()
                    val_loss = 0
                    val_batch_count = 0
                    with t.no_grad():
                        for batch_inputs, batch_targets in windowed_batch_generator(temp_test_inputs, temp_test_targets, window_size, batch_size, num_nodes, device):
                            out = model(batch_inputs, edge_index, batch_size=batch_inputs.shape[0], num_nodes=num_nodes)
                            val_loss += criterion(out, batch_targets).item()
                            val_batch_count += 1
                    val_loss /= max(val_batch_count, 1)
                    val_losses.append(val_loss)
                    val_epochs.append(epoch + 1)
                    val_time = time.time() - val_start
                    print(f"Validation Loss: {val_loss:.4f}, Validation Time: {val_time:.2f}s")

            # Save the trained model
            model_path = os.path.join(model_save_dir, f'weather_gnn_window{window_size}_k{k}_{loss_name}.pth')
            t.save(model.state_dict(), model_path)
            print(f"Saved model to {model_path}")

            # Test evaluation: collect predictions / targets and denormalize to °F
            model.eval()
            test_preds, test_trues = [], []
            with t.no_grad():
                for batch_inputs, batch_targets in windowed_batch_generator(temp_test_inputs, temp_test_targets, window_size, batch_size, num_nodes, device):
                    out = model(batch_inputs, edge_index, batch_size=batch_inputs.shape[0], num_nodes=num_nodes)
                    test_preds.append(out.cpu())
                    test_trues.append(batch_targets.cpu())
            preds_t2m = t.cat(test_preds, dim=0) * t2m_f_std + t2m_f_mean
            trues_t2m = t.cat(test_trues, dim=0) * t2m_f_std + t2m_f_mean

            # Save individual loss plot
            plt.figure(figsize=(10, 6))
            plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', color='blue')
            plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
            plt.xlabel('Epoch')
            plt.ylabel(f'{loss_name} Loss (Normalized)')
            plt.title(f'Loss Curves (window_size={window_size}, k={k}, {loss_name})')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(loss_plot_dir, f'loss_plot_window{window_size}_k{k}_{loss_name}.png'))
            plt.close()

            # Save individual pred vs true plot (node 0 over the test time steps)
            plt.figure(figsize=(10, 6))
            plt.plot(preds_t2m[:, 0, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
            plt.plot(trues_t2m[:, 0, 0].numpy(), label='True t2m_f', color='green')
            plt.xlabel('Time Step')
            plt.ylabel('Temperature (°F)')
            plt.title(f'Pred vs True t2m_f (window_size={window_size}, k={k}, {loss_name})')
            plt.legend()
            plt.grid(True)
            plt.savefig(os.path.join(pred_plot_dir, f't2m_f_timeseries_window{window_size}_k{k}_{loss_name}.png'))
            plt.close()

            # Store data for combined plots
            loss_curves[(k, loss_name)] = {'train': train_losses, 'val': val_losses, 'val_epochs': val_epochs}
            pred_data[(k, loss_name)] = {'preds': preds_t2m[:, 0, 0].numpy(), 'trues': trues_t2m[:, 0, 0].numpy()}

    # Combined loss curves: one subplot per (k, loss) pair
    fig, axes = plt.subplots(len(k_values), len(loss_fns), figsize=(15, 10), sharex=True, sharey=True)
    for i, k in enumerate(k_values):
        for j, loss_name in enumerate(loss_fns):
            ax = axes[i, j]
            data = loss_curves.get((k, loss_name), None)
            if data:
                ax.plot(range(1, len(data['train']) + 1), data['train'], label='Train')
                ax.plot(data['val_epochs'], data['val'], label='Val', marker='o')
                ax.set_title(f'k={k}, {loss_name}')
                ax.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(loss_plot_dir, f'combined_loss_window{window_size}.png'))
    plt.show()

    # Combined pred vs true: first 100 test steps at node 0, one subplot per (k, loss)
    fig, axes = plt.subplots(len(k_values), len(loss_fns), figsize=(15, 10), sharex=True, sharey=True)
    for i, k in enumerate(k_values):
        for j, loss_name in enumerate(loss_fns):
            ax = axes[i, j]
            data = pred_data.get((k, loss_name), None)
            if data:
                num_steps = min(100, len(data['preds']))
                ax.plot(data['preds'][:num_steps], label='Pred', linestyle='--')
                ax.plot(data['trues'][:num_steps], label='True')
                ax.set_title(f'k={k}, {loss_name}')
                ax.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(pred_plot_dir, f'combined_pred_vs_true_window{window_size}.png'))
    plt.show()

# Chosen Model

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use the GPU when one is available, otherwise run on the CPU
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
print(f"Using device: {device}")

# Load the normalized tensors and place them on `device`
train_inputs_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt'
).to(device)
train_targets_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt'
).to(device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Sanity check on target channel 5 (the t2m_f channel used throughout this cell)
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Temporary chronological split: first 90% of time steps for training, rest for testing
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs, temp_test_inputs = train_inputs_tensor[:train_size], train_inputs_tensor[train_size:]
temp_train_targets, temp_test_targets = train_targets_tensor[:train_size], train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Make sure every split lives on `device` (slices of tensors already there are unaffected)
temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets = (
    split.to(device)
    for split in (temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets)
)

# k-nearest-neighbour graph over the 101 x 237 lat/lon grid (neighbour search runs
# in NumPy / scikit-learn; the resulting edge index is created on `device`)
num_nodes = 23937
k = 8
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
coords = np.stack(
    np.meshgrid(lat_subset, lon_subset, indexing='ij'),
    axis=2,
).reshape(lat_subset.size * lon_subset.size, 2)
nbrs = NearestNeighbors(n_neighbors=k + 1).fit(coords)
_, indices = nbrs.kneighbors(coords)
# Row 0: each node id repeated k times; row 1: its neighbours with column 0 dropped
edge_index = t.tensor(
    np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]),
    dtype=t.long,
    device=device,
)

# Model (simplified GNN without temporal layer)
class WeatherGNN(nn.Module):
    """Three-layer GCN over a single time step with a scaled linear skip connection."""

    def __init__(self, num_features=15, hidden_dims=128, num_outputs=1):
        super().__init__()
        # Graph encoder: features -> hidden -> hidden -> outputs
        self.conv1 = GCNConv(num_features, hidden_dims)
        self.conv2 = GCNConv(hidden_dims, hidden_dims)
        self.conv3 = GCNConv(hidden_dims, num_outputs)
        self.dropout = nn.Dropout(p=0.3)
        # Per-node skip path from the raw features, scaled by a learnable weight
        self.residual = nn.Linear(in_features=num_features, out_features=num_outputs)
        self.res_weight = nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return the GCN output plus the weighted linear projection of ``x``."""
        skip = self.residual(x) * self.res_weight

        hidden = self.dropout(t.relu(self.conv1(x, edge_index)))
        hidden = self.dropout(t.relu(self.conv2(hidden, edge_index)))
        graph_out = self.conv3(hidden, edge_index)

        return graph_out + skip

# Define L1 loss function directly
def l1_loss(x, y):
    """Return the mean absolute difference between tensors ``x`` and ``y``."""
    return (x - y).abs().mean()

# t2m_f statistics (°F) for denormalization. These are fixed constants: the
# previous std()/mean() reductions over `train_targets_tensor` were overwritten
# on the very next lines, so they only cost two full-tensor passes and were dropped.
t2m_f_mean = 42.36  # °F
t2m_f_std = 21.75   # °F

# Training loop configuration with early stopping
num_epochs = 10
# Validate on every second time step of the test split (last step excluded)
val_steps = list(range(0, temp_test_inputs.shape[0] - 1, 2))
patience = 5                    # validations without improvement before stopping
best_val_loss = float('inf')
patience_counter = 0
best_model_state = None

print("\nTraining with L1 loss for t2m_f prediction...")
# Release cached GPU memory and collect garbage before allocating the model
t.cuda.empty_cache()
gc.collect()
model = WeatherGNN(num_features=15, hidden_dims=128, num_outputs=1).to(device)
optimizer = t.optim.Adam(model.parameters(), lr=0.01)
# Multiply the learning rate by 0.9 every 5 scheduler steps
scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
train_losses, val_losses = [], []
val_epochs = []  # 1-based epochs at which validation ran

# Train one graph (time step) at a time, validate every second epoch, and keep a
# snapshot of the weights with the lowest validation loss.
for epoch in range(num_epochs):
    epoch_start = time.time()
    print(f"Starting Epoch {epoch+1}/{num_epochs}")
    model.train()
    total_loss = 0
    for t_step in range(temp_train_inputs.shape[0] - 1):
        # One optimizer step per time step: node features in, target channel 5 out
        input_x = temp_train_inputs[t_step].reshape(num_nodes, -1)
        target_y = temp_train_targets[t_step, :, 5, :].reshape(num_nodes, -1)
        optimizer.zero_grad()
        out = model(input_x, edge_index)
        loss = l1_loss(out, target_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()
    avg_loss = total_loss / (temp_train_inputs.shape[0] - 1)
    train_losses.append(avg_loss)
    epoch_time = time.time() - epoch_start
    print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

    # Validate on every second epoch and always on the final one
    if epoch % 2 == 0 or epoch == num_epochs - 1:
        val_start = time.time()
        print(f"Starting Validation for Epoch {epoch+1}")
        model.eval()
        val_loss = 0
        with t.no_grad():
            for t_step in val_steps:
                input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                out = model(input_x, edge_index)
                val_loss += l1_loss(out, target_y).item()
        # max(..., 1) avoids a ZeroDivisionError when the test split is too short
        val_loss = val_loss / max(len(val_steps), 1)
        val_losses.append(val_loss)
        val_time = time.time() - val_start
        val_epochs.append(epoch + 1)
        print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # state_dict() returns references to the live parameters, so clone them;
            # otherwise later optimizer steps overwrite the "best" snapshot in place.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                break

# Save the best model state
if best_model_state is None:
    # No validation ever improved on the initial best (e.g. NaN losses): fall back
    # to the current weights instead of writing an unusable `None` checkpoint.
    print("Warning: no best validation state was recorded; saving the final model weights instead.")
    best_model_state = model.state_dict()
t.save(best_model_state, 'best_weather_gnn_model.pth')
print("Best model saved to 'best_weather_gnn_model.pth'")

# Chosen Model: Different Learning Rates

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os
import subprocess
from datetime import datetime

# Use the GPU when one is available, otherwise run on the CPU
device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
print(f"Using device: {device}")

# Load the normalized tensors and place them on `device`
train_inputs_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt'
).to(device)
train_targets_tensor = t.load(
    'f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt'
).to(device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Sanity check on target channel 5 (the t2m_f channel used throughout this cell)
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Temporary chronological split: first 90% of time steps for training, rest for testing
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs, temp_test_inputs = train_inputs_tensor[:train_size], train_inputs_tensor[train_size:]
temp_train_targets, temp_test_targets = train_targets_tensor[:train_size], train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Make sure every split lives on `device` (slices of tensors already there are unaffected)
temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets = (
    split.to(device)
    for split in (temp_train_inputs, temp_train_targets, temp_test_inputs, temp_test_targets)
)

# k-nearest-neighbour graph over the 101 x 237 lat/lon grid (neighbour search runs
# in NumPy / scikit-learn; the resulting edge index is created on `device`)
num_nodes = 23937
k = 8
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
coords = np.stack(
    np.meshgrid(lat_subset, lon_subset, indexing='ij'),
    axis=2,
).reshape(lat_subset.size * lon_subset.size, 2)
nbrs = NearestNeighbors(n_neighbors=k + 1).fit(coords)
_, indices = nbrs.kneighbors(coords)
# Row 0: each node id repeated k times; row 1: its neighbours with column 0 dropped
edge_index = t.tensor(
    np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]),
    dtype=t.long,
    device=device,
)

# Model (simplified GNN without temporal layer)
class WeatherGNN(t.nn.Module):
    """Three-layer GCN regressor with a scaled linear skip path.

    Node features go through two hidden ``GCNConv`` layers (each followed by
    ReLU and dropout) and a final ``GCNConv`` that projects to
    ``num_outputs``. A linear map of the raw input, multiplied by the
    learnable scalar ``res_weight`` (initialised to 2.0), is added to that
    projection.
    """

    def __init__(self, num_features=15, hidden_dims=128, num_outputs=1):
        super().__init__()
        # Layer widths in order: input -> hidden -> hidden -> output.
        widths = (num_features, hidden_dims, hidden_dims, num_outputs)
        self.conv1, self.conv2, self.conv3 = (
            GCNConv(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])
        )
        self.dropout = t.nn.Dropout(0.3)
        self.residual = t.nn.Linear(num_features, num_outputs)
        self.res_weight = t.nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return per-node predictions for features ``x`` on graph ``edge_index``."""
        skip = self.residual(x) * self.res_weight
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = self.dropout(conv(hidden, edge_index).relu())
        return self.conv3(hidden, edge_index) + skip

# Define L1 loss function directly
def l1_loss(x, y):
    """Return the mean absolute difference between tensors ``x`` and ``y``."""
    return (x - y).abs().mean()

# Denormalization statistics for t2m_f, in °F. These are fixed constants:
# the tensors loaded above are already normalized, so statistics taken from
# them cannot be used to map predictions back to °F.
t2m_f_mean = 42.36  # °F
t2m_f_std = 21.75   # °F

# Learning rate configurations as (learning rate, use StepLR scheduler) pairs:
# every rate is run first with a stationary LR, then with the adaptive schedule.
learning_rates = [
    (rate, adaptive)
    for adaptive in (False, True)
    for rate in (0.01, 0.005, 0.001, 0.0005)
]

# Per-run curves collected for the comparison plots (one entry per config).
all_train_losses = []
all_val_losses = []
all_val_epochs = []
labels = []

# Shared training settings for every configuration.
num_epochs = 18
# Validate on every second held-out time index; the last index is excluded.
val_steps = list(range(0, temp_test_inputs.shape[0] - 1, 2))
# Early stopping counts validations (not epochs) without improvement.
patience = 5
# One timestamp per sweep so all artifacts of a run share a suffix.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

for lr, use_adaptive in learning_rates:
    # One training run per (learning rate, scheduler flag) pair. Each run
    # saves its best checkpoint, tries to package a loader executable with
    # PyInstaller, and records its loss curves for the comparison plots.
    lr_str = str(lr).replace('.', 'p')
    scheduler_str = 'adaptive' if use_adaptive else 'stationary'
    model_filename = f'model_lr{lr_str}_{scheduler_str}_{timestamp}.pth'

    print(f"\nTraining with learning rate {lr}, Scheduler: {scheduler_str}...")
    t.cuda.empty_cache()
    gc.collect()
    model = WeatherGNN(num_features=15, num_outputs=1).to(device)
    optimizer = t.optim.Adam(model.parameters(), lr=lr)
    scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9) if use_adaptive else None
    train_losses, val_losses = [], []
    val_epochs = []
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        epoch_start = time.time()
        print(f"Starting Epoch {epoch+1}/{num_epochs}")
        model.train()
        total_loss = 0
        # One optimizer step per time index; the final index is left out.
        for t_step in range(temp_train_inputs.shape[0] - 1):
            input_x = temp_train_inputs[t_step].reshape(num_nodes, -1)
            target_y = temp_train_targets[t_step, :, 5, :].reshape(num_nodes, -1)
            optimizer.zero_grad()
            out = model(input_x, edge_index)
            loss = l1_loss(out, target_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Read the LR before stepping the scheduler so the log reports the
        # rate this epoch actually trained with, not the next epoch's.
        epoch_lr = optimizer.param_groups[0]['lr']
        if scheduler is not None:
            scheduler.step()
        avg_loss = total_loss / (temp_train_inputs.shape[0] - 1)
        train_losses.append(avg_loss)
        epoch_time = time.time() - epoch_start
        print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {epoch_lr:.6f}, Total runtime: {epoch_time:.2f}s")

        # Validate on every second epoch and always on the last one.
        if epoch % 2 == 0 or epoch == num_epochs - 1:
            val_start = time.time()
            print(f"Starting Validation for Epoch {epoch+1}")
            model.eval()
            val_loss = 0
            with t.no_grad():
                for t_step in val_steps:
                    input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                    target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                    out = model(input_x, edge_index)
                    val_loss += l1_loss(out, target_y).item()
            val_loss = val_loss / len(val_steps)
            val_losses.append(val_loss)
            val_epochs.append(epoch + 1)
            val_time = time.time() - val_start
            print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # state_dict() hands back references to the live parameters,
                # so clone them; otherwise later optimizer steps would
                # overwrite the "best" snapshot in place.
                best_model_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                    break

    # Save the best model state. If no validation ever improved on +inf
    # (e.g. the loss was NaN), fall back to the final weights instead of
    # writing None to disk.
    if best_model_state is None:
        best_model_state = model.state_dict()
    t.save(best_model_state, model_filename)
    print(f"Best model saved to '{model_filename}'")

    # Create executable with PyInstaller
    print(f"Starting creation of executable for {model_filename}")
    exe_start = time.time()
    # Temporary loader script; it mirrors the WeatherGNN architecture used
    # above so the saved state dict can be loaded. map_location lets the
    # checkpoint load on a machine without CUDA.
    script_content = f"""
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv

class WeatherGNN(nn.Module):
    def __init__(self, num_features=15, num_outputs=1):
        super().__init__()
        self.conv1 = GCNConv(num_features, 128)
        self.conv2 = GCNConv(128, 128)
        self.conv3 = GCNConv(128, num_outputs)
        self.dropout = nn.Dropout(0.3)
        self.residual = nn.Linear(num_features, num_outputs)
        self.res_weight = nn.Parameter(torch.tensor(2.0))

    def forward(self, x, edge_index):
        residual = self.residual(x) * self.res_weight
        x = self.conv1(x, edge_index).relu()
        x = self.dropout(x)
        x = self.conv2(x, edge_index).relu()
        x = self.dropout(x)
        x = self.conv3(x, edge_index)
        return x + residual

def load_model(model_path):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = WeatherGNN(num_features=15, num_outputs=1).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model

if __name__ == '__main__':
    model = load_model('{model_filename}')
    print(f"Loaded model from {model_filename}")
"""
    temp_script_path = f'temp_model_script_{lr_str}_{scheduler_str}_{timestamp}.py'
    with open(temp_script_path, 'w') as f:
        f.write(script_content)

    # Run PyInstaller to create executable. Packaging is best-effort: a
    # failed build or a missing pyinstaller binary must not abort the sweep
    # or lose the curves recorded below.
    exe_filename = f'model_lr{lr_str}_{scheduler_str}_{timestamp}'
    try:
        subprocess.run([
            'pyinstaller',
            '--onefile',
            '--name', exe_filename,
            temp_script_path
        ], check=True)
        print(f"Executable creation completed: {exe_filename}.exe in dist directory")
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Failed to create executable: {e}")
    finally:
        # Clean up temporary script
        if os.path.exists(temp_script_path):
            os.remove(temp_script_path)
    exe_time = time.time() - exe_start
    print(f"Executable creation took {exe_time:.2f} seconds")

    # Store results for plotting
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_val_epochs.append(val_epochs)
    labels.append(f'lr={lr}, {scheduler_str}')

# Comparison figures: one panel per recorded run, two panels per row. The
# grid is sized from the number of runs (4 rows for the 8 default configs),
# so adding or removing a configuration neither overflows the axes array
# nor leaves empty frames behind.
runs = list(zip(all_train_losses, all_val_losses, all_val_epochs, labels))
num_rows = max(1, (len(runs) + 1) // 2)

# Plot loss curves in subplots
fig, axes = plt.subplots(num_rows, 2, figsize=(15, 5 * num_rows), squeeze=False)
axes = axes.flatten()
for ax, (train_losses, val_losses, val_epochs, label) in zip(axes, runs):
    # Train loss exists for every epoch; validation only for the epochs
    # recorded in val_epochs.
    ax.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', color='blue')
    ax.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('L1 Loss (Normalized)')
    ax.set_title(f'Loss Curves ({label})')
    ax.legend()
    ax.grid(True)
for ax in axes[len(runs):]:
    ax.set_visible(False)  # hide panels that have no run to show
fig.tight_layout()
plt.savefig(f'loss_curves_comparison_{timestamp}.png')
plt.show()

# Plot training vs validation loss in subplots
fig, axes = plt.subplots(num_rows, 2, figsize=(15, 5 * num_rows), squeeze=False)
axes = axes.flatten()
for ax, (train_losses, val_losses, val_epochs, label) in zip(axes, runs):
    # Interpolate validation losses to match training loss length
    val_losses_interp = np.interp(range(1, len(train_losses) + 1), val_epochs, val_losses)
    ax.scatter(train_losses, val_losses_interp, marker='o')
    ax.set_xlabel('Training Loss')
    ax.set_ylabel('Validation Loss')
    ax.set_title(f'Training vs Validation Loss ({label})')
    ax.grid(True)
for ax in axes[len(runs):]:
    ax.set_visible(False)
fig.tight_layout()
plt.savefig(f'train_vs_val_loss_comparison_{timestamp}.png')
plt.show()

# Chosen Model: Extended Training

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use CUDA when it is available, otherwise run everything on the CPU.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load the normalized input and target tensors and move them to the device.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt').to(device)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt').to(device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Target channel 5 (dim 2) is t2m_f.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Temporary train/test split: the first 90% of dim 0 trains the model and
# the remaining 10% is held out.
train_size = int(0.9 * train_inputs_tensor.shape[0])
print(f"Train size: {train_size}")
temp_train_inputs, temp_train_targets = train_inputs_tensor[:train_size], train_targets_tensor[:train_size]
temp_test_inputs, temp_test_targets = train_inputs_tensor[train_size:], train_targets_tensor[train_size:]
print(f"Train/test split: train shape {temp_train_inputs.shape}, test shape {temp_test_inputs.shape}")

# Place each split on the device (the source tensors were moved above).
temp_train_inputs = temp_train_inputs.to(device)
temp_test_inputs = temp_test_inputs.to(device)
temp_train_targets = temp_train_targets.to(device)
temp_test_targets = temp_test_targets.to(device)

# Graph connectivity: each grid point is linked to its k nearest grid points.
# Neighbours are found on the CPU; the resulting index tensor goes to the device.
num_nodes = 23937
k = 8
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
lat_grid, lon_grid = np.meshgrid(lat_subset, lon_subset, indexing='ij')
coords = np.column_stack([lat_grid.ravel(), lon_grid.ravel()])
nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
_, indices = nbrs.kneighbors(coords)
# Column 0 of `indices` is skipped (presumably the point itself), leaving k
# neighbours per node.
edge_pairs = np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()])
edge_index = t.tensor(edge_pairs, dtype=t.long).to(device)

# Model: three GCNConv layers plus a scaled linear skip connection
class WeatherGNN(t.nn.Module):
    """Three-layer GCN regressor with a scaled linear skip path.

    Args:
        num_features: Number of input features per node.
        num_outputs: Number of predicted values per node.
        hidden_dims: Width of the two hidden ``GCNConv`` layers. Keyword-only
            so existing positional calls keep their meaning; the default of
            128 reproduces the previously hard-coded width.
    """

    def __init__(self, num_features=15, num_outputs=1, *, hidden_dims=128):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_dims)
        self.conv2 = GCNConv(hidden_dims, hidden_dims)
        self.conv3 = GCNConv(hidden_dims, num_outputs)
        self.dropout = t.nn.Dropout(0.3)
        # Linear shortcut from the raw features to the output, scaled by a
        # learnable scalar that starts at 2.0.
        self.residual = t.nn.Linear(num_features, num_outputs)
        self.res_weight = t.nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return per-node predictions for features ``x`` on graph ``edge_index``."""
        residual = self.residual(x) * self.res_weight
        x = self.conv1(x, edge_index).relu()
        x = self.dropout(x)
        x = self.conv2(x, edge_index).relu()
        x = self.dropout(x)
        x = self.conv3(x, edge_index)
        return x + residual

# Loss function (L1 only for now); keyed by name so the training loop can
# iterate over several criteria if more are added.
loss_fns = {
    'L1': lambda x, y: t.mean(t.abs(x - y))
}

# Denormalization statistics for t2m_f (target channel index 5), in °F.
# These are fixed constants: the loaded targets are already normalized, so
# statistics computed from them cannot map predictions back to °F.
t2m_f_mean = 42.36  # °F
t2m_f_std = 21.75   # °F

# Training loop settings with early stopping
num_epochs = 35
# Validate on every second held-out time index; the last index is excluded.
val_steps = list(range(0, temp_test_inputs.shape[0] - 1, 2))
# Early stopping counts validations (not epochs) without improvement.
patience = 5
best_val_loss = float('inf')
patience_counter = 0
best_model_state = None

for loss_name, criterion in loss_fns.items():
    # Train one model per loss function, keep the weights with the lowest
    # validation loss, then evaluate and plot on the held-out split.
    print(f"\nTraining with {loss_name} loss for t2m_f prediction...")
    t.cuda.empty_cache()
    gc.collect()
    model = WeatherGNN(num_features=15, num_outputs=1).to(device)
    optimizer = t.optim.Adam(model.parameters(), lr=0.01)
    scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)  # Slower decay
    train_losses, val_losses = [], []
    # 1-based epoch number of every validation, used as the plot x-axis.
    val_epochs = []
    # Early-stopping state is reset per loss so one run cannot inherit the
    # best loss or patience count of the previous one.
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        epoch_start = time.time()
        print(f"Starting Epoch {epoch+1}/{num_epochs}")
        model.train()
        total_loss = 0
        # One optimizer step per time index; the final index is left out.
        for t_step in range(temp_train_inputs.shape[0] - 1):
            input_x = temp_train_inputs[t_step].reshape(num_nodes, -1)  # (23937, 15)
            target_y = temp_train_targets[t_step, :, 5, :].reshape(num_nodes, -1)  # (23937, 1) for t2m_f
            optimizer.zero_grad()
            out = model(input_x, edge_index)
            loss = criterion(out, target_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Read the LR before stepping the scheduler so the log reports the
        # rate this epoch actually trained with, not the next epoch's.
        epoch_lr = optimizer.param_groups[0]['lr']
        scheduler.step()
        avg_loss = total_loss / (temp_train_inputs.shape[0] - 1)
        train_losses.append(avg_loss)
        epoch_time = time.time() - epoch_start
        print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {epoch_lr:.6f}, Total runtime: {epoch_time:.2f}s")

        # Validate on every fifth epoch and always on the last one.
        if epoch % 5 == 0 or epoch == num_epochs - 1:
            val_start = time.time()
            print(f"Starting Validation for Epoch {epoch+1}")
            model.eval()
            val_loss = 0
            with t.no_grad():
                for t_step in val_steps:
                    input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
                    target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
                    out = model(input_x, edge_index)
                    val_loss += criterion(out, target_y).item()
            val_loss = val_loss / len(val_steps)
            val_losses.append(val_loss)
            val_epochs.append(epoch + 1)
            val_time = time.time() - val_start
            print(f"Finished Validation for Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Total runtime: {val_time:.2f}s")

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # state_dict() hands back references to the live parameters,
                # so clone them; otherwise later optimizer steps would
                # overwrite the "best" snapshot in place.
                best_model_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping at epoch {epoch+1}: Validation loss has not improved for {patience} validations.")
                    break

    # Save the best model state. If no validation ever improved on +inf
    # (e.g. the loss was NaN), fall back to the final weights instead of
    # writing None to disk.
    if best_model_state is None:
        best_model_state = model.state_dict()
    t.save(best_model_state, r'f:\weather_forecasting\notebooks\final project\models\paths\best_weather_gnn_model_ext_training.pth')
    print("Best model saved to 'f:/weather_forecasting/notebooks/final project/models/paths/best_weather_gnn_model_ext_training.pth'")

    # Load best model state so the evaluation below uses the snapshot, not
    # the weights from the final epoch.
    model.load_state_dict(best_model_state)
    print("Loaded best model state based on validation loss.")

    # Test evaluation
    test_start = time.time()
    print(f"Starting Final Test Evaluation for {loss_name} loss")
    model.eval()
    test_preds, test_trues = [], []
    with t.no_grad():
        for t_step in range(temp_test_inputs.shape[0] - 1):
            input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
            target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
            out = model(input_x, edge_index)
            test_preds.append(out.cpu())
            test_trues.append(target_y.cpu())
    test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)
    test_time = time.time() - test_start
    print(f"Finished Final Test Evaluation for {loss_name} loss, Total runtime: {test_time:.2f}s")

    # Denormalize and evaluate t2m_f
    preds_t2m = test_preds * t2m_f_std + t2m_f_mean
    trues_t2m = test_trues * t2m_f_std + t2m_f_mean
    mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
    rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
    print(f"t2m_f L1 norm (°F): {mae_t2m:.2f}")
    print(f"t2m_f RMSE (°F): {rmse_t2m:.2f}")

    # Denormalized sample outputs: node 0 at the first five time indices.
    sample_preds = preds_t2m[:5, 0, 0].numpy()
    sample_targets = trues_t2m[:5, 0, 0].numpy()
    print(f"Sample t2m_f preds (°F): {sample_preds}")
    print(f"Sample t2m_f targets (°F): {sample_targets}")

    # Loss Plot. Validation points sit at the epochs they were measured on,
    # on the same 1-based axis as the training curve.
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label=f'Train Loss ({loss_name})', color='blue')
    plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
    plt.xlabel('Epoch')
    plt.ylabel(f'{loss_name} Loss (Normalized)')
    plt.title(f'Training and Validation Loss for t2m_f ({loss_name})')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'loss_plot_t2m_f_{loss_name}.png')
    plt.show()

    # Time Series Plot for t2m_f (node 0 across the held-out time indices)
    plot_start = time.time()
    print("Starting Time Series Plotting for t2m_f")
    plt.figure(figsize=(10, 6))
    plt.plot(preds_t2m[:, 0, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
    plt.plot(trues_t2m[:, 0, 0].numpy(), label='True t2m_f', color='green')
    plt.xlabel('Time Step')
    plt.ylabel('Temperature (°F)')
    plt.title('Predicted vs Actual t2m_f')
    plt.legend()
    plt.grid(True)
    plt.savefig('t2m_f_timeseries.png')
    print(f"Finished Time Series Plotting, Total runtime: {time.time() - plot_start:.2f}s")
    plt.show()

In [None]:
# Re-run the held-out evaluation and plots for the model trained in the
# extended-training cell, using the globals that cell leaves behind.

# Load best model state
if best_model_state is not None:
    model.load_state_dict(best_model_state)
    print("Loaded best model state based on validation loss.")

# Test evaluation
test_start = time.time()
print("Starting Final Test Evaluation")
model.eval()
test_preds, test_trues = [], []
with t.no_grad():
    for t_step in range(temp_test_inputs.shape[0] - 1):
        input_x = temp_test_inputs[t_step].reshape(num_nodes, -1)
        target_y = temp_test_targets[t_step, :, 5, :].reshape(num_nodes, -1)
        out = model(input_x, edge_index)
        test_preds.append(out.cpu())
        test_trues.append(target_y.cpu())
test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)
test_time = time.time() - test_start
print(f"Finished Final Test Evaluation, Total runtime: {test_time:.2f}s")

# Denormalize and evaluate t2m_f
preds_t2m = test_preds * t2m_f_std + t2m_f_mean
trues_t2m = test_trues * t2m_f_std + t2m_f_mean
mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
print(f"t2m_f L1 norm (°F): {mae_t2m:.2f}")
print(f"t2m_f RMSE (°F): {rmse_t2m:.2f}")

# Denormalized sample outputs: node 0 at the first five time indices.
sample_preds = preds_t2m[:5, 0, 0].numpy()
sample_targets = trues_t2m[:5, 0, 0].numpy()
print(f"Sample t2m_f preds (°F): {sample_preds}")
print(f"Sample t2m_f targets (°F): {sample_targets}")

# Loss Plot. The x positions of the validation points are rebuilt from the
# extended-training schedule (validate when epoch % 5 == 0 or on the last
# scheduled epoch) over the epochs that actually ran, so they always match
# val_losses from that run rather than a list left over from another cell.
val_epochs = [
    epoch + 1
    for epoch in range(len(train_losses))
    if epoch % 5 == 0 or epoch == num_epochs - 1
]
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', color='blue')
plt.plot(val_epochs, val_losses, label='Validation Loss', color='orange', marker='o')
plt.xlabel('Epoch')
plt.ylabel('L1 Loss (Normalized)')
plt.title('Training and Validation Loss for t2m_f')
plt.legend()
plt.grid(True)
plt.savefig('loss_plot_t2m_f.png')
plt.show()

# Time Series Plot for t2m_f (node 0 across the held-out time indices)
plot_start = time.time()
print("Starting Time Series Plotting for t2m_f")
plt.figure(figsize=(10, 6))
plt.plot(preds_t2m[:, 0, 0].numpy(), label='Pred t2m_f', color='red', linestyle='--')
plt.plot(trues_t2m[:, 0, 0].numpy(), label='True t2m_f', color='green')
plt.xlabel('Time Step')
plt.ylabel('Temperature (°F)')
plt.title('Predicted vs Actual t2m_f')
plt.legend()
plt.grid(True)
plt.savefig('t2m_f_timeseries.png')
print(f"Finished Time Series Plotting, Total runtime: {time.time() - plot_start:.2f}s")
plt.show()

# Testing function to load and test the model on new data
def test_model(model_path, inputs_tensor, targets_tensor, edge_index, num_nodes, device, t2m_f_mean, t2m_f_std, lat_subset, lon_subset):
    """Load a saved ``WeatherGNN`` state dict, score it, and plot the results.

    Runs the model on every time index of ``inputs_tensor`` except the last,
    denormalizes predictions and targets with ``t2m_f_mean`` / ``t2m_f_std``,
    prints MAE and RMSE, and saves/shows two figures: time series at a few
    nodes and the absolute error over the lat/lon grid at the first index.

    Args:
        model_path: Path of the state dict written by ``t.save``.
        inputs_tensor: Normalized inputs, indexed by time on dim 0.
        targets_tensor: Normalized targets; channel 5 on dim 2 is t2m_f.
        edge_index: Graph connectivity passed to the model.
        num_nodes: Number of graph nodes each time slice is reshaped to.
        device: Device the model is placed on.
        t2m_f_mean: Mean used to denormalize t2m_f.
        t2m_f_std: Standard deviation used to denormalize t2m_f.
        lat_subset: Grid latitudes (rows of the spatial plot).
        lon_subset: Grid longitudes (columns of the spatial plot).
    """
    model = WeatherGNN(num_features=15, num_outputs=1).to(device)
    # map_location lets a checkpoint saved from a CUDA run load on a host
    # without a GPU.
    model.load_state_dict(t.load(model_path, map_location=device))
    model.eval()
    print(f"Model loaded from {model_path}")

    test_preds, test_trues = [], []
    with t.no_grad():
        for t_step in range(inputs_tensor.shape[0] - 1):
            input_x = inputs_tensor[t_step].reshape(num_nodes, -1)
            target_y = targets_tensor[t_step, :, 5, :].reshape(num_nodes, -1)
            out = model(input_x, edge_index)
            test_preds.append(out.cpu())
            test_trues.append(target_y.cpu())
    test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)

    # Denormalize and evaluate t2m_f
    preds_t2m = test_preds * t2m_f_std + t2m_f_mean
    trues_t2m = test_trues * t2m_f_std + t2m_f_mean
    mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
    rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
    print(f"t2m_f L1 norm (°F): {mae_t2m:.2f}")
    print(f"t2m_f RMSE (°F): {rmse_t2m:.2f}")

    # Plot variation at specific nodes
    nodes_to_plot = [0, 236, 8926, 23500]  # Example: NW, NE, Central, SW
    plt.figure(figsize=(12, 8))
    for idx in nodes_to_plot:
        plt.plot(preds_t2m[:, idx, 0].numpy(), label=f'Pred Node {idx}', linestyle='--')
        plt.plot(trues_t2m[:, idx, 0].numpy(), label=f'True Node {idx}', linestyle='-')
    plt.xlabel('Time Step')
    plt.ylabel('Temperature (°F)')
    plt.title('Predicted vs True t2m_f at Selected Nodes')
    plt.legend()
    plt.grid(True)
    plt.savefig('t2m_f_variation_nodes.png')
    plt.show()

    # Plot spatial difference for first time step
    first_pred = preds_t2m[0, :, 0].numpy()
    first_true = trues_t2m[0, :, 0].numpy()
    diff = np.abs(first_pred - first_true)
    # Take the grid shape from the coordinate vectors rather than a
    # hard-coded 101 x 237 so other grids work too.
    diff_grid = diff.reshape(len(lat_subset), len(lon_subset))
    plt.figure(figsize=(15, 5))
    # extend='max' shades errors above the top level (5 °F) instead of
    # leaving those regions blank.
    plt.contourf(lon_subset, lat_subset, diff_grid, cmap='RdBu_r', levels=np.linspace(0, 5, 21), extend='max')
    plt.colorbar(label='Absolute Difference (°F)')
    plt.title('Absolute Difference (Pred - True) for t2m_f at First Test Time Step')
    plt.xlabel('Longitude (°E)')
    plt.ylabel('Latitude (°N)')
    plt.savefig('t2m_f_grid_difference.png')
    plt.show()

# Example usage of test_model (uncomment to use with new data)
# test_model('best_weather_gnn_model.pth', new_inputs_tensor, new_targets_tensor, edge_index, num_nodes, device, t2m_f_mean, t2m_f_std, lat_subset, lon_subset)

# Early Testing and Model Evaluation

In [None]:
# Testing function to load and test the model on test data
def test_model(model_path, inputs_tensor, targets_tensor, edge_index, num_nodes, device, t2m_f_mean, t2m_f_std, lat_subset, lon_subset):
    """Load a saved ``WeatherGNN`` state dict, score it, and plot the results.

    Runs the model on every time index of ``inputs_tensor`` except the last,
    denormalizes predictions and targets with ``t2m_f_mean`` / ``t2m_f_std``,
    prints MAE and RMSE, and saves/shows two figures: time series at a few
    nodes and the absolute error over the lat/lon grid at the first index.

    Args:
        model_path: Path of the state dict written by ``t.save``.
        inputs_tensor: Normalized inputs, indexed by time on dim 0.
        targets_tensor: Normalized targets; channel 5 on dim 2 is t2m_f.
        edge_index: Graph connectivity passed to the model.
        num_nodes: Number of graph nodes each time slice is reshaped to.
        device: Device the model is placed on.
        t2m_f_mean: Mean used to denormalize t2m_f.
        t2m_f_std: Standard deviation used to denormalize t2m_f.
        lat_subset: Grid latitudes (rows of the spatial plot).
        lon_subset: Grid longitudes (columns of the spatial plot).

    Raises:
        ValueError: If either tensor has fewer than 2 time indices, if the
            first-step predictions or targets contain NaN/inf, or if the
            node count does not match the lat/lon grid.
    """
    model = WeatherGNN(num_features=15, num_outputs=1).to(device)
    # map_location lets a checkpoint saved from a CUDA run load on a host
    # without a GPU.
    model.load_state_dict(t.load(model_path, map_location=device))
    model.eval()
    print(f"Model loaded from {model_path}")

    # Validate inputs
    print(f"Inputs tensor shape: {inputs_tensor.shape}")
    print(f"Targets tensor shape: {targets_tensor.shape}")
    if inputs_tensor.shape[0] < 2 or targets_tensor.shape[0] < 2:
        raise ValueError("Input and target tensors must have at least 2 time steps for prediction.")

    test_preds, test_trues = [], []
    with t.no_grad():
        for t_step in range(inputs_tensor.shape[0] - 1):
            input_x = inputs_tensor[t_step].reshape(num_nodes, -1)
            target_y = targets_tensor[t_step, :, 5, :].reshape(num_nodes, -1)
            out = model(input_x, edge_index)
            test_preds.append(out.cpu())
            test_trues.append(target_y.cpu())
    test_preds, test_trues = t.stack(test_preds), t.stack(test_trues)

    # Denormalize and evaluate t2m_f
    preds_t2m = test_preds * t2m_f_std + t2m_f_mean
    trues_t2m = test_trues * t2m_f_std + t2m_f_mean
    mae_t2m = t.mean(t.abs(preds_t2m - trues_t2m)).item()
    rmse_t2m = t.sqrt(t.mean((preds_t2m - trues_t2m) ** 2)).item()
    print(f"t2m_f L1 norm (°F): {mae_t2m:.2f}")
    print(f"t2m_f RMSE (°F): {rmse_t2m:.2f}")

    # Plot variation at specific nodes
    nodes_to_plot = [0, 236, 8926, 23500]  # Example: NW, NE, Central, SW
    plt.figure(figsize=(12, 8))
    for idx in nodes_to_plot:
        plt.plot(preds_t2m[:, idx, 0].numpy(), label=f'Pred Node {idx}', linestyle='--')
        plt.plot(trues_t2m[:, idx, 0].numpy(), label=f'True Node {idx}', linestyle='-')
    plt.xlabel('Time Step')
    plt.ylabel('Temperature (°F)')
    plt.title('Predicted vs True t2m_f at Selected Nodes')
    plt.legend()
    plt.grid(True)
    plt.savefig('t2m_f_variation_nodes.png')
    plt.show()

    # Debug and plot spatial difference for first time step
    first_pred = preds_t2m[0, :, 0].numpy()
    first_true = trues_t2m[0, :, 0].numpy()
    print(f"First predicted t2m_f shape: {first_pred.shape}")
    print(f"First true t2m_f shape: {first_true.shape}")
    print(f"First predicted t2m_f sample: {first_pred[:5]}")
    print(f"First true t2m_f sample: {first_true[:5]}")

    # Check for NaN or infinite values
    if np.any(np.isnan(first_pred)) or np.any(np.isnan(first_true)):
        raise ValueError("NaN values detected in predictions or true values.")
    if np.any(np.isinf(first_pred)) or np.any(np.isinf(first_true)):
        raise ValueError("Infinite values detected in predictions or true values.")

    diff = np.abs(first_pred - first_true)
    print(f"Difference min: {np.min(diff):.2f}, max: {np.max(diff):.2f}, mean: {np.mean(diff):.2f}")

    # Reshape difference to 2D grid
    if diff.shape[0] != num_nodes:
        raise ValueError(f"Expected {num_nodes} nodes, but got {diff.shape[0]}")
    if len(lat_subset) * len(lon_subset) != num_nodes:
        raise ValueError(f"Grid size mismatch: lat_subset ({len(lat_subset)}) * lon_subset ({len(lon_subset)}) != num_nodes ({num_nodes})")
    diff_grid = diff.reshape(len(lat_subset), len(lon_subset))

    # The active matplotlib backend is left untouched: switching to 'Agg'
    # here would change the backend for the whole session and turn the
    # plt.show() below into a no-op. savefig works with any backend.
    plt.figure(figsize=(15, 5))
    # The top contour level follows the largest error (at least 1 °F) so no
    # region is left unshaded.
    plt.contourf(lon_subset, lat_subset, diff_grid, cmap='RdBu_r', levels=np.linspace(0, max(np.max(diff), 1), 21))
    plt.colorbar(label='Absolute Difference (°F)')
    plt.title('Absolute Difference (Pred - True) for t2m_f at First Test Time Step')
    plt.xlabel('Longitude (°E)')
    plt.ylabel('Latitude (°N)')

    # Save the plot
    save_path = 't2m_f_grid_difference.png'
    plt.savefig(save_path)
    print(f"Spatial grid plot saved to: {os.path.abspath(save_path)}")

    # Display the plot
    plt.show()

# Run the test on the test dataset: evaluate the checkpoint written by the
# extended-training cell on the held-out split (temp_test_inputs /
# temp_test_targets), reusing the graph, device, denormalization constants
# and grid coordinates already defined in the notebook session.
test_model(
    model_path=r'f:\weather_forecasting\notebooks\final project\models\paths\best_weather_gnn_model_ext_training.pth',
    inputs_tensor=temp_test_inputs,
    targets_tensor=temp_test_targets,
    edge_index=edge_index,
    num_nodes=num_nodes,
    device=device,
    t2m_f_mean=t2m_f_mean,
    t2m_f_std=t2m_f_std,
    lat_subset=lat_subset,
    lon_subset=lon_subset
)

# Final Model

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Train on the GPU when CUDA is present; fall back to the CPU otherwise.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Output directory for the final checkpoint; created if it does not exist yet.
model_save_dir = r'F:\weather_forecasting\notebooks\final project\models\Final Model'
os.makedirs(model_save_dir, exist_ok=True)

# Load the normalized tensors and move them to the device. This cell uses
# the full tensors; no held-out split is made here.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt').to(device)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt').to(device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Target channel 5 (dim 2) is t2m_f.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# k-nearest-neighbour graph over the lat/lon grid: neighbours are found on
# the CPU with scikit-learn, then the (2, num_nodes * k) index tensor is
# moved to the device.
num_nodes = 23937
k = 8
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
lat_grid, lon_grid = np.meshgrid(lat_subset, lon_subset, indexing='ij')
coords = np.column_stack([lat_grid.ravel(), lon_grid.ravel()])
nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
_, indices = nbrs.kneighbors(coords)
# Column 0 of `indices` is dropped (presumably the query point itself).
edge_sources = np.repeat(np.arange(num_nodes), k)
edge_targets = indices[:, 1:].flatten()
edge_index = t.tensor(np.vstack((edge_sources, edge_targets)), dtype=t.long).to(device)

# Model (simplified GNN without temporal layer)
class WeatherGNN(t.nn.Module):
    """GCN regressor: two hidden graph convolutions, an output convolution,
    and a linear shortcut from the raw features scaled by a learnable scalar.
    """

    def __init__(self, num_features=15, hidden_dims=128, num_outputs=1):
        super().__init__()
        self.conv1 = GCNConv(in_channels=num_features, out_channels=hidden_dims)
        self.conv2 = GCNConv(in_channels=hidden_dims, out_channels=hidden_dims)
        self.conv3 = GCNConv(in_channels=hidden_dims, out_channels=num_outputs)
        self.dropout = t.nn.Dropout(p=0.3)
        self.residual = t.nn.Linear(in_features=num_features, out_features=num_outputs)
        # Scale applied to the linear shortcut; learnable, starts at 2.0.
        self.res_weight = t.nn.Parameter(t.tensor(2.0))

    def forward(self, x, edge_index):
        """Return per-node predictions for features ``x`` on graph ``edge_index``."""
        hidden = self.dropout(t.relu(self.conv1(x, edge_index)))
        hidden = self.dropout(t.relu(self.conv2(hidden, edge_index)))
        shortcut = self.residual(x) * self.res_weight
        return self.conv3(hidden, edge_index) + shortcut

# Define L1 loss function directly
def l1_loss(x, y):
    """Return the mean of the element-wise absolute error ``|x - y|``."""
    abs_error = t.abs(x - y)
    return abs_error.mean()

# Denormalization statistics for t2m_f, in °F. These are fixed constants:
# the tensors loaded above are already normalized, so statistics taken from
# them cannot be used to map predictions back to °F.
t2m_f_mean = 42.36  # °F
t2m_f_std = 21.75   # °F

# Fixed-length training run on the full training tensors. No validation is
# performed in this cell, so the early-stopping variables below are never
# updated by the loop; the assignments are kept so the names stay defined
# with the same values as before.
num_epochs = 10
patience = 5
patience_counter = 0
best_model_state = None

print("\nTraining with L1 loss for t2m_f prediction...")
t.cuda.empty_cache()
gc.collect()
model = WeatherGNN(num_features=15, hidden_dims=128, num_outputs=1).to(device)
optimizer = t.optim.Adam(model.parameters(), lr=0.01)
scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
train_losses = []

for epoch in range(num_epochs):
    epoch_start = time.time()
    print(f"Starting Epoch {epoch+1}/{num_epochs}")
    model.train()
    total_loss = 0
    # One optimizer step per time index; the final index is left out.
    for t_step in range(train_inputs_tensor.shape[0] - 1):
        input_x = train_inputs_tensor[t_step].reshape(num_nodes, -1)
        target_y = train_targets_tensor[t_step, :, 5, :].reshape(num_nodes, -1)
        optimizer.zero_grad()
        out = model(input_x, edge_index)
        loss = l1_loss(out, target_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Read the LR before stepping the scheduler so the log reports the rate
    # this epoch actually trained with, not the next epoch's.
    epoch_lr = optimizer.param_groups[0]['lr']
    scheduler.step()
    avg_loss = total_loss / (train_inputs_tensor.shape[0] - 1)
    train_losses.append(avg_loss)
    epoch_time = time.time() - epoch_start
    print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {epoch_lr:.6f}, Total runtime: {epoch_time:.2f}s")

# Save the trained model (weights after the final epoch)
model_path = os.path.join(model_save_dir, 'final_model.pth')
t.save(model.state_dict(), model_path)
print(f"Saved model to {model_path}")

In [None]:
import time
import numpy as np
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors
import gc
import os

# Use the GPU when one is available, otherwise fall back to the CPU.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Define directory for saving the model (created if it does not exist yet)
model_save_dir = r'F:\weather_forecasting\notebooks\final project\models\Final Model'
os.makedirs(model_save_dir, exist_ok=True)

# Load tensors
# map_location puts the data on `device` while deserializing. Loading first
# and calling .to(device) afterwards fails on a CPU-only machine when the file
# was saved from a CUDA session, which would defeat the CPU fallback above.
train_inputs_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_inputs_norm.pt', map_location=device)
train_targets_tensor = t.load('f:/weather_forecasting/notebooks/final project/tensors/train_targets_norm.pt', map_location=device)
print(f"Tensors loaded: train inputs {train_inputs_tensor.shape}, train targets {train_targets_tensor.shape}")
# Index 5 on the third axis is the t2m_f target.
print(f"Normalized t2m_f mean: {train_targets_tensor[:, :, 5, :].mean().item():.2f}, std: {train_targets_tensor[:, :, 5, :].std().item():.2f}")

# Edge index (computed on CPU, then moved to GPU)
k = 8  # neighbours connected to every node
# Regular grid: 101 latitudes x 237 longitudes, flattened row-major so that
# node id = lat_index * 237 + lon_index.
lat_subset = np.linspace(50, 25, 101)
lon_subset = np.linspace(235, 294, 237)
coords = np.stack(np.meshgrid(lat_subset, lon_subset, indexing='ij'), axis=-1).reshape(-1, 2)
# Derived from the grid instead of hard-coding 23937 (= 101 * 237), so the
# node count cannot drift out of sync with the coordinate arrays.
num_nodes = coords.shape[0]
# Ask for k + 1 neighbours because the query points are the fitted points
# themselves; column 0 is expected to be the point itself and is dropped below.
nbrs = NearestNeighbors(n_neighbors=k+1).fit(coords)
_, indices = nbrs.kneighbors(coords)
# Shape (2, num_nodes * k): row 0 repeats each node id k times, row 1 holds
# the ids of that node's k nearest neighbours.
edge_index = t.tensor(np.stack([np.repeat(np.arange(num_nodes), k), indices[:, 1:].flatten()]), dtype=t.long).to(device)

# Model (simplified GNN without temporal layer)
class WeatherGNN(t.nn.Module):
    """Three stacked GCNConv layers plus a scaled linear skip connection.

    The graph path is conv1 -> ReLU -> dropout -> conv2 -> ReLU -> dropout ->
    conv3. In parallel, a linear layer maps the raw node features straight to
    the output size; that projection is multiplied by a learnable scalar and
    added to the graph path's output.

    Args:
        num_features: number of input features per node.
        hidden_dims: width of the two hidden graph-convolution layers.
        num_outputs: number of values predicted per node.
        dropout_rate: dropout probability applied after each hidden layer.
            The default (0.3) is the value that used to be hard-coded.
        res_weight_init: initial value of the learnable scalar that scales
            the skip connection. The default (2.0) is the value that used to
            be hard-coded.
    """

    def __init__(self, num_features=15, hidden_dims=128, num_outputs=1,
                 dropout_rate=0.3, res_weight_init=2.0):
        super().__init__()
        # Attribute names and creation order are kept as they were: they fix
        # the state_dict keys of saved checkpoints and the order in which the
        # random initial weights are drawn.
        self.conv1 = GCNConv(num_features, hidden_dims)
        self.conv2 = GCNConv(hidden_dims, hidden_dims)
        self.conv3 = GCNConv(hidden_dims, num_outputs)
        self.dropout = t.nn.Dropout(dropout_rate)
        self.residual = t.nn.Linear(num_features, num_outputs)
        self.res_weight = t.nn.Parameter(t.tensor(float(res_weight_init)))

    def forward(self, x, edge_index):
        """Predict per-node outputs for one graph snapshot.

        Args:
            x: node feature matrix whose last dimension is ``num_features``.
            edge_index: graph connectivity passed unchanged to every GCNConv
                layer.

        Returns:
            Tensor with one row per node and ``num_outputs`` columns.
        """
        # The skip path must be computed from the raw inputs, before x is
        # overwritten by the convolution stack.
        residual = self.residual(x) * self.res_weight
        x = self.dropout(self.conv1(x, edge_index).relu())
        x = self.dropout(self.conv2(x, edge_index).relu())
        x = self.conv3(x, edge_index)
        return x + residual

# Hand-written L1 (mean absolute error) loss
def l1_loss(x, y):
    """Return the mean of the element-wise absolute difference of x and y.

    Both arguments are tensors; the subtraction follows normal broadcasting
    rules and the result is a 0-dim tensor.
    """
    return (x - y).abs().mean()

# t2m_f statistics for denormalization, in °F.
# These are fixed constants rather than values measured from
# train_targets_tensor: the previous code reduced the whole tensor for a
# mean/std and then immediately overwrote both results with the numbers
# below, so that computation was dead work.
# NOTE(review): presumably these are the statistics of the un-normalized
# data (the loaded tensors are the "*_norm" files) — confirm against the
# normalization step that produced them.
t2m_f_mean = 42.36  # °F
t2m_f_std = 21.75   # °F

# Training configuration.
# NOTE: early stopping is NOT implemented in this cell — the loop below always
# runs for all num_epochs. patience / patience_counter / best_model_state are
# never read here; they are kept only so the notebook's global state is the
# same as before.
num_epochs = 35
patience = 5
patience_counter = 0
best_model_state = None

print("\nTraining with L1 loss for t2m_f prediction...")
# Run the garbage collector first: tensors that are only reachable through
# reference cycles are returned to the CUDA caching allocator when collected,
# and only then can empty_cache() hand that memory back to the driver.
gc.collect()
t.cuda.empty_cache()
model = WeatherGNN(num_features=15, hidden_dims=128, num_outputs=1).to(device)
optimizer = t.optim.Adam(model.parameters(), lr=0.01)
# Multiply the learning rate by 0.9 every 5 epochs.
scheduler = t.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
train_losses = []  # mean training loss of each epoch, in order

# Number of time steps visited per epoch (loop-invariant, so computed once).
# NOTE(review): the "- 1" means the final time step is never trained on, even
# though inputs and targets are read at the same index — confirm this is
# intentional.
num_steps = train_inputs_tensor.shape[0] - 1

for epoch in range(num_epochs):
    epoch_start = time.time()
    print(f"Starting Epoch {epoch+1}/{num_epochs}")
    model.train()
    total_loss = 0.0
    # One optimizer update per time step: each step is a full graph snapshot.
    for t_step in range(num_steps):
        # Flatten everything after the node axis into one feature vector per node.
        input_x = train_inputs_tensor[t_step].reshape(num_nodes, -1)
        # Index 5 on the third axis selects the t2m_f target.
        target_y = train_targets_tensor[t_step, :, 5, :].reshape(num_nodes, -1)
        optimizer.zero_grad()
        out = model(input_x, edge_index)
        loss = l1_loss(out, target_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    scheduler.step()
    avg_loss = total_loss / num_steps
    train_losses.append(avg_loss)
    epoch_time = time.time() - epoch_start
    # The LR is read after scheduler.step(), so it is the rate the next epoch
    # will use; the "Total runtime" figure is the duration of this epoch only.
    print(f"Finished Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}, Total runtime: {epoch_time:.2f}s")

# Save the trained model's parameters (state_dict only, not the full module)
# into model_save_dir. The file name is a plain literal: the previous f-string
# had no placeholders.
model_path = os.path.join(model_save_dir, 'final_model_ext_training.pth')
t.save(model.state_dict(), model_path)
print(f"Saved model to {model_path}")