In [205]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

current_dir = os.getcwd()
root_dir = os.path.join(current_dir, '..')
sys.path.insert(0, root_dir)

from GCGRU.GRU import GRU

torch.manual_seed(10)
np.random.seed(10)

In [209]:
data = pd.read_csv('../processed_data.csv')
data = data[data['TurbID'] == 1]['P_norm']

In [210]:
class GRUBaseline(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        # Input x2 because of the mask
        self.gru = nn.GRU(input_size * 2, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, mask, h=None):
        # Concatenate values and mask as input
        x_masked = torch.cat([x, mask], dim=-1)
        out, h = self.gru(x_masked, h)
        return self.fc(out), h

In [211]:
def train_baseline(data, train_input_mask, train_loss_mask, epochs=20, chunk_size=500, lr=0.001):
    n_samples, n_timesteps, n_features = data.shape
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    criterion = nn.MSELoss(reduction='none')
    model = GRUBaseline(n_features).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        total_loss = 0
        count = 0

        for i in range(n_samples):
            h = None
            for start in range(0, n_timesteps, chunk_size):
                end = min(start + chunk_size, n_timesteps)

                chunk = data[i, start:end].clone().to(device)
                input_mask = train_input_mask[i, start:end].to(device)
                loss_mask = train_loss_mask[i, start:end-1].to(device)

                chunk[input_mask] = -1.0

                # Input to model: all but last timestep
                x = chunk[:-1].unsqueeze(0)   # [1, chunk_len - 1, n_features]
                target = chunk[1:].unsqueeze(0)  # [1, chunk_len - 1, n_features]

                input_mask = input_mask[:-1].unsqueeze(0)
                loss_mask = loss_mask.unsqueeze(0)
                # print(loss_mask.sum())

                optimizer.zero_grad()

                pred, h = model(x, input_mask, h.detach() if h is not None else None)

                loss = criterion(pred, target)
                masked_loss = (loss * loss_mask.unsqueeze(0)).sum() / loss_mask.sum()

                masked_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

                total_loss += masked_loss.item()
                count += 1

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/count:.6f}")

    return model

In [212]:
np.random.seed(10)

# Train/test = 80/20
n_train = int(len(data) * 0.8)

turb_1 = data.values.astype('float32').reshape(1, -1, 1)
train_data = turb_1[:, :n_train, :]
test_data = turb_1[:, n_train:, :]

train_loss_mask = ~np.isnan(train_data)
test_loss_mask = ~np.isnan(test_data)

train_input_mask = (np.random.random(train_data.shape) < 0.1) | np.isnan(train_data)
test_input_mask = (np.random.random(test_data.shape) < 0.1) | np.isnan(test_data)

# print(f"Training on {train_data.shape} with {np.isnan(train_data).mean()*100:.1f}% NaNs")

model = train_baseline(torch.tensor(train_data), torch.tensor(train_input_mask), torch.tensor(train_loss_mask), epochs=20, chunk_size=500)

Epoch 5/20, Loss: 0.180477
Epoch 10/20, Loss: 0.178619
Epoch 15/20, Loss: 0.177926
Epoch 20/20, Loss: 0.177549


In [214]:
def test_baseline(model, data, test_input_mask, test_loss_mask, chunk_size=500):
    device = next(model.parameters()).device
    criterion = nn.MSELoss(reduction='none')

    model.eval()
    total_loss = 0.0
    count = 0
    all_predictions = []

    with torch.no_grad():
        n_samples, n_timesteps, n_features = data.shape

        for i in range(n_samples):
            h = None
            preds = []

            for start in range(0, n_timesteps, chunk_size):
                end = min(start + chunk_size, n_timesteps)

                # Extract chunk and masks
                chunk = data[i, start:end].clone().to(device)  # [chunk_len, n_features]
                input_mask = test_input_mask[i, start:end].to(device)  # [chunk_len, n_features]
                loss_mask = test_loss_mask[i, start:end-1].to(device)  # [chunk_len - 1, n_features]

                # Mask missing inputs
                chunk[input_mask] = -1.0

                # Input/output pairs
                x = chunk[:-1].unsqueeze(0)      # [1, T-1, F]
                target = chunk[1:].unsqueeze(0)  # [1, T-1, F]
                input_mask = input_mask[:-1].unsqueeze(0)  # [1, T-1, F]
                loss_mask = loss_mask.unsqueeze(0)

                # Forward pass
                pred, h = model(x, input_mask, h)

                # Compute masked loss
                loss = criterion(pred, target)
                masked_loss = (loss * loss_mask).sum() / loss_mask.sum()
                total_loss += masked_loss.item()
                count += 1

                preds.append(pred.squeeze(0).cpu())

            all_predictions.append(torch.cat(preds, dim=0))  # [n_timesteps-1, n_features]

    avg_loss = total_loss / count
    all_predictions = torch.stack(all_predictions)  # [n_samples, n_timesteps-1, n_features]
    return avg_loss, all_predictions

test_baseline(model, torch.tensor(test_data), torch.tensor(test_input_mask), torch.tensor(test _loss_mask))

(0.17652383993069332,
 tensor([[[0.2851],
          [0.4231],
          [0.4233],
          ...,
          [0.0543],
          [0.0346],
          [0.0041]]]))