In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import random

## Model loading

In [2]:
class MaskedAutoEncoder(nn.Module):
    """MLP auto-encoder that reconstructs a feature vector from a partially masked copy.

    Args:
        in_dim: number of input (and reconstructed) features.
        maskable_dim: stored on the instance; not used anywhere in this class.
        embed_dim: width of every encoder layer and of the decoder hidden layer.
        depth: number of Linear -> GELU -> LayerNorm stages in the encoder.
    """

    def __init__(self, in_dim=9, maskable_dim=4, embed_dim=64, depth=4):
        super().__init__()
        self.in_dim = in_dim
        self.maskable_dim = maskable_dim

        # Learnable per-feature value substituted wherever the mask is False.
        self.mask_token = nn.Parameter(torch.zeros(in_dim))

        # Only the first stage maps from in_dim; every later stage is embed_dim -> embed_dim.
        widths = [in_dim] + [embed_dim] * depth
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend((nn.Linear(fan_in, fan_out), nn.GELU(), nn.LayerNorm(fan_out)))
        self.encoder = nn.Sequential(*stages)

        decoder_layers = [
            nn.Linear(embed_dim, embed_dim),
            nn.GELU(),
            nn.Linear(embed_dim, in_dim),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x, mask):
        """Reconstruct ``x``; entries where ``mask`` is False are replaced by ``mask_token`` first."""
        visible = torch.where(mask, x, self.mask_token)
        latent = self.encoder(visible)
        return self.decoder(latent)

In [3]:
# Location of the saved MaskedAutoEncoder state dict loaded in the next cell.
MODEL_PATH = 'models/model.pt'

In [5]:
# Restore the pretrained MaskedAutoEncoder and put it in inference mode.
if os.path.exists(MODEL_PATH):
    print(f"Loading model from {MODEL_PATH}")
    model = MaskedAutoEncoder(in_dim=9, embed_dim=128).cuda()
    # Deserialize to CPU first: without map_location the tensors are restored onto the
    # device they were saved from, which fails if that device does not exist here.
    # load_state_dict then copies the values into the model's (CUDA) parameters.
    state_dict = torch.load(MODEL_PATH, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
else:
    # NOTE: `model` stays undefined on this path; the prediction cells that pass
    # `mae=model` will fail until the checkpoint is available.
    print(f"MAE model not found!")

Loading model from models/model.pt


## Data loading

In [8]:
# Raw training table; the cells below read "lat", "lon", "depth", the feature columns
# listed in `cols`, and the target column from it.
df = pd.read_parquet("data/processed/nitrate_00_train_data.parquet")
# Input measurement columns: these are the ones standardised by `Scaler` and placed
# first in the feature matrix.
cols = ["temperature_00", "salinity_00", "oxygen_00", "phosphate_00"]

def encode_geospatial_features(df: pd.DataFrame) -> np.ndarray:
    """Encode position as five float32 features per row.

    Latitude and longitude (read from the "lat"/"lon" columns and converted from
    degrees to radians) are each mapped to a (sin, cos) pair. "depth" is min-max
    scaled to [0, 1] using the minimum and maximum of the frame that is passed in.

    Args:
        df: frame with numeric "lat", "lon" and "depth" columns.

    Returns:
        Array of shape (len(df), 5) with columns
        [sin_lat, cos_lat, sin_lon, cos_lon, norm_depth], dtype float32.
    """
    lat_rad = np.radians(df["lat"].to_numpy())
    lon_rad = np.radians(df["lon"].to_numpy())

    sin_lat = np.sin(lat_rad)
    cos_lat = np.cos(lat_rad)
    sin_lon = np.sin(lon_rad)
    cos_lon = np.cos(lon_rad)

    depth = df["depth"].to_numpy(dtype=np.float32)
    if depth.size == 0:
        # min()/max() raise on an empty array; there is nothing to scale anyway.
        norm_depth = depth
    else:
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range == 0:
            # All rows share one depth: the plain formula would be 0/0 = NaN.
            norm_depth = np.zeros_like(depth)
        else:
            # A NaN range (NaN in the column) still propagates NaN, as before.
            norm_depth = (depth - depth_min) / depth_range

    geo_features = np.stack([sin_lat, cos_lat, sin_lon, cos_lon, norm_depth], axis=1)
    return geo_features.astype(np.float32)

class Scaler:
    """Column-wise standardisation (subtract mean, divide by std) for torch tensors.

    Attributes:
        mean: column name -> mean.
        std: column name -> standard deviation.
        cols: column names, in the key order of ``mean``.
    """

    def __init__(self, mean: dict[str, float], std: dict[str, float]):
        self.mean = mean
        self.std = std
        self.cols = list(mean)

    @classmethod
    def from_dataframe(cls, df, cols):
        """Build a scaler from the ``mean()`` / ``std()`` of each listed column of ``df``."""
        mean, std = {}, {}
        for name in cols:
            series = df[name]
            mean[name] = series.mean()
            std[name] = series.std()
        return cls(mean, std)

    def normalize(self, tensor: torch.Tensor, cols: list[str]) -> torch.Tensor:
        """Standardise the first ``len(cols)`` columns of ``tensor`` in place and return it.

        Column ``i`` of the tensor is scaled with the statistics of ``cols[i]``;
        any further columns are left untouched.
        """
        for position, name in enumerate(cols):
            centred = tensor[:, position] - self.mean[name]
            tensor[:, position] = centred / self.std[name]
        return tensor

    def denormalize(self, tensor: torch.Tensor, cols: list[str]) -> torch.Tensor:
        """Return a new tensor mapped back to original units (``tensor * std + mean``).

        The statistics are broadcast against ``tensor``, one entry per name in ``cols``.
        """
        like = dict(dtype=tensor.dtype, device=tensor.device)
        offset = torch.tensor([self.mean[name] for name in cols], **like)
        scale = torch.tensor([self.std[name] for name in cols], **like)
        return tensor * scale + offset

    def mae(self, reconstructed: torch.Tensor, ground_truth: torch.Tensor, cols: list[str]) -> float:
        """Mean absolute error between the two tensors after denormalising both."""
        restored = self.denormalize(reconstructed.clone(), cols)
        reference = self.denormalize(ground_truth.clone(), cols)
        return (restored - reference).abs().mean().item()

    def masked_mae(self, reconstructed: torch.Tensor, ground_truth: torch.Tensor, mask: torch.Tensor, cols: list[str]) -> float:
        """Denormalised absolute error weighted by ``mask`` and divided by ``mask.sum()``.

        The divisor is clamped to at least 1, so an all-zero mask yields 0.0.
        """
        restored = self.denormalize(reconstructed.clone(), cols)
        reference = self.denormalize(ground_truth.clone(), cols)

        weighted_error = (restored - reference).abs() * mask
        divisor = mask.sum().clamp(min=1.0)
        return (weighted_error.sum() / divisor).item()

# Keep only rows where all four input measurements are present, and renumber the
# index so row positions line up with the tensors built from this frame.
df = df.dropna(subset=cols).reset_index(drop=True)
scaler = Scaler.from_dataframe(df, cols)

# Feature layout per row: the len(cols) measurements, then the 5 geospatial features.
x = df[cols].to_numpy(dtype=np.float32)
geo = encode_geospatial_features(df)
x_full = np.concatenate((x, geo), axis=1)

# torch.tensor() already copies x_full, so the in-place normalisation of the
# measurement columns cannot leak back into the numpy array.
X = scaler.normalize(torch.tensor(x_full), cols)

class PredictionDataset(Dataset):
    """Pairs each row of ``X`` with the entry of ``y`` at the same index.

    Args:
        X: feature container, indexed along its first axis.
        y: target container, indexed the same way.
    """

    def __init__(self, X: torch.Tensor, y: torch.Tensor):
        self.X, self.y = X, y

    def __len__(self):
        # The dataset length follows the features; ``y`` is not consulted here.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

# --- Experiment configuration ---------------------------------------------
# Column predicted by every model below.
TARGET         = "nitrate_00"
# Rows whose lat/lon fall inside this box form the held-out test set.
TEST_BBOX      = {                       # Mediterranean Sea
    "lat_min": 30.0, "lat_max": 46.0,
    "lon_min": -6.0, "lon_max": 36.0
}
SEED           = 42
# NOTE(review): N_JOBS is not referenced in the visible cells.
N_JOBS         = -1
# Fraction of the non-test rows kept for training.
SUB_FRAC       = 0.20

# Seed every RNG used below (Python, NumPy, torch CPU and CUDA).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Boolean Series over the rows of df: True for rows inside TEST_BBOX.
test_mask = (
    (df["lat"].between(TEST_BBOX["lat_min"], TEST_BBOX["lat_max"])) &
    (df["lon"].between(TEST_BBOX["lon_min"], TEST_BBOX["lon_max"]))
)

# Geographic hold-out: everything outside the box is training material.
X_train_large = X[~test_mask, :].clone()
X_test = X[test_mask, :].clone()

y_train_large = df[~test_mask][TARGET].to_numpy(dtype=np.float32)
y_test = df[test_mask][TARGET].to_numpy(dtype=np.float32)

print(f"X_train_large shape: {X_train_large.shape}")
print(f"y_train_large  shape: {y_train_large.shape}")

# Random subsample (without replacement) of SUB_FRAC of the training rows.
# This draw depends on the np.random.seed(SEED) call above.
idx = np.random.choice(X_train_large.shape[0], int((X_train_large.shape[0])*SUB_FRAC), replace=False)

X_train = torch.Tensor(X_train_large[idx])
y_train = torch.Tensor(y_train_large[idx])

print(f"X_train shape: {X_train.shape}")
print(f"X_test  shape: {X_test.shape}")

print(f"y_train shape: {y_train.shape}")
print(f"y_test  shape: {y_test.shape}")

# Note: y_train is a torch tensor while y_test is still a NumPy array here.
train_ds = PredictionDataset(X_train, y_train)
test_ds  = PredictionDataset(X_test, y_test)

train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
# batch_size=len(test_ds): the whole test set is delivered as one batch.
test_loader  = DataLoader(test_ds, batch_size=len(test_ds), shuffle=False)


X_train_large shape: torch.Size([601393, 9])
y_train_large  shape: (601393,)
X_train shape: torch.Size([120278, 9])
X_test  shape: torch.Size([8519, 9])
y_train shape: torch.Size([120278])
y_test  shape: (8519,)


## Training loop

In [47]:
def train(model_, loader_, n_epochs=20, lr=1e-3, weight_decay=1e-4, verbose=True, gradient_clipping=False):
    """Fit ``model_`` on ``loader_`` with Adam, cosine LR annealing and MSE loss.

    Args:
        model_: module mapping a feature batch to one prediction per sample
            (its output is flattened before the loss is computed).
        loader_: iterable of ``(x, y)`` batches.
        n_epochs: number of passes over ``loader_``; also the cosine schedule's ``T_max``.
        lr: initial Adam learning rate.
        weight_decay: Adam weight decay.
        verbose: print the mean batch loss and training R2 after every epoch.
        gradient_clipping: clip the global gradient norm to 1.0 before each step.

    Returns:
        ``(avg_loss, r2)`` of the last epoch, where ``avg_loss`` is the mean of the
        per-batch losses and ``r2`` is computed on the predictions made while
        training. ``(nan, nan)`` if ``n_epochs`` is 0.
    """
    opt = torch.optim.Adam(model_.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=n_epochs)
    loss_fn = nn.MSELoss()

    # Send batches to wherever the model lives instead of assuming CUDA. Adam has
    # already rejected a parameter-less model above, so next() cannot be exhausted.
    device = next(model_.parameters()).device

    # Defined up front so n_epochs == 0 returns instead of raising UnboundLocalError.
    avg_loss, r2 = float("nan"), float("nan")

    for epoch in range(n_epochs):
        model_.train()
        total_loss = 0
        all_y = []
        all_pred = []

        for x, y in loader_:
            x, y = x.to(device), y.to(device)
            pred = model_(x).flatten()

            loss = loss_fn(pred, y)
            loss.backward()

            if gradient_clipping:
                torch.nn.utils.clip_grad_norm_(model_.parameters(), max_norm=1.0)

            opt.step()
            opt.zero_grad()

            total_loss += loss.item()
            all_y.append(y.detach().cpu())
            all_pred.append(pred.detach().cpu())

        # The cosine schedule advances once per epoch, not per batch.
        scheduler.step()

        # Concatenate all predictions and targets
        all_y = torch.cat(all_y).numpy()
        all_pred = torch.cat(all_pred).numpy()

        r2 = r2_score(all_y, all_pred)
        avg_loss = total_loss / len(loader_)

        if verbose:
            print(f"Epoch {epoch+1}: loss = {avg_loss:.4f}  R2 = {r2:.4f}")

    return avg_loss, r2

def metrics(name, y, pred, verbose=True):
    """Compute regression scores for ``pred`` against ``y``.

    Args:
        name: label printed in front of the scores (left-aligned, padded to 18 chars).
        y: ground-truth values.
        pred: predicted values.
        verbose: print a one-line summary when True.

    Returns:
        Tuple ``(mse, rmse, mae, r2)``.
    """
    mse = mean_squared_error(y, pred)
    mae = mean_absolute_error(y, pred)
    r2 = r2_score(y, pred)
    rmse = np.sqrt(mse)  # derived from the MSE rather than recomputed

    scores = (mse, rmse, mae, r2)
    if not verbose:
        return scores

    print(f"{name:<18} RMSE={rmse:.4f} MSE={mse:.4f} MAE={mae:.4f} R2={r2:.4f}")
    return scores



def evaluate(name, model_, loader_):
    """Score ``model_`` on every batch of ``loader_`` and report the metrics.

    Predictions from all batches are concatenated before scoring, so the result
    covers the whole loader (previously only the first batch was scored because
    the function returned inside the loop). For a single-batch loader the result
    is unchanged.

    Args:
        name: label forwarded to ``metrics`` for the printed summary.
        model_: module mapping a feature batch to one prediction per sample.
        loader_: iterable of ``(x, y)`` batches.

    Returns:
        The ``(mse, rmse, mae, r2)`` tuple from ``metrics``, or ``None`` when the
        loader yields no batches (same as before).
    """
    model_.eval()

    # Follow the model's device; fall back to CUDA (the previous hard-coded
    # behaviour) for a module without parameters.
    first_param = next(model_.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    targets = []
    predictions = []
    # Inference only: no autograd graph is needed for scoring.
    with torch.no_grad():
        for x, y in loader_:
            pred = model_(x.to(device)).flatten()
            targets.append(y.detach().cpu())
            predictions.append(pred.detach().cpu())

    if not predictions:
        return None

    y_all = torch.cat(targets).numpy()
    pred_all = torch.cat(predictions).numpy()
    return metrics(name, y_all, pred_all)


def initialize_weights(m):
    """Re-initialise one module in place; intended for ``module.apply(initialize_weights)``.

    * ``nn.Linear``: Xavier-uniform weight, zero bias (if the layer has one).
    * ``nn.LayerNorm``: weight set to ones, bias to zeros.
    * Any other module is left untouched.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        has_bias = m.bias is not None
        if has_bias:
            nn.init.zeros_(m.bias)
        return

    if isinstance(m, nn.LayerNorm):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)

## Sanity check: baseline MLP prediction without embeddings

In [54]:
class BaselineMLP(nn.Module):
    """Three-layer ReLU MLP (in_dim -> 128 -> 64 -> 1) applied directly to the input features.

    Args:
        in_dim: number of input features per sample.
    """

    def __init__(self, in_dim):
        super().__init__()
        wide, narrow = 128, 64
        stack = [
            nn.Linear(in_dim, wide),
            nn.ReLU(),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Linear(narrow, 1),
        ]
        self.predictor = nn.Sequential(*stack)

    def forward(self, x):
        """Return one scalar per sample: the trailing size-1 axis is squeezed away."""
        out = self.predictor(x)
        return out.squeeze(-1)

# Baseline on the 9 input features directly (no MAE encoder involved).
pred_model = BaselineMLP(in_dim=9).cuda()
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-3)

Epoch 1: loss = 60.4349  R2 = 0.6724
Epoch 2: loss = 4.6641  R2 = 0.9747
Epoch 3: loss = 4.0296  R2 = 0.9782
Epoch 4: loss = 3.7618  R2 = 0.9796
Epoch 5: loss = 3.6050  R2 = 0.9805
Epoch 6: loss = 3.4932  R2 = 0.9811
Epoch 7: loss = 3.4193  R2 = 0.9815
Epoch 8: loss = 3.3547  R2 = 0.9818
Epoch 9: loss = 3.3114  R2 = 0.9821
Epoch 10: loss = 3.2815  R2 = 0.9822
Epoch 11: loss = 3.2570  R2 = 0.9824
Epoch 12: loss = 3.2237  R2 = 0.9825
Epoch 13: loss = 3.2068  R2 = 0.9826
Epoch 14: loss = 3.1890  R2 = 0.9827
Epoch 15: loss = 3.1759  R2 = 0.9828
Epoch 16: loss = 3.1619  R2 = 0.9829
Epoch 17: loss = 3.1545  R2 = 0.9829
Epoch 18: loss = 3.1487  R2 = 0.9829
Epoch 19: loss = 3.1445  R2 = 0.9830
Epoch 20: loss = 3.1419  R2 = 0.9830


In [55]:
# Score the baseline on the held-out test loader; evaluate() prints RMSE/MSE/MAE/R2.
_ = evaluate("Baseline", pred_model, test_loader)

Baseline           RMSE=1.8124 MSE=3.2847 MAE=1.3609 R2=0.6634


## Simple linear prediction

In [13]:
class LinearPredictionModel(nn.Module):
    """Linear probe: one linear layer on top of a frozen MAE encoder.

    Args:
        in_dim: width of the encoder output fed to the linear layer.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place
            (the object passed in is modified).
    """

    def __init__(self, in_dim, mae):
        super().__init__()
        self.mae = mae

        # Only the head is meant to train, so gradients are switched off for the MAE.
        for frozen in self.mae.parameters():
            frozen.requires_grad = False

        self.predictor = nn.Linear(in_dim, 1)

    def forward(self, x):
        """Encode ``x`` with the MAE encoder and return predictions of shape (batch, 1)."""
        embedding = self.mae.encoder(x)
        return self.predictor(embedding)

# Linear probe on the loaded MAE's 128-wide encoder output. Constructing the model
# freezes `model`'s parameters in place.
pred_model = LinearPredictionModel(in_dim=128, mae=model).cuda()
# Re-initialise only the head; the MAE weights are not touched.
pred_model.predictor.apply(initialize_weights)
_ = train(pred_model, train_loader, n_epochs=30, lr=1e-4)

Epoch 1: loss = 479.9072  R2 = -1.6007
Epoch 2: loss = 440.6416  R2 = -1.3879
Epoch 3: loss = 404.0756  R2 = -1.1897
Epoch 4: loss = 370.1533  R2 = -1.0059
Epoch 5: loss = 338.8707  R2 = -0.8363
Epoch 6: loss = 310.1029  R2 = -0.6805
Epoch 7: loss = 283.7923  R2 = -0.5379
Epoch 8: loss = 259.8236  R2 = -0.4080
Epoch 9: loss = 238.0824  R2 = -0.2901
Epoch 10: loss = 218.4634  R2 = -0.1838
Epoch 11: loss = 200.8353  R2 = -0.0883
Epoch 12: loss = 185.0870  R2 = -0.0029
Epoch 13: loss = 171.0730  R2 = 0.0730
Epoch 14: loss = 158.6835  R2 = 0.1401
Epoch 15: loss = 147.7881  R2 = 0.1991
Epoch 16: loss = 138.2665  R2 = 0.2507
Epoch 17: loss = 129.9996  R2 = 0.2955
Epoch 18: loss = 122.8840  R2 = 0.3341
Epoch 19: loss = 116.8036  R2 = 0.3670
Epoch 20: loss = 111.6687  R2 = 0.3949
Epoch 21: loss = 107.3883  R2 = 0.4181
Epoch 22: loss = 103.8732  R2 = 0.4371
Epoch 23: loss = 101.0429  R2 = 0.4524
Epoch 24: loss = 98.8290  R2 = 0.4644
Epoch 25: loss = 97.1535  R2 = 0.4735
Epoch 26: loss = 95.9398

In [14]:
# Score the linear probe on the held-out test loader.
_ = evaluate("Linear prediction", pred_model, test_loader)

Linear prediction  RMSE=7.5521 MSE=57.0344 MAE=6.8655 R2=-4.8447


## Multi-layer linear prediction

In [32]:
class MultiLinearPredictionModel(nn.Module):
    """Two-layer ReLU head (in_dim -> 64 -> 1) on top of a frozen MAE encoder.

    Args:
        in_dim: width of the encoder output fed to the head.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place.
    """

    def __init__(self, in_dim, mae):
        super().__init__()
        self.mae = mae

        hidden = 64
        head = [nn.Linear(in_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1)]
        self.predictor = nn.Sequential(*head)

        # The encoder is a fixed feature extractor; only the head receives gradients.
        for frozen in self.mae.parameters():
            frozen.requires_grad = False

    def forward(self, x):
        """Encode ``x`` with the MAE encoder and return predictions of shape (batch, 1)."""
        embedding = self.mae.encoder(x)
        return self.predictor(embedding)

# Two-layer head on the frozen MAE encoder output (128 features).
pred_model = MultiLinearPredictionModel(in_dim=128, mae=model).cuda()
# Re-initialise only the head; the MAE weights are not touched.
pred_model.predictor.apply(initialize_weights)
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-4)

Epoch 1: loss = 378.9284  R2 = -1.0535
Epoch 2: loss = 145.4503  R2 = 0.2117
Epoch 3: loss = 33.3476  R2 = 0.8193
Epoch 4: loss = 14.2146  R2 = 0.9230
Epoch 5: loss = 11.0473  R2 = 0.9401
Epoch 6: loss = 9.3514  R2 = 0.9493
Epoch 7: loss = 8.2026  R2 = 0.9556
Epoch 8: loss = 7.4188  R2 = 0.9598
Epoch 9: loss = 6.8707  R2 = 0.9628
Epoch 10: loss = 6.4880  R2 = 0.9648
Epoch 11: loss = 6.2174  R2 = 0.9663
Epoch 12: loss = 6.0170  R2 = 0.9674
Epoch 13: loss = 5.8689  R2 = 0.9682
Epoch 14: loss = 5.7575  R2 = 0.9688
Epoch 15: loss = 5.6760  R2 = 0.9692
Epoch 16: loss = 5.6169  R2 = 0.9696
Epoch 17: loss = 5.5776  R2 = 0.9698
Epoch 18: loss = 5.5524  R2 = 0.9699
Epoch 19: loss = 5.5401  R2 = 0.9700
Epoch 20: loss = 5.5344  R2 = 0.9700


In [33]:
# Score the two-layer head on the held-out test loader.
_ = evaluate("Multi-layer linear prediction", pred_model, test_loader)

Multi-layer linear prediction RMSE=3.5767 MSE=12.7930 MAE=2.7154 R2=-0.3110


## Linear+Dropout prediction

In [41]:
class DropoutLinearPredictionModel(nn.Module):
    """LayerNorm + dropout-regularised ReLU head (in_dim -> 256 -> 64 -> 1) on a frozen MAE encoder.

    Args:
        in_dim: width of the encoder output fed to the head.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place.
        dropout: drop probability after the first hidden layer; the second hidden
            layer uses half of it.
    """

    def __init__(self, in_dim, mae, dropout=0.3):
        super().__init__()
        self.mae = mae

        head = [
            nn.LayerNorm(in_dim),
            nn.Linear(in_dim, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(dropout / 2),  # lighter regularisation on the narrower layer
            nn.Linear(64, 1),
        ]
        self.predictor = nn.Sequential(*head)

        # The encoder is a fixed feature extractor; only the head receives gradients.
        for frozen in self.mae.parameters():
            frozen.requires_grad = False

    def forward(self, x):
        """Encode ``x`` with the MAE encoder and return predictions of shape (batch, 1)."""
        embedding = self.mae.encoder(x)
        return self.predictor(embedding)

# Dropout-regularised head on the frozen MAE encoder output (128 features).
pred_model = DropoutLinearPredictionModel(in_dim=128, mae=model).cuda()
# Re-initialise only the head; the MAE weights are not touched.
pred_model.predictor.apply(initialize_weights)
# This run also enables gradient-norm clipping (max_norm=1.0 inside train()).
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-4, gradient_clipping=True)

Epoch 1: loss = 110.8870  R2 = 0.3989
Epoch 2: loss = 10.2707  R2 = 0.9443
Epoch 3: loss = 9.3712  R2 = 0.9492
Epoch 4: loss = 8.9942  R2 = 0.9513
Epoch 5: loss = 8.6386  R2 = 0.9532
Epoch 6: loss = 8.5181  R2 = 0.9538
Epoch 7: loss = 8.4046  R2 = 0.9545
Epoch 8: loss = 8.2600  R2 = 0.9552
Epoch 9: loss = 8.1469  R2 = 0.9559
Epoch 10: loss = 8.0602  R2 = 0.9563
Epoch 11: loss = 8.1160  R2 = 0.9560
Epoch 12: loss = 7.9845  R2 = 0.9567
Epoch 13: loss = 7.9477  R2 = 0.9569
Epoch 14: loss = 7.9492  R2 = 0.9569
Epoch 15: loss = 7.9491  R2 = 0.9569
Epoch 16: loss = 7.9156  R2 = 0.9571
Epoch 17: loss = 7.9260  R2 = 0.9570
Epoch 18: loss = 7.8732  R2 = 0.9573
Epoch 19: loss = 7.8683  R2 = 0.9574
Epoch 20: loss = 7.8517  R2 = 0.9575


In [42]:
# Score the dropout head on the held-out test loader.
_ = evaluate("Dropout+linear prediction", pred_model, test_loader)

Dropout+linear prediction RMSE=2.9876 MSE=8.9257 MAE=2.4747 R2=0.0853


## Deeper Dropout prediction

In [48]:
class DeepDropoutLinearPredictionModel(nn.Module):
    """Five-layer SiLU head (in_dim -> 256 -> 128 -> 64 -> 32 -> 1) on a frozen MAE encoder.

    The first three stages are LayerNorm -> Linear -> SiLU -> Dropout; the fourth
    drops the LayerNorm. The last two dropout layers use ``dropout / 2``.

    Args:
        in_dim: width of the encoder output fed to the head.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place.
        dropout: drop probability of the first two stages.
    """

    def __init__(self, in_dim, mae, dropout=0.3):
        super().__init__()
        self.mae = mae

        # The MAE is always frozen here; only the head receives gradients.
        for frozen in self.mae.parameters():
            frozen.requires_grad = False

        half_dropout = dropout / 2
        head = [
            # stage 1
            nn.LayerNorm(in_dim), nn.Linear(in_dim, 256), nn.SiLU(), nn.Dropout(dropout),
            # stage 2
            nn.LayerNorm(256), nn.Linear(256, 128), nn.SiLU(), nn.Dropout(dropout),
            # stage 3
            nn.LayerNorm(128), nn.Linear(128, 64), nn.SiLU(), nn.Dropout(half_dropout),
            # stage 4 (no normalisation)
            nn.Linear(64, 32), nn.SiLU(), nn.Dropout(half_dropout),
            # output
            nn.Linear(32, 1),
        ]
        self.predictor = nn.Sequential(*head)

    def forward(self, x):
        """Encode ``x`` and return one scalar per sample (trailing size-1 axis squeezed)."""
        embedding = self.mae.encoder(x)
        scores = self.predictor(embedding)
        return scores.squeeze(-1)

# Deeper dropout head on the frozen MAE encoder output (128 features).
pred_model = DeepDropoutLinearPredictionModel(in_dim=128, mae=model).cuda()
# Re-initialise only the head; the MAE weights are not touched.
pred_model.predictor.apply(initialize_weights)
# Stronger weight decay than the other runs (5e-4), plus gradient-norm clipping.
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-4, weight_decay=5e-4, gradient_clipping=True)

Epoch 1: loss = 144.5541  R2 = 0.2165
Epoch 2: loss = 19.0636  R2 = 0.8967
Epoch 3: loss = 15.7416  R2 = 0.9147
Epoch 4: loss = 14.3331  R2 = 0.9223
Epoch 5: loss = 13.5232  R2 = 0.9267
Epoch 6: loss = 12.9884  R2 = 0.9296
Epoch 7: loss = 12.5257  R2 = 0.9321
Epoch 8: loss = 12.2175  R2 = 0.9338
Epoch 9: loss = 11.9450  R2 = 0.9353
Epoch 10: loss = 11.7600  R2 = 0.9363
Epoch 11: loss = 11.6318  R2 = 0.9370
Epoch 12: loss = 11.3515  R2 = 0.9385
Epoch 13: loss = 11.2154  R2 = 0.9392
Epoch 14: loss = 11.1137  R2 = 0.9398
Epoch 15: loss = 11.1923  R2 = 0.9393
Epoch 16: loss = 11.0620  R2 = 0.9401
Epoch 17: loss = 11.0748  R2 = 0.9400
Epoch 18: loss = 10.9841  R2 = 0.9405
Epoch 19: loss = 11.0474  R2 = 0.9401
Epoch 20: loss = 11.1376  R2 = 0.9396


In [50]:
# Score the deeper dropout head on the held-out test loader.
_ = evaluate("Deep dropout+linear prediction", pred_model, test_loader)

Deep dropout+linear prediction RMSE=3.6035 MSE=12.9855 MAE=2.6570 R2=-0.3307


## GELU prediction

In [23]:
class GELUPredictionModel(nn.Module):
    """LayerNorm + GELU head (in_dim -> 256 -> 128 -> 1) on top of a frozen MAE encoder.

    Args:
        in_dim: width of the encoder output fed to the head. It now sizes the first
            LayerNorm and Linear layer; previously it was ignored and 128 was
            hard-coded, so any other encoder width failed in ``forward``.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place
            (the object passed in is modified).
    """

    def __init__(self, in_dim, mae):
        super().__init__()
        self.mae = mae
        self.predictor = nn.Sequential(
            nn.LayerNorm(in_dim),
            nn.Linear(in_dim, 256),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1)
        )

        # The encoder is a fixed feature extractor; only the head receives gradients.
        for param in self.mae.parameters():
            param.requires_grad = False

    def forward(self, x):
        """Encode ``x`` with the MAE encoder and return predictions of shape (batch, 1)."""
        enc = self.mae.encoder(x)
        return self.predictor(enc)

# GELU head on the frozen MAE encoder output (128 features).
pred_model = GELUPredictionModel(in_dim=128, mae=model).cuda()
# Re-initialise only the head; the MAE weights are not touched.
pred_model.predictor.apply(initialize_weights)
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-4)

Epoch 1: loss = 103.6273  R2 = 0.4383
Epoch 2: loss = 11.2845  R2 = 0.9388
Epoch 3: loss = 9.8866  R2 = 0.9464
Epoch 4: loss = 9.2162  R2 = 0.9501
Epoch 5: loss = 8.6010  R2 = 0.9534
Epoch 6: loss = 8.3138  R2 = 0.9549
Epoch 7: loss = 7.9841  R2 = 0.9567
Epoch 8: loss = 7.7322  R2 = 0.9581
Epoch 9: loss = 7.6176  R2 = 0.9587
Epoch 10: loss = 7.3985  R2 = 0.9599
Epoch 11: loss = 7.3076  R2 = 0.9604
Epoch 12: loss = 7.2029  R2 = 0.9610
Epoch 13: loss = 7.1132  R2 = 0.9615
Epoch 14: loss = 7.0982  R2 = 0.9615
Epoch 15: loss = 7.0711  R2 = 0.9617
Epoch 16: loss = 6.9525  R2 = 0.9623
Epoch 17: loss = 6.9288  R2 = 0.9625
Epoch 18: loss = 6.9910  R2 = 0.9621
Epoch 19: loss = 6.9265  R2 = 0.9625
Epoch 20: loss = 6.9338  R2 = 0.9624


In [24]:
# Score the GELU head on the held-out test loader.
_ = evaluate("GELU prediction", pred_model, test_loader)

GELU prediction    RMSE=3.1991 MSE=10.2341 MAE=2.3228 R2=-0.0488


## Residual Block prediction

In [25]:
class ResidualBlock(nn.Module):
    """Width-preserving residual MLP block followed by a LayerNorm.

    Computes ``LayerNorm(x + f(x))`` where ``f`` is
    LayerNorm -> Linear -> SiLU -> Dropout -> Linear, all of width ``dim``.

    Args:
        dim: feature width of the input and output.
        dropout: drop probability inside the residual branch.
    """

    def __init__(self, dim, dropout=0.2):
        super().__init__()
        branch = [
            nn.LayerNorm(dim),
            nn.Linear(dim, dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(dim, dim),
        ]
        self.block = nn.Sequential(*branch)
        self.ln = nn.LayerNorm(dim)

    def forward(self, x):
        residual = self.block(x)
        return self.ln(x + residual)

class ResidualPredictionModel(nn.Module):
    """Residual MLP head on top of a frozen MAE encoder.

    Pipeline: encoder output -> LayerNorm/Linear/SiLU projection to 256 ->
    two ``ResidualBlock`` stages (dropout 0.3, then 0.2) -> 256 -> 128 -> 1 output head.

    Args:
        in_dim: width of the encoder output fed to the projection.
        mae: module exposing ``.encoder``; all of its parameters are frozen in place.
    """

    def __init__(self, in_dim, mae):
        super().__init__()
        self.mae = mae

        # The encoder is a fixed feature extractor; only the head receives gradients.
        for frozen in self.mae.parameters():
            frozen.requires_grad = False

        width = 256

        projection = [nn.LayerNorm(in_dim), nn.Linear(in_dim, width), nn.SiLU()]
        self.input_proj = nn.Sequential(*projection)

        stages = [ResidualBlock(width, dropout=0.3), ResidualBlock(width, dropout=0.2)]
        self.residual_blocks = nn.Sequential(*stages)

        head = [nn.Linear(width, 128), nn.SiLU(), nn.Dropout(0.2), nn.Linear(128, 1)]
        self.output_head = nn.Sequential(*head)

    def forward(self, x):
        """Encode ``x`` and return predictions of shape (batch, 1)."""
        embedding = self.mae.encoder(x)        # (B, in_dim)
        hidden = self.input_proj(embedding)    # (B, 256)
        hidden = self.residual_blocks(hidden)  # (B, 256)
        return self.output_head(hidden)

# Residual head on the frozen MAE encoder output (128 features).
pred_model = ResidualPredictionModel(in_dim=128, mae=model).cuda()
# This model has three head sub-modules instead of a single `predictor`, so each
# one is re-initialised separately; the MAE weights are not touched.
pred_model.input_proj.apply(initialize_weights)
pred_model.output_head.apply(initialize_weights)
pred_model.residual_blocks.apply(initialize_weights)
_ = train(pred_model, train_loader, n_epochs=20, lr=1e-4)

Epoch 1: loss = 43.3780  R2 = 0.7649
Epoch 2: loss = 7.5255  R2 = 0.9592
Epoch 3: loss = 6.4956  R2 = 0.9648
Epoch 4: loss = 6.0274  R2 = 0.9673
Epoch 5: loss = 5.7452  R2 = 0.9689
Epoch 6: loss = 5.5547  R2 = 0.9699
Epoch 7: loss = 5.4313  R2 = 0.9706
Epoch 8: loss = 5.3098  R2 = 0.9712
Epoch 9: loss = 5.2195  R2 = 0.9717
Epoch 10: loss = 5.1948  R2 = 0.9719
Epoch 11: loss = 5.0849  R2 = 0.9724
Epoch 12: loss = 5.0400  R2 = 0.9727
Epoch 13: loss = 4.9631  R2 = 0.9731
Epoch 14: loss = 4.9658  R2 = 0.9731
Epoch 15: loss = 4.8876  R2 = 0.9735
Epoch 16: loss = 4.9142  R2 = 0.9734
Epoch 17: loss = 4.8544  R2 = 0.9737
Epoch 18: loss = 4.8387  R2 = 0.9738
Epoch 19: loss = 4.8457  R2 = 0.9737
Epoch 20: loss = 4.8250  R2 = 0.9739


In [26]:
# Score the residual head on the held-out test loader.
_ = evaluate("Residual prediction", pred_model, test_loader)

Residual prediction RMSE=3.2078 MSE=10.2899 MAE=2.6218 R2=-0.0545


In [27]:
# Same residual architecture, rebuilt and trained from fresh head weights for
# 50 epochs instead of 20, then scored on the test loader in the same cell.
pred_model = ResidualPredictionModel(in_dim=128, mae=model).cuda()
pred_model.input_proj.apply(initialize_weights)
pred_model.output_head.apply(initialize_weights)
pred_model.residual_blocks.apply(initialize_weights)
_ = train(pred_model, train_loader, n_epochs=50, lr=1e-4)
_ = evaluate("Residual prediction", pred_model, test_loader)

Epoch 1: loss = 47.2494  R2 = 0.7439
Epoch 2: loss = 7.5836  R2 = 0.9589
Epoch 3: loss = 6.5905  R2 = 0.9643
Epoch 4: loss = 6.1428  R2 = 0.9667
Epoch 5: loss = 5.8642  R2 = 0.9682
Epoch 6: loss = 5.6376  R2 = 0.9694
Epoch 7: loss = 5.5057  R2 = 0.9702
Epoch 8: loss = 5.3536  R2 = 0.9710
Epoch 9: loss = 5.2710  R2 = 0.9714
Epoch 10: loss = 5.1633  R2 = 0.9720
Epoch 11: loss = 5.0813  R2 = 0.9725
Epoch 12: loss = 5.0330  R2 = 0.9727
Epoch 13: loss = 4.9812  R2 = 0.9730
Epoch 14: loss = 4.8704  R2 = 0.9736
Epoch 15: loss = 4.8329  R2 = 0.9738
Epoch 16: loss = 4.7913  R2 = 0.9740
Epoch 17: loss = 4.7459  R2 = 0.9743
Epoch 18: loss = 4.6979  R2 = 0.9745
Epoch 19: loss = 4.6747  R2 = 0.9747
Epoch 20: loss = 4.5873  R2 = 0.9751
Epoch 21: loss = 4.5660  R2 = 0.9753
Epoch 22: loss = 4.5168  R2 = 0.9755
Epoch 23: loss = 4.4703  R2 = 0.9758
Epoch 24: loss = 4.4300  R2 = 0.9760
Epoch 25: loss = 4.4239  R2 = 0.9760
Epoch 26: loss = 4.4051  R2 = 0.9761
Epoch 27: loss = 4.3675  R2 = 0.9763
Epoch 28: