In [1]:
from google.colab import drive

# Attach Google Drive at /content/drive. force_remount=True is passed so a
# stale mount is replaced; the timeout is 300000 ms (5 minutes).
drive.mount(
    "/content/drive",
    force_remount=True,
    timeout_ms=300000,
)


Mounted at /content/drive


In [3]:

import os

# Project data folder on the mounted Drive; the arrays loaded further down
# are read from here.
data_dir = "/content/drive/MyDrive/ML_Project/Data"

# Listing the folder doubles as a check that the path is reachable.
folder_contents = os.listdir(data_dir)
print(folder_contents)


['output.npy', 'input.npy', 'test_idx.npy', 'train_idx.npy', 'val_idx.npy', 'X_train_mean.npy', 'X_train_std.npy', 'cnn_best.pt', 'loss_history.npz', 'y_true_test.npy', 'y_pred_test_cnn.npy', 'models']


In [4]:
import torch

# Report whether a CUDA device is visible and, if so, the name of device 0.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)
if cuda_ok:
    print(torch.cuda.get_device_name(0))


True
NVIDIA A100-SXM4-40GB


In [6]:

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader


# Mini-batch size used by the three DataLoaders (identifier spelling kept
# as-is: the DataLoader calls further down reference this exact name).
bactch_size = 64

# --- read the arrays stored in data_dir ---
X_np = np.load(os.path.join(data_dir, "input.npy"))   # (N,1,44, 128)
y_np = np.load(os.path.join(data_dir, "output.npy"))  # (N,)

# Index arrays that select the rows belonging to each split.
train_idx = np.load(os.path.join(data_dir, "train_idx.npy"))
val_idx = np.load(os.path.join(data_dir, "val_idx.npy"))
test_idx = np.load(os.path.join(data_dir, "test_idx.npy"))

# Normalisation statistics: the first element of each saved array, as Python
# floats (presumably computed on the training split, per the file names).
mean_arr = np.load(os.path.join(data_dir, "X_train_mean.npy"))
std_arr = np.load(os.path.join(data_dir, "X_train_std.npy"))
mean, std = float(mean_arr[0]), float(std_arr[0])

# --- torch tensors consumed by the datasets ---
X_all, y_all = torch.from_numpy(X_np), torch.from_numpy(y_np)
mean_t, std_t = torch.tensor(mean), torch.tensor(std)

class EventDataset(Dataset):
    """Index-based view onto shared input/target tensors.

    Each item is looked up through ``indices`` and the input is standardised
    on the fly as ``(x - mean_t) / std_t``; the target is returned unchanged.
    """

    def __init__(self, X_all, y_all, indices, mean_t, std_t):
        # Keep references only; nothing is copied or normalised up front.
        self.X_all, self.y_all = X_all, y_all
        self.indices = indices
        self.mean_t, self.std_t = mean_t, std_t

    def __len__(self):
        # One sample per entry of the split's index array.
        return len(self.indices)

    def __getitem__(self, i):
        # Translate the position within the split into a row of the full arrays.
        row = int(self.indices[i])
        features = (self.X_all[row] - self.mean_t) / self.std_t   # (1,44, 128)
        target = self.y_all[row]
        return features, target

# One dataset per split; all three share the same tensors and statistics.
train_ds, val_ds, test_ds = (
    EventDataset(X_all, y_all, split_indices, mean_t, std_t)
    for split_indices in (train_idx, val_idx, test_idx)
)

# Only the training loader shuffles; validation and test keep a fixed order.
common_loader_args = dict(batch_size=bactch_size, num_workers=0)
train_loader = DataLoader(train_ds, shuffle=True, **common_loader_args)
val_loader = DataLoader(val_ds, shuffle=False, **common_loader_args)
test_loader = DataLoader(test_ds, shuffle=False, **common_loader_args)

# Pull a single batch to confirm the pipeline yields data.
xb, yb = next(iter(train_loader))


In [8]:
import time
import torch.nn as nn
from tqdm import tqdm

# setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
epochs = 30  # i tried 10 but it was really bad
LR = 1e-4    # tried 1e-2 also pretty bad
l2 = 1e-4    # weight decay handed to AdamW below
# Checkpoint location. This uses data_dir, the folder variable assigned in
# the earlier cell; the former DATA_DIR name is never assigned in the visible
# notebook, so Restart & Run All would stop here with a NameError.
save_path = os.path.join(data_dir, "cnn_best.pt")

# reproduce seed
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

#cnn
class CNN(nn.Module):
    """Three conv + ReLU + max-pool stages followed by two linear layers.

    Expects input shaped (B, 1, 44, 128) and returns one value per sample,
    shape (B,).
    """

    def __init__(self):
        super().__init__()
        # Convolutional trunk. 3x3 kernels with padding 1 keep H and W; the
        # shared 2x2 max-pool applied in forward() halves them (rounding down).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        # (1, 44, 128) -> (16, 22, 64)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        # -> (32, 11, 32)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # -> (64, 11 // 2 = 5, 16)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        # Regression head on the flattened 64 * 5 * 16 = 5120 features.
        self.fc1 = nn.Linear(in_features=64 * 5 * 16, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        # Each stage: convolution, ReLU, then 2x2 max-pool.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))
        flat = x.reshape(x.shape[0], -1)   # (B, 5120)
        hidden = self.relu(self.fc1(flat))
        out = self.fc2(hidden)             # (B, 1)
        return out[:, 0]                   # drop the singleton column -> (B,)

# Build the network and move it to the selected device.
model = CNN()
model = model.to(device)

# Huber-style objective (SmoothL1 with beta = 0.5) and AdamW with weight decay l2.
criterion = nn.SmoothL1Loss(beta=0.5)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=l2)

# Bookkeeping: lowest validation loss seen so far and the per-epoch curves.
best_val = None
train_losses, val_losses = [], []

# training
# The per-epoch figures tracked here are averages of `criterion`
# (nn.SmoothL1Loss), so they are reported as "loss" rather than "MSE".
for epoch in range(epochs):
    model.train()
    train_sum = 0.0
    train_count = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} train")
    for xb, yb in pbar:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()  # clear gradients from the previous step
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()  # compute gradients
        # clip the total gradient norm to 1.0 before the optimizer update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # The criterion yields a per-batch mean (default reduction); weight it
        # by the batch size so the epoch average is exact even when the last
        # batch is smaller.
        bs = xb.shape[0]
        batch_loss = loss.item()  # read the scalar once per batch
        train_sum += batch_loss * bs
        train_count += bs

        pbar.set_postfix(loss=batch_loss)

    train_loss = train_sum / train_count
    train_losses.append(train_loss)

    # validation
    model.eval()
    val_sum = 0.0
    val_count = 0

    with torch.no_grad():
        pbarv = tqdm(val_loader, desc=f"Epoch {epoch + 1}/{epochs} val")
        for xb, yb in pbarv:
            xb = xb.to(device)
            yb = yb.to(device)

            pred = model(xb)
            loss = criterion(pred, yb)

            bs = xb.shape[0]
            batch_loss = loss.item()
            val_sum += batch_loss * bs
            val_count += bs

            pbarv.set_postfix(val_loss=batch_loss)

    val_loss = val_sum / val_count
    val_losses.append(val_loss)

    print(f"Epoch {epoch+1}: train loss={train_loss:.6f}  val loss={val_loss:.6f}")

    # keep the checkpoint with the lowest validation loss seen so far
    if (best_val is None) or (val_loss < best_val):
        best_val = val_loss
        torch.save(model.state_dict(), save_path)


print("best val loss:", best_val)

# test eval: restore the weights saved at the lowest validation loss
best_state = torch.load(save_path, map_location=device)
model.load_state_dict(best_state)
model.eval()

pred_list, true_list = [], []

with torch.no_grad():
    pbar_t = tqdm(test_loader, desc="Test")
    for xb, yb in pbar_t:
        xb = xb.to(device)
        pred = model(xb).cpu().numpy()  # GPU -> CPU -> numpy
        pred_list.append(pred)
        true_list.append(yb.numpy())    # targets never left the CPU

# Stitch the per-batch arrays back into one vector each.
y_pred = np.concatenate(pred_list)
y_true = np.concatenate(true_list)

# Error metrics over the whole test split.
residual = y_pred - y_true
mse = float(np.mean(residual ** 2))
rmse = float(np.sqrt(mse))
mae = float(np.mean(np.abs(residual)))

print("RMSE:", rmse)
print("MAE :", mae)

# Save the loss curves so we don't need to retrain every time.
# Written under data_dir, the folder variable assigned in the earlier cell;
# DATA_DIR is never assigned in the visible notebook and would raise
# NameError on a fresh kernel.
np.savez(os.path.join(data_dir, "loss_history.npz"),
         train_losses=np.array(train_losses),
         val_losses=np.array(val_losses))



cuda


Epoch 1/30 train: 100%|██████████| 177/177 [00:02<00:00, 61.77it/s, loss=1.2] 
Epoch 1/30 val: 100%|██████████| 22/22 [00:00<00:00, 277.74it/s, val_loss=0.85]


Epoch 1: train MSE=1.823630  val MSE=0.865476


Epoch 2/30 train: 100%|██████████| 177/177 [00:01<00:00, 133.96it/s, loss=1.03]
Epoch 2/30 val: 100%|██████████| 22/22 [00:00<00:00, 301.54it/s, val_loss=0.852]


Epoch 2: train MSE=0.853960  val MSE=0.846802


Epoch 3/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.58it/s, loss=1.24]
Epoch 3/30 val: 100%|██████████| 22/22 [00:00<00:00, 303.48it/s, val_loss=0.802]


Epoch 3: train MSE=0.832246  val MSE=0.811671


Epoch 4/30 train: 100%|██████████| 177/177 [00:01<00:00, 136.31it/s, loss=1.24]
Epoch 4/30 val: 100%|██████████| 22/22 [00:00<00:00, 291.11it/s, val_loss=0.834]


Epoch 4: train MSE=0.817019  val MSE=0.808512


Epoch 5/30 train: 100%|██████████| 177/177 [00:01<00:00, 134.28it/s, loss=0.686]
Epoch 5/30 val: 100%|██████████| 22/22 [00:00<00:00, 294.28it/s, val_loss=0.836]


Epoch 5: train MSE=0.812451  val MSE=0.804670


Epoch 6/30 train: 100%|██████████| 177/177 [00:01<00:00, 133.40it/s, loss=0.23]
Epoch 6/30 val: 100%|██████████| 22/22 [00:00<00:00, 298.28it/s, val_loss=0.815]


Epoch 6: train MSE=0.801955  val MSE=0.857707


Epoch 7/30 train: 100%|██████████| 177/177 [00:01<00:00, 136.35it/s, loss=0.42]
Epoch 7/30 val: 100%|██████████| 22/22 [00:00<00:00, 297.02it/s, val_loss=0.791]


Epoch 7: train MSE=0.799364  val MSE=0.776784


Epoch 8/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.76it/s, loss=0.33]
Epoch 8/30 val: 100%|██████████| 22/22 [00:00<00:00, 291.18it/s, val_loss=0.787]


Epoch 8: train MSE=0.790976  val MSE=0.778035


Epoch 9/30 train: 100%|██████████| 177/177 [00:01<00:00, 136.04it/s, loss=0.182]
Epoch 9/30 val: 100%|██████████| 22/22 [00:00<00:00, 303.45it/s, val_loss=0.801]


Epoch 9: train MSE=0.785065  val MSE=0.780152


Epoch 10/30 train: 100%|██████████| 177/177 [00:01<00:00, 137.36it/s, loss=0.607]
Epoch 10/30 val: 100%|██████████| 22/22 [00:00<00:00, 301.16it/s, val_loss=0.768]


Epoch 10: train MSE=0.776217  val MSE=0.781811


Epoch 11/30 train: 100%|██████████| 177/177 [00:01<00:00, 134.20it/s, loss=0.967]
Epoch 11/30 val: 100%|██████████| 22/22 [00:00<00:00, 262.85it/s, val_loss=0.8]


Epoch 11: train MSE=0.777472  val MSE=0.768330


Epoch 12/30 train: 100%|██████████| 177/177 [00:01<00:00, 130.75it/s, loss=1.13]
Epoch 12/30 val: 100%|██████████| 22/22 [00:00<00:00, 300.03it/s, val_loss=0.78]


Epoch 12: train MSE=0.771102  val MSE=0.788003


Epoch 13/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.43it/s, loss=0.565]
Epoch 13/30 val: 100%|██████████| 22/22 [00:00<00:00, 282.20it/s, val_loss=0.804]


Epoch 13: train MSE=0.773182  val MSE=0.766891


Epoch 14/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.94it/s, loss=1.21]
Epoch 14/30 val: 100%|██████████| 22/22 [00:00<00:00, 282.52it/s, val_loss=0.787]


Epoch 14: train MSE=0.770138  val MSE=0.759626


Epoch 15/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.50it/s, loss=0.643]
Epoch 15/30 val: 100%|██████████| 22/22 [00:00<00:00, 293.22it/s, val_loss=0.819]


Epoch 15: train MSE=0.764690  val MSE=0.854704


Epoch 16/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.53it/s, loss=1.44]
Epoch 16/30 val: 100%|██████████| 22/22 [00:00<00:00, 292.05it/s, val_loss=0.791]


Epoch 16: train MSE=0.769235  val MSE=0.758150


Epoch 17/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.84it/s, loss=1.15]
Epoch 17/30 val: 100%|██████████| 22/22 [00:00<00:00, 298.08it/s, val_loss=0.82]


Epoch 17: train MSE=0.763382  val MSE=0.759579


Epoch 18/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.18it/s, loss=0.941]
Epoch 18/30 val: 100%|██████████| 22/22 [00:00<00:00, 286.39it/s, val_loss=0.817]


Epoch 18: train MSE=0.761079  val MSE=0.764586


Epoch 19/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.61it/s, loss=1.1]
Epoch 19/30 val: 100%|██████████| 22/22 [00:00<00:00, 279.62it/s, val_loss=0.834]


Epoch 19: train MSE=0.751824  val MSE=0.769659


Epoch 20/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.42it/s, loss=1.95]
Epoch 20/30 val: 100%|██████████| 22/22 [00:00<00:00, 289.18it/s, val_loss=0.809]


Epoch 20: train MSE=0.750596  val MSE=0.751986


Epoch 21/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.27it/s, loss=0.398]
Epoch 21/30 val: 100%|██████████| 22/22 [00:00<00:00, 299.09it/s, val_loss=0.789]


Epoch 21: train MSE=0.742590  val MSE=0.754262


Epoch 22/30 train: 100%|██████████| 177/177 [00:01<00:00, 128.47it/s, loss=0.545]
Epoch 22/30 val: 100%|██████████| 22/22 [00:00<00:00, 269.10it/s, val_loss=0.8]


Epoch 22: train MSE=0.743351  val MSE=0.759160


Epoch 23/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.18it/s, loss=0.539]
Epoch 23/30 val: 100%|██████████| 22/22 [00:00<00:00, 275.14it/s, val_loss=0.789]


Epoch 23: train MSE=0.739344  val MSE=0.748005


Epoch 24/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.84it/s, loss=0.339]
Epoch 24/30 val: 100%|██████████| 22/22 [00:00<00:00, 294.72it/s, val_loss=0.828]


Epoch 24: train MSE=0.740405  val MSE=0.754099


Epoch 25/30 train: 100%|██████████| 177/177 [00:01<00:00, 136.54it/s, loss=0.499]
Epoch 25/30 val: 100%|██████████| 22/22 [00:00<00:00, 289.56it/s, val_loss=0.801]


Epoch 25: train MSE=0.734579  val MSE=0.754770


Epoch 26/30 train: 100%|██████████| 177/177 [00:01<00:00, 133.65it/s, loss=0.36]
Epoch 26/30 val: 100%|██████████| 22/22 [00:00<00:00, 279.98it/s, val_loss=0.819]


Epoch 26: train MSE=0.731055  val MSE=0.748811


Epoch 27/30 train: 100%|██████████| 177/177 [00:01<00:00, 135.49it/s, loss=0.472]
Epoch 27/30 val: 100%|██████████| 22/22 [00:00<00:00, 293.03it/s, val_loss=0.82]


Epoch 27: train MSE=0.731065  val MSE=0.750632


Epoch 28/30 train: 100%|██████████| 177/177 [00:01<00:00, 133.99it/s, loss=1.2]
Epoch 28/30 val: 100%|██████████| 22/22 [00:00<00:00, 289.69it/s, val_loss=0.821]


Epoch 28: train MSE=0.727541  val MSE=0.748410


Epoch 29/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.51it/s, loss=0.794]
Epoch 29/30 val: 100%|██████████| 22/22 [00:00<00:00, 280.56it/s, val_loss=0.816]


Epoch 29: train MSE=0.723670  val MSE=0.754707


Epoch 30/30 train: 100%|██████████| 177/177 [00:01<00:00, 132.70it/s, loss=0.182]
Epoch 30/30 val: 100%|██████████| 22/22 [00:00<00:00, 284.39it/s, val_loss=0.819]


Epoch 30: train MSE=0.729603  val MSE=0.753255
best val MSE: 0.7480051534516471


Test: 100%|██████████| 23/23 [00:00<00:00, 308.42it/s]


RMSE: 1.245013112225099
MAE : 1.0159544944763184
