In [12]:
#Imports
%matplotlib inline
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

from google.colab import files
uploaded = files.upload()

Saving metr-la.csv to metr-la (3).csv


In [13]:
#Loading and normalizing the data
#def load_data(file_path, window_size):
def load_data(file_path, window_size,col_index=1):
    df = pd.read_csv(file_path)
    #series = df.iloc[:, 1].astype(float).values
    series = df.iloc[:, col_index].astype(float).values
    # Normalize the data
    series = (series - series.mean()) / series.std()

    #Splitting the data into training data and validation data
    split_index = int(len(series) * 0.8)
    train_series = series[:split_index]
    val_series = series[split_index - window_size:]

    # Building the training windows
    X_train, y_train = [], []
    for i in range(len(train_series) - window_size):
        X_train.append(train_series[i : i + window_size])
        y_train.append(train_series[i + window_size])

    #Building the validation windows
    X_val, y_val = [], []
    for i in range(len(val_series) - window_size):
        X_val.append(val_series[i : i + window_size])
        y_val.append(val_series[i + window_size])

    return(
        np.array(X_train, dtype=np.float32),
        np.array(y_train, dtype=np.float32),
        np.array(X_val, dtype=np.float32),
        np.array(y_val, dtype=np.float32),
    )

In [14]:
#Training function
def train_model(model, X_train, y_train, X_val, y_val, learning, batch_size, epochs):
    train_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    val_dataset = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    criterion = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=learning)

    train_losses, val_losses = [], []
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        batch_losses = []
        #Training loop
        for xb, yb in train_loader:
            optimizer.zero_grad()
            preds = model(xb).squeeze(-1)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
        train_losses.append(np.mean(batch_losses))
        #Validation
        model.eval()
        with torch.no_grad():
            val_batch = [criterion(model(xb).squeeze(-1), yb).item() for xb, yb in val_loader]
        val_losses.append(np.mean(val_batch))
    training_time = time.time() - start_time
    return train_losses, val_losses, training_time

In [17]:
#Experiment code
def experiment_chosen_configs(file_path, chosen_configs,sensor_cols, epochs, window_size,runs):
    overall_start_time = time.time()
    print("Starting experiment...")

    results = {}

    for sensor_id, col_index in sensor_cols.items():
        print(f"\nSensor ID: {sensor_id}")
        X_train, y_train, X_val, y_val = load_data(file_path,window_size,col_index)
        results[sensor_id] = {}

        for learning, batch, hidden in chosen_configs:
          configs = f"learning={learning}_batch={batch}_hidden={hidden}"
          final_val_losses = []

          for run in range(runs):
            torch.manual_seed(run)
            np.random.seed(run)
            model = nn.Sequential(nn.Linear(window_size, hidden),nn.ReLU(), nn.Linear(hidden, 1))
            _, val_losses, _ = train_model(model, X_train, y_train, X_val, y_val, learning, batch, epochs)
            final_val_losses.append(val_losses[-1])

          #Compute the mean across the 3 runs
          mean_val_loss = np.mean(final_val_losses)
          results[sensor_id][configs] = mean_val_loss
          print(f"Config: {configs} Mean Validation Loss: {mean_val_loss:.4f}")

    overall_duration = time.time() - overall_start_time
    print(f"\nTotal Experiment Time: {overall_duration:.2f} seconds")
    return results

In [18]:
#Main Function
if __name__ == '__main__':
    #file_path = "C:/Users/acer/OneDrive/Desktop/Masters/CSC5025Z - Intelligent Systems/Assignments/metr-la.csv"
    file_path = 'metr-la.csv'
    epochs = 50
    window_size = 12
    runs = 3

    sensor_cols = {
        773869: 1,
        767541: 2,
        767542: 3
    }

    #The selected six configurations
    chosen_configs = [
    (0.0001, 64, 64),
    (0.001, 64, 64),
    (0.0001, 64, 80),
    (0.001, 128, 16),
    (0.0001, 128, 64),
    (0.001, 64, 32)
    ]


    experiment_chosen_configs(file_path, chosen_configs,sensor_cols, epochs, window_size,runs)

Starting experiment...

Sensor ID: 773869
Config: learning=0.0001_batch=64_hidden=64 Mean Validation Loss: 0.0797
Config: learning=0.001_batch=64_hidden=64 Mean Validation Loss: 0.0787
Config: learning=0.0001_batch=64_hidden=80 Mean Validation Loss: 0.0795
Config: learning=0.001_batch=128_hidden=16 Mean Validation Loss: 0.0790
Config: learning=0.0001_batch=128_hidden=64 Mean Validation Loss: 0.0801
Config: learning=0.001_batch=64_hidden=32 Mean Validation Loss: 0.0797

Sensor ID: 767541
Config: learning=0.0001_batch=64_hidden=64 Mean Validation Loss: 0.1253
Config: learning=0.001_batch=64_hidden=64 Mean Validation Loss: 0.1287
Config: learning=0.0001_batch=64_hidden=80 Mean Validation Loss: 0.1253
Config: learning=0.001_batch=128_hidden=16 Mean Validation Loss: 0.1264
Config: learning=0.0001_batch=128_hidden=64 Mean Validation Loss: 0.1265
Config: learning=0.001_batch=64_hidden=32 Mean Validation Loss: 0.1295

Sensor ID: 767542
Config: learning=0.0001_batch=64_hidden=64 Mean Validation