In [None]:
import torch
import pandas as pd
import numpy as np

In [None]:
# Fix the seed of PyTorch's global random number generator so that runs
# started from a fresh kernel are repeatable.
SEED = 345
torch.manual_seed(SEED)

In [None]:
# Load the processed table, force the NOX column to numeric (unparseable
# entries become NaN) and keep only rows whose NOX is >= 0. NaN compares
# False against 0, so coerced rows are dropped by the same filter.
df = (
    pd.read_csv("~/data/dataset/processed.csv")
    .assign(NOX=lambda frame: pd.to_numeric(frame["NOX"], errors="coerce"))
    .loc[lambda frame: frame["NOX"] >= 0]
)
df

In [None]:
# Convert the filtered frame to a single float32 matrix (one row per sample).
all_data = df.to_numpy(dtype=np.float32)
all_data

In [None]:
class CustomDataset(torch.utils.data.Dataset):
  """Map-style dataset built from a 2-D numeric array.

  Columns read from ``all_data`` (column 0 is never read):
    * column 1     -> target ``y``, kept 2-D with shape (n, 1)
    * columns 2-6  -> five weather features; the first three are treated as
                      Celsius temperatures and the last two as relative
                      humidity percentages
    * columns 7+   -> temporal features, passed through unchanged

  The normalised features are stored in ``self.x`` (weather columns first,
  then temporal columns) and the normalised target in ``self.y``.
  """

  def __init__(self, all_data):
    """Slice and normalise ``all_data`` without modifying it.

    Args:
      all_data: 2-D NumPy array with at least 7 columns, laid out as
        described in the class docstring.
    """
    x_weather = all_data[:, 2:7]
    x_temporal = all_data[:, 7:]
    y = all_data[:, 1:2]

    # Normalise the five weather columns manually:
    #  - temperatures: convert Celsius to Kelvin, then scale by the column max
    #  - relative humidity: scale from percent to a 0-1 fraction
    #
    # The Kelvin shift must be out-of-place. ``x_weather[:, :3]`` is a view of
    # the caller's array, so ``+=`` would silently rewrite ``all_data`` and add
    # another 273.15 every time a dataset is constructed from the same array.
    x_weather_temp = x_weather[:, :3] + 273.15
    x_weather_temp = x_weather_temp / x_weather_temp.max(axis=0)
    x_weather_rh = x_weather[:, 3:] / 100
    x_weather = np.hstack((x_weather_temp, x_weather_rh))

    # Normalise the target by its maximum value.
    y = y / y.max(axis=0)

    self.x = np.hstack((x_weather, x_temporal))
    self.y = y

  def __len__(self):
    """Return the number of samples (rows of ``self.x``)."""
    return self.x.shape[0]

  def __getitem__(self, index):
    """Return the ``(features, target)`` pair at ``index`` as torch tensors."""
    x = self.x[index]
    y = self.y[index]

    return torch.Tensor(x), torch.Tensor(y)

In [None]:
# Wrap the float32 matrix in the Dataset defined above; normalisation happens
# inside CustomDataset.__init__. The bare name below displays the object's repr.
dataset = CustomDataset(all_data)
dataset

In [None]:
# Random 90/10 split: the test set receives whatever is left after the
# training share has been rounded down.
train_size = int(len(dataset) * 0.9)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, lengths=[train_size, test_size]
)

In [None]:
# Serve both splits in mini-batches of 64 samples, reshuffled on every pass.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=64, shuffle=True
)

In [None]:
class DNNModel(torch.nn.Module):
  """Two-branch fully connected regressor.

  The input is split column-wise: the first 5 columns feed the ``weather``
  branch and the remaining 67 columns feed the ``temporal`` branch. The two
  branch outputs (16 + 5 = 21 values) are concatenated and reduced to one
  prediction per sample by ``combine``.
  """

  def __init__(self):
    super().__init__()
    nn = torch.nn

    # 12 + 31 + 24 = 67 inputs -- presumably one-hot month, day-of-month and
    # hour encodings; TODO confirm against the data preparation step.
    temporal_inputs = 12 + 31 + 24
    self.temporal = nn.Sequential(
      nn.Linear(temporal_inputs, 128),
      nn.ReLU(),
      nn.Linear(128, 32),
      nn.ReLU(),
      nn.Linear(32, 16),
    )

    # Small MLP over the five weather features.
    self.weather = nn.Sequential(
      nn.Linear(5, 20),
      nn.ReLU(),
      nn.Linear(20, 10),
      nn.ReLU(),
      nn.Linear(10, 5),
    )

    # Head over the concatenated branch outputs (16 temporal + 5 weather).
    self.combine = nn.Sequential(
      nn.Linear(21, 8),
      nn.ReLU(),
      nn.Linear(8, 1),
    )

  def forward(self, x):
    """Map a ``(batch, 72)`` input to a ``(batch, 1)`` prediction."""
    weather_in, temporal_in = x[:, :5], x[:, 5:]
    temporal_out = self.temporal(temporal_in)
    weather_out = self.weather(weather_in)
    merged = torch.cat([temporal_out, weather_out], dim=1)
    return self.combine(merged)

In [None]:
# Instantiate the network; the optimizer created further down is built over
# this instance's parameters.
dnn_model = DNNModel()

In [None]:
# Optimiser hyperparameters.
learning_rate = 0.001
l2_weight_decay = 0.00001
# NOTE(review): the DataLoader cell hardcodes 64 instead of reading this
# value -- keep the two in sync if the batch size is changed.
batch_size = 64

In [None]:
# Mean-squared-error objective, minimised with Adam; `weight_decay` applies
# the L2 penalty configured above.
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(
    params=dnn_model.parameters(),
    lr=learning_rate,
    weight_decay=l2_weight_decay,
)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch the loss is computed, gradients are reset and
    back-propagated, and the optimizer takes one step. The current batch loss
    and a running sample count are printed every 100th batch, starting with
    the first.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``.dataset``.
        model: module being trained; switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer bound to ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    # Training mode matters for dropout / batch-norm layers; kept as good
    # practice even though this model has neither.
    model.train()
    for step, (inputs, targets) in enumerate(dataloader):
        # Forward pass
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        loss = batch_loss.item()
        current = (step + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn, permitted_test_error):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (torch.abs(pred - y) <= permitted_test_error * y).float().sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Train for a fixed number of epochs, evaluating on the test loader after each
# training pass so that progress can be followed in the cell output.
epochs = 20
permitted_test_error = 0.20 # prediction is considered to be correct if within 20% of truth
for t in range(epochs):
    # `t` is zero-based; the printed epoch number is one-based.
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, dnn_model, loss_fn, optimizer)
    test_loop(test_dataloader, dnn_model, loss_fn, permitted_test_error)
print("Done!")