<a href="https://colab.research.google.com/github/jacobdwatters/NIOSH-Project/blob/main/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from torch import nn, tensor
from torch.optim import SGD
from torch.utils.data import DataLoader, Dataset
from datetime import datetime
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PowerTransformer
from sklearn.model_selection import train_test_split
from violation_common import get_processed_violation_data, scale_and_encode

from torchmetrics import R2Score

# Raise pandas' display limits so long/wide frames are shown rather than truncated.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
pd.options.display.width = 1000

In [2]:
#from google.colab import drive
#drive.mount('/content/gdrive')

In [2]:
# Load the violation data through the project helper imported from violation_common.
# NOTE(review): the helper's data source and the returned schema are not visible
# in this notebook — confirm against violation_common before relying on columns.
violation_data = get_processed_violation_data()

In [3]:
# scale_and_encode returns a pair: the preprocessed (features, target) arrays and
# the fitted transformers, which are kept so the target scaling can be inverted later.
preprocessed_arrays, fitted_transformers = scale_and_encode(
    violation_data, target_method=PowerTransformer
)
X_pre, y_pre = preprocessed_arrays
scaler, ohe, target_scaler = fitted_transformers

print('Features shape:', X_pre.shape)

Features shape: (2757866, 24)


In [4]:
# Hold out 25% of the rows for testing (fixed seed for a reproducible split) and
# cast every split to float32 so the arrays match the dtype of the torch layers.
X_train, X_test, y_train, y_test = (
    split.astype(np.float32)
    for split in train_test_split(X_pre, y_pre, test_size=0.25, random_state=0)
)

class ViolationDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        features: Array-like of per-sample features; converted with ``torch.tensor``.
        targets: Array-like of per-sample targets, one entry per feature row.

    Raises:
        ValueError: If ``features`` and ``targets`` do not have the same length.
    """

    def __init__(self, features, targets):
        self.features = tensor(features)
        self.targets = tensor(targets)
        # Fail fast: __len__ is driven by the targets, so a mismatch would
        # otherwise silently drop feature rows or raise IndexError mid-epoch.
        if len(self.features) != len(self.targets):
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {len(self.features)} and {len(self.targets)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.targets)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.features[index], self.targets[index]

# Wrap the train/test splits as datasets and expose them through mini-batch loaders.
BATCH_SIZE = 8  # mini-batch size shared by both loaders

train_dataset = ViolationDataset(X_train, y_train)
test_dataset = ViolationDataset(X_test, y_test)

# Training batches are reshuffled every epoch; the evaluation loader keeps a
# fixed order so that any batch-wise evaluation is deterministic.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [6]:
class NeuralNetwork(nn.Module):
    """Single-hidden-layer feed-forward regressor: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of input features per sample (default 24).
        hidden_features: Width of the hidden layer (default 12).
        out_features: Number of values predicted per sample (default 1).

    The defaults reproduce the original fixed 24 -> 12 -> 1 architecture, so
    ``NeuralNetwork()`` behaves exactly as before.
    """

    def __init__(self, in_features=24, hidden_features=12, out_features=1):
        super().__init__()
        # Attribute name kept as-is so state_dict keys remain compatible.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Return the network's raw predictions for the batch ``x``."""
        return self.linear_relu_stack(x)

In [7]:
# Build the network, place its parameters on the selected device, and show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=24, out_features=12, bias=True)
    (1): ReLU()
    (2): Linear(in_features=12, out_features=1, bias=True)
  )
)


In [None]:
# Train the model with mean-squared-error loss and SGD with momentum.
criterion = nn.MSELoss()
optimizer = SGD(model.parameters(), lr=0.0001, momentum=0.9)

NUM_EPOCHS = 2    # full passes over the training set
LOG_EVERY = 2000  # report the mean loss every LOG_EVERY mini-batches

try:
    model.train()
    for epoch in range(NUM_EPOCHS):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            # The model lives on `device`, so each batch must be moved there
            # too; without this the forward pass fails when running on CUDA.
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate by default, so clear them before each step.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Report the mean loss over the last LOG_EVERY batches, then reset.
            running_loss += loss.item()
            if i % LOG_EVERY == LOG_EVERY - 1:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
                running_loss = 0.0
except KeyboardInterrupt:
    # Interrupting the cell is a deliberate way to stop early: keep the
    # weights learned so far, but say so instead of failing silently.
    print('Training interrupted; keeping the weights learned so far')

# Switch to inference mode for the evaluation that follows.
model.eval()
print('Finished Training')

In [31]:
# Score the model on the held-out test set.
# no_grad() avoids building an autograd graph for this single large forward pass.
# The input is moved to the model's device and the predictions are brought back
# to the CPU so the NumPy conversion also works when running on CUDA.
with torch.no_grad():
    y_pred = model(tensor(X_test).to(device)).cpu().numpy()

# Predictions and targets go through the same round trip: undo the target
# scaling, exponentiate, then re-apply the fitted target scaler.
# NOTE(review): presumably the raw target was log-transformed upstream — confirm.
y_pred_eval = tensor(target_scaler.transform(np.exp(target_scaler.inverse_transform(y_pred))))
y_test_eval = tensor(target_scaler.transform(np.exp(target_scaler.inverse_transform(y_test))))

# Kept as the last expression so the notebook displays the R² score
# (predictions first, targets second).
R2Score()(y_pred_eval, y_test_eval)



tensor(0.4133)