In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Bidirectional, LSTM, Dropout, Dense

In [2]:
df_train = pd.read_csv('illinois_basing_train.csv')


df_train[df_train.columns[-1]].fillna(0, inplace = True)
y = df_train[df_train.columns[-1]]
df_train.drop(df_train.columns[-1], axis=1, inplace = True)

df_train['Month'] = pd.to_datetime(df_train['SampleTimeUTC']).dt.month
df_train['Day'] = pd.to_datetime(df_train['SampleTimeUTC']).dt.day
df_train['Hour'] = pd.to_datetime(df_train['SampleTimeUTC']).dt.hour
df_train['Year'] = pd.DatetimeIndex(df_train['SampleTimeUTC']).year
df_train.drop('SampleTimeUTC', axis=1, inplace = True)

In [3]:
cols = [i for i in df_train.columns if df_train[i].isnull().any()]
for i in cols:
    df_train[i].fillna(df_train[i].mean(), inplace=True)


In [4]:
# Early stopping (see Module 3.4)
class EarlyStopping():
  def __init__(self, patience=10, min_delta=1e-4, restore_best_weights=True):
    self.patience = patience
    self.min_delta = min_delta
    self.restore_best_weights = restore_best_weights
    self.best_model = None
    self.best_loss = None
    self.counter = 0
    self.status = ""
    
  def __call__(self, model, val_loss):
    if self.best_loss == None:
      self.best_loss = val_loss
      self.best_model = copy.deepcopy(model)
    elif self.best_loss - val_loss > self.min_delta:
      self.best_loss = val_loss
      self.counter = 0
      self.best_model.load_state_dict(model.state_dict())
    elif self.best_loss - val_loss < self.min_delta:
      self.counter += 1
      if self.counter >= self.patience:
        self.status = f"Stopped on {self.counter}"
        if self.restore_best_weights:
          model.load_state_dict(self.best_model.state_dict())
        return True
    self.status = f"{self.counter}/{self.patience}"
    return False

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
from sklearn import preprocessing
from torch.utils.data import DataLoader, TensorDataset
import tqdm
import time
import copy

# Make use of a GPU or MPS (Apple) if one is available. (see Module 3.2)
device = "mps" if getattr(torch,'has_mps',False) \
    else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Define the PyTorch Neural Network
class Net(nn.Module):
    def __init__(self, in_count, out_count):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(in_count, 50)
        self.fc2 = nn.Linear(50, 25)
        self.fc3 = nn.Linear(25, out_count)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

x = df_train  
x_train, x_test, y_train, y_test = train_test_split(
    x.values, y.values, test_size=0.1, random_state=42
)

# Numpy to Torch Tensor
x_train = torch.Tensor(x_train).float()
y_train = torch.Tensor(y_train).float()

x_test = torch.Tensor(x_test).float().to(device)
y_test = torch.Tensor(y_test).float().to(device)


# Create datasets
BATCH_SIZE = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train,\
  batch_size=BATCH_SIZE, shuffle=True)

dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test,\
  batch_size=BATCH_SIZE, shuffle=True)


# Create model
model = Net(x.shape[1],1).to(device)

# Define the loss function for regression
loss_fn = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters())

es = EarlyStopping()

epoch = 0
done = False
while epoch<1000 and not done:
  epoch += 1
  steps = list(enumerate(dataloader_train))
  pbar = tqdm.tqdm(steps)
  model.train()
  for i, (x_batch, y_batch) in pbar:
    y_batch_pred = model(x_batch.to(device)).flatten()
    loss = loss_fn(y_batch_pred, y_batch.to(device))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss, current = loss.item(), (i + 1)* len(x_batch)
    if i == len(steps)-1:
      model.eval()
      pred = model(x_test).flatten()
      vloss = loss_fn(pred, y_test)
      if es(model,vloss): done = True
      pbar.set_description(f"Epoch: {epoch}, tloss: {loss}, vloss: {vloss:>7f}, EStop:[{es.status}]")
    else:
      pbar.set_description(f"Epoch: {epoch}, tloss {loss:}")

Using device: cuda


Epoch: 1, tloss: 0.8088574409484863, vloss: 15.331805, EStop:[0/10]: 100%|████████| 1542/1542 [00:06<00:00, 230.79it/s]
Epoch: 2, tloss: 0.025616204366087914, vloss: 14.915764, EStop:[0/10]: 100%|██████| 1542/1542 [00:05<00:00, 294.15it/s]
Epoch: 3, tloss: 0.06818347424268723, vloss: 16.074657, EStop:[1/10]: 100%|███████| 1542/1542 [00:06<00:00, 233.93it/s]
Epoch: 4, tloss: 0.06609426438808441, vloss: 14.749968, EStop:[0/10]: 100%|███████| 1542/1542 [00:06<00:00, 236.16it/s]
Epoch: 5, tloss: 0.029554683715105057, vloss: 14.756166, EStop:[1/10]: 100%|██████| 1542/1542 [00:05<00:00, 263.93it/s]
Epoch: 6, tloss: 0.04469485208392143, vloss: 14.757010, EStop:[2/10]: 100%|███████| 1542/1542 [00:05<00:00, 283.33it/s]
Epoch: 7, tloss: 0.06110917404294014, vloss: 14.771641, EStop:[3/10]: 100%|███████| 1542/1542 [00:05<00:00, 282.32it/s]
Epoch: 8, tloss: 0.09462262690067291, vloss: 14.775132, EStop:[4/10]: 100%|███████| 1542/1542 [00:06<00:00, 228.84it/s]
Epoch: 9, tloss: 0.07244472950696945, vl

In [6]:
df_test = pd.read_csv('illinois_basing_test.csv')
df_test['Month'] = pd.to_datetime(df_test['SampleTimeUTC']).dt.month
df_test['Day'] = pd.to_datetime(df_test['SampleTimeUTC']).dt.day
df_test['Hour'] = pd.to_datetime(df_test['SampleTimeUTC']).dt.hour
df_test['Year'] = pd.DatetimeIndex(df_test['SampleTimeUTC']).year
df_test.drop('SampleTimeUTC', axis=1, inplace = True)
cols = [i for i in df_test.columns if df_test[i].isnull().any()]
for i in cols:
    df_test[i].fillna(df_test[i].mean(), inplace=True)
x_test = df_test.values
x_test = torch.Tensor(x_test).float().to(device)

from sklearn import metrics

# Predict
pred = model(x_test)

In [7]:
pred = pred.cpu().detach().numpy()
preds = pd.DataFrame(pred, columns=['inj_diff\xa0'])
preds.to_csv("basic_nn.csv", index = False)

In [6]:
class ShallowRegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(in_features=self.hidden_units, out_features=1)

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        
        _, (hn, _) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0]).flatten()  # First dim of Hn is num_layers, which is set to 1 above.

        return out
x = df_train  
x_train, x_test, y_train, y_test = train_test_split(
    x.values, y.values, test_size=0.1, random_state=42
)

# Numpy to Torch Tensor
x_train = torch.Tensor(x_train).float()
y_train = torch.Tensor(y_train).float()

x_test = torch.Tensor(x_test).float().to(device)
y_test = torch.Tensor(y_test).float().to(device)


# Create datasets
BATCH_SIZE = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train,\
  batch_size=BATCH_SIZE, shuffle=True)

dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test,\
  batch_size=BATCH_SIZE, shuffle=True)


# Create model
model = Net(x.shape[1],1).to(device)

# Define the loss function for regression
loss_fn = nn.MSELoss()

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters())

es = EarlyStopping()

epoch = 0
done = False
while epoch<1000 and not done:
  epoch += 1
  steps = list(enumerate(dataloader_train))
  pbar = tqdm.tqdm(steps)
  model.train()
  for i, (x_batch, y_batch) in pbar:
    y_batch_pred = model(x_batch.to(device)).flatten()
    loss = loss_fn(y_batch_pred, y_batch.to(device))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss, current = loss.item(), (i + 1)* len(x_batch)
    if i == len(steps)-1:
      model.eval()
      pred = model(x_test).flatten()
      vloss = loss_fn(pred, y_test)
      if es(model,vloss): done = True
      pbar.set_description(f"Epoch: {epoch}, tloss: {loss}, vloss: {vloss:>7f}, EStop:[{es.status}]")
    else:
      pbar.set_description(f"Epoch: {epoch}, tloss {loss:}")

Epoch: 1, tloss: 1.0855745077133179, vloss: 21.721550, EStop:[0/50]: 100%|████████| 1542/1542 [00:05<00:00, 297.60it/s]
Epoch: 2, tloss: 146.0690155029297, vloss: 37.612030, EStop:[1/50]: 100%|█████████| 1542/1542 [00:05<00:00, 298.10it/s]
Epoch: 3, tloss: 2.2170002460479736, vloss: 17.593355, EStop:[0/50]: 100%|████████| 1542/1542 [00:05<00:00, 296.89it/s]
Epoch: 4, tloss: 12.683396339416504, vloss: 19.668217, EStop:[1/50]: 100%|████████| 1542/1542 [00:05<00:00, 281.44it/s]
Epoch: 5, tloss: 0.3144833445549011, vloss: 19.195494, EStop:[2/50]: 100%|████████| 1542/1542 [00:05<00:00, 285.05it/s]
Epoch: 6, tloss: 0.007501082960516214, vloss: 15.801548, EStop:[0/50]: 100%|██████| 1542/1542 [00:06<00:00, 252.57it/s]
Epoch: 7, tloss: 0.018034078180789948, vloss: 14.869804, EStop:[0/50]: 100%|██████| 1542/1542 [00:05<00:00, 284.05it/s]
Epoch: 8, tloss: 1.4217360019683838, vloss: 16.011496, EStop:[1/50]: 100%|████████| 1542/1542 [00:05<00:00, 279.65it/s]
Epoch: 9, tloss: 0.030279282480478287, v

In [24]:


    


testing_values = df_test.values
dataset_test = TensorDataset(testing_values)
dataloader_test = DataLoader(dataset_test,\
  batch_size=1, shuffle=True)



predictions = []
model.eval()
for i in dataloader_test:
    with torch.no_grad():
        pred = model(i).to(device)
        predictions.append(pred)


TypeError: 'int' object is not callable