In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from sklearn.metrics import roc_auc_score
from collections import OrderedDict 

In [2]:
# Seed random number generation
torch.manual_seed(62)
np.random.seed(62)

# Neural Net Model
### Fake Deep - CMS 155
I will first import the data.

In [3]:
df_train = pd.read_csv("./datasets/train_freq-imp_iqr-outliers_label-enc.zip")
df_train.head(5)

Unnamed: 0,id,LATITUDE,LONGITUDE,STATE,DISCOVERY_TIME,FIRE_SIZE,FIPS_NAME,FIPS_CODE,SOURCE_REPORTING_UNIT_NAME,DATE,LABEL
0,1,33.8131,-85.1043,1,1115.0,1.17,82,143.0,65,0,4
1,2,32.201,-82.4987,1,1600.0,0.07,129,209.0,65,0,2
2,3,32.5093,-81.7086,1,1215.0,4.4,19,31.0,65,0,4
3,4,33.663889,-116.171944,0,1400.0,0.2,153,65.0,13,0,2
4,5,33.1667,-116.6342,0,1330.0,5.0,153,65.0,140,0,2


In [4]:
df_test = pd.read_csv("./datasets/test_freq-imp_label-enc.zip")
df_test.head(5)

Unnamed: 0,id,LATITUDE,LONGITUDE,STATE,DISCOVERY_TIME,FIRE_SIZE,FIPS_NAME,FIPS_CODE,SOURCE_REPORTING_UNIT_NAME,DATE
0,285382,34.346944,-117.442222,0,1605.0,0.2,158,71.0,145,0
1,285383,34.02039,-116.17997,0,1545.0,0.1,154,65.0,69,0
2,285384,38.068611,-120.276667,0,1200.0,0.1,196,109.0,170,0
3,285385,32.499971,-83.742573,1,1500.0,0.4,87,153.0,47,1
4,285386,32.92494,-114.99253,0,126.0,0.1,89,25.0,18,1


Here I separate the data randomly into training and testing sets, with a 75/25 split. 

In [5]:
D = len(df_train.columns[1:-1])
N_total = len(df_train)
N = int(0.75 * N_total)
N_test = N_total - N
train_indices = np.random.choice(list(range(N_total)), size=N, replace=False)
X_predict = df_test.to_numpy(dtype = float)[:, 1:]
X_train = np.zeros([N, D], dtype = float)
Y_train = np.zeros(N, dtype = int)
X_test = np.zeros([N_test, D], dtype = float)
Y_test = np.zeros(N_test, dtype = int)
j = 0
k = 0
for i in range(len(df_train)):
    if i in train_indices:
        X_train[j, :] = df_train.iloc[i, 1:-1]
        Y_train[j] = df_train.iloc[i, -1]
        j += 1
    else:
        X_test[k, :] = df_train.iloc[i, 1:-1]
        Y_test[k] = df_train.iloc[i, -1]
        k += 1

Next I will apply a basic normalization to each column. I am careful to normalize everything using only the training data. 

In [6]:
for i in range(D):
    mu = np.mean(X_train[:, i])
    stddev = np.std(X_train[:, i])
    X_train[:, i] = (X_train[:, i] - mu ) / stddev
    X_test[:, i] = (X_test[:, i] - mu ) / stddev
    X_predict[:, i] = (X_predict[:, i] - mu ) / stddev

# We require Y_train and Y_test to be from 0-3, not 1-4
Y_train = Y_train - 1
Y_test = Y_test - 1

I actually need to onehot encode the labels to the data set. In effect, my neural net will have 4 output units and I want the labels to emulate this. 

In [7]:
C = len(np.unique(Y_train))
Y_train_oh = np.zeros([len(Y_train), C])
Y_test_oh = np.zeros([len(Y_test), C])
for i in range(len(Y_train)):
    y = Y_train[i] - 1
    Y_train_oh[i, y] = 1
for i in range(len(Y_test)):
    y = Y_test[i] - 1
    Y_test_oh[i, y] = 1

I need to write a dataset class in order to use this set with pytorch. This is totally barebones, but I don't need to worry about streaming the dataset off the hard drive to multiple cores, since I have the memory to just store the entire dataset on each core. 

In [8]:
class Dataset(torch.utils.data.Dataset):
    "Dataset object for pytorch."
    def __init__(self, X, Y):
        'Initialization'
        self.Y = Y.astype(float)
        self.X = X.astype(float)

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.Y)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Load data and get label
        x = self.X[index]
        y = self.Y[index]
        return x, y

Let's use this class to actually construct dataset objects. 

In [9]:
train_dataset = Dataset(X_train, Y_train_oh)
test_dataset = Dataset(X_test, Y_test_oh)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1024, shuffle=True) 

I will use my GPU to try and speed up the neural net. 

In [10]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')
# When you are on a CUDA machine, this should print a CUDA device:
print(device)

cuda


I'll take a first stab at the model architecture. 

In [11]:
model = nn.Sequential(
    nn.Linear(D, 100),
    nn.Linear(100, 500),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(500, 500),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(500, 100),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(100, C),
    nn.Softmax(dim=1)
)
optimizer = torch.optim.Adam(model.parameters(), lr=float(1e-4))
loss_fn = nn.SmoothL1Loss()
print(model)

Sequential(
  (0): Linear(in_features=9, out_features=100, bias=True)
  (1): Linear(in_features=100, out_features=500, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=500, out_features=500, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.2, inplace=False)
  (7): Linear(in_features=500, out_features=100, bias=True)
  (8): ReLU()
  (9): Dropout(p=0.2, inplace=False)
  (10): Linear(in_features=100, out_features=4, bias=True)
  (11): Softmax(dim=1)
)


I will import some helper functions that I wrote in problem set 4. 

In [12]:
def train_model():
    
    model.to(device)
    model.train()

    for epoch in range(12):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            # Erase accumulated gradients
            optimizer.zero_grad()
            
            # Forward pass
            output = model(data.float())

            # Calculate loss
            loss = loss_fn(output, target.float())

            # Backward pass
            loss.backward()

            # Weight update
            optimizer.step()

        # Track loss each epoch
        print('Train Epoch: %d  Loss: %.4f' % (epoch + 1,  loss.item()))

def get_train_err():
    model.eval()
    correct = 0
    train_error = 0
    train_loss = 0
    # Turning off automatic differentiation
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            train_loss += loss_fn(output, target).item() * len(target) # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=False).cpu().numpy()  # Get the index of the max class score
            
            # Convert the target back from onehot encoding
            target = target.cpu().numpy()
            target = target[:, 1] + target[:, 2] * 2 + target[:, 3] * 3
            
            # Determine the accuracy of the classification
            correct += np.sum(pred==target)
            train_error += roc_auc_score(target, 
                                         output.cpu(), 
                                         multi_class='ovr') * len(target)
            
    train_loss /= len(train_loader.dataset)
    train_error /= len(train_loader.dataset)
    print("Average Training ROC AUC: %.3f"%train_error)
    print('Training set: Average loss: %.4f, Accuracy: %d/%d (%.4f)' %
          (train_loss, correct, len(train_loader.dataset),
           100. * correct / len(train_loader.dataset)))
    
def get_test_err():
    # Putting layers like Dropout into evaluation mode
    model.eval()

    test_loss = 0
    correct = 0
    test_error = 0

    # Turning off automatic differentiation
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            test_loss += loss_fn(output, target).item() * len(target)  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=False).cpu().numpy()  # Get the index of the max class score
            
            # Convert the target back from onehot encoding
            target = target.cpu().numpy()
            target = target[:, 1] + target[:, 2] * 2 + target[:, 3] * 3
            
            # Determine the accuracy of the classification
            correct += np.sum(pred==target)
            test_error += roc_auc_score(target, 
                                        output.cpu(), 
                                        multi_class='ovr') * len(target)

    test_loss /= len(test_loader.dataset)
    test_error /= len(test_loader.dataset)
    print("Average Testing ROC AUC: %.3f"%test_error)
    print('Test set: Average loss: %.4f, Accuracy: %d/%d (%.4f)' %
          (test_loss, correct, len(test_loader.dataset),
           100. * correct / len(test_loader.dataset)))

In [13]:
train_model()
get_train_err()
get_test_err()

Train Epoch: 1  Loss: 0.0661
Train Epoch: 2  Loss: 0.0675
Train Epoch: 3  Loss: 0.0686
Train Epoch: 4  Loss: 0.0638
Train Epoch: 5  Loss: 0.0663
Train Epoch: 6  Loss: 0.0695
Train Epoch: 7  Loss: 0.0639
Train Epoch: 8  Loss: 0.0685
Train Epoch: 9  Loss: 0.0647
Train Epoch: 10  Loss: 0.0616
Train Epoch: 11  Loss: 0.0625
Train Epoch: 12  Loss: 0.0636
Average Training ROC AUC: 0.762
Training set: Average loss: 0.0634, Accuracy: 125764/200395 (62.7581)
Average Testing ROC AUC: 0.761
Test set: Average loss: 0.0637, Accuracy: 41794/66799 (62.5668)


I will now try constructing and testing an **even deeper** neural net. 

In [14]:
height = 5000
narrow = 100
drop = 0.05
depth = 4
ordict = OrderedDict()
ordict['start'] = nn.Linear(D, narrow)
ordict['expand1'] = nn.Linear(narrow, height)
ordict['expand2'] = nn.ReLU()
ordict['expand3'] = nn.Dropout(drop)

# Construct the bulk of the net
for i in range(depth):
    ordict['1-%i'%i] = nn.Linear(height, height)
    ordict['2-%i'%i] = nn.ReLU()
    ordict['3-%i'%i] = nn.Dropout(drop)
    
# Narrow the net and bring it down to the last few nodes
ordict['narrow1'] = nn.Linear(height, narrow)
ordict['narrow2'] = nn.ReLU()
ordict['narrow3'] = nn.Dropout(drop)
ordict['final-1'] = nn.Linear(narrow, C)
ordict['final-2'] = nn.Softmax(dim=1)

# Pack all the layers into the model
model = nn.Sequential(ordict)

optimizer = torch.optim.Adam(model.parameters(), lr=float(1e-4))
loss_fn = nn.SmoothL1Loss()
print(model)

Sequential(
  (start): Linear(in_features=9, out_features=100, bias=True)
  (expand1): Linear(in_features=100, out_features=5000, bias=True)
  (expand2): ReLU()
  (expand3): Dropout(p=0.05, inplace=False)
  (1-0): Linear(in_features=5000, out_features=5000, bias=True)
  (2-0): ReLU()
  (3-0): Dropout(p=0.05, inplace=False)
  (1-1): Linear(in_features=5000, out_features=5000, bias=True)
  (2-1): ReLU()
  (3-1): Dropout(p=0.05, inplace=False)
  (1-2): Linear(in_features=5000, out_features=5000, bias=True)
  (2-2): ReLU()
  (3-2): Dropout(p=0.05, inplace=False)
  (1-3): Linear(in_features=5000, out_features=5000, bias=True)
  (2-3): ReLU()
  (3-3): Dropout(p=0.05, inplace=False)
  (narrow1): Linear(in_features=5000, out_features=100, bias=True)
  (narrow2): ReLU()
  (narrow3): Dropout(p=0.05, inplace=False)
  (final-1): Linear(in_features=100, out_features=4, bias=True)
  (final-2): Softmax(dim=1)
)


In [15]:
train_model()
get_train_err()
get_test_err()

Train Epoch: 1  Loss: 0.0625
Train Epoch: 2  Loss: 0.0669
Train Epoch: 3  Loss: 0.0651
Train Epoch: 4  Loss: 0.0655
Train Epoch: 5  Loss: 0.0647
Train Epoch: 6  Loss: 0.0632
Train Epoch: 7  Loss: 0.0616
Train Epoch: 8  Loss: 0.0644
Train Epoch: 9  Loss: 0.0597
Train Epoch: 10  Loss: 0.0634
Train Epoch: 11  Loss: 0.0572
Train Epoch: 12  Loss: 0.0630
Average Training ROC AUC: 0.785
Training set: Average loss: 0.0609, Accuracy: 128575/200395 (64.1608)
Average Testing ROC AUC: 0.781
Test set: Average loss: 0.0618, Accuracy: 42502/66799 (63.6267)


This model looks good. Let's train it on everything. 

In [16]:
Y_oh = np.zeros([N_total, C])
X = np.zeros([N_total, D])
Y_oh[:N, :] = Y_train_oh
Y_oh[N:, :] = Y_test_oh
X[:N, :] = X_train
X[N:, :] = X_test
train_dataset = Dataset(X, Y_oh)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1024, shuffle=True)

# Redefine the model
model = nn.Sequential(ordict)
optimizer = torch.optim.Adam(model.parameters(), lr=float(1e-4))
loss_fn = nn.SmoothL1Loss()

# Retrain the model on all avaliable data and get the training error. 
train_model()
get_train_err()

Train Epoch: 1  Loss: 0.0603
Train Epoch: 2  Loss: 0.0621
Train Epoch: 3  Loss: 0.0582
Train Epoch: 4  Loss: 0.0593
Train Epoch: 5  Loss: 0.0588
Train Epoch: 6  Loss: 0.0654
Train Epoch: 7  Loss: 0.0609
Train Epoch: 8  Loss: 0.0623
Train Epoch: 9  Loss: 0.0614
Train Epoch: 10  Loss: 0.0617
Train Epoch: 11  Loss: 0.0597
Train Epoch: 12  Loss: 0.0552
Average Training ROC AUC: 0.803
Training set: Average loss: 0.0590, Accuracy: 175294/267194 (65.6055)


Now that the model is well trained, I will predict the labels and submit to kaggle. 

In [17]:
pred_data = np.zeros([len(X_predict), 5])
output = 0
with torch.no_grad():
    pred_data[:, 1:] = model(torch.tensor(X_predict).to(device).float()).cpu()
pred_data[:, 0] = df_test["id"].values
pred_df = pd.DataFrame(pred_data, columns = ["id", "P1", "P2", "P3", "P4"])

In [18]:
pred_df = pred_df.astype({'id': 'int'})
pred_df.to_csv("./FakeDeep_Attempt2_NeuralNet.csv", index=False)