In [630]:
import pandas as pd
import seaborn as sns
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')

In [631]:
## Including data 

new_x_train = pd.read_csv('train.csv')
new_y_train = new_x_train.Dep_Var
new_x_train=new_x_train.drop(['Unnamed: 0','Date','Dep_Var','Identifier','quarter'],axis=1)
new_x_val = pd.read_csv('validation.csv')
new_y_val = new_x_val.Dep_Var
new_x_val=new_x_val.drop(['Unnamed: 0','Date','Dep_Var','Identifier','quarter'],axis=1)


In [632]:
##Regression NN

input_pats = torch.tensor(np.array(new_x_train),dtype=torch.float)
output_pats = torch.tensor(np.array(new_y_train),dtype=torch.long)

test_input_pats = torch.tensor(np.array(new_x_val),dtype=torch.float)
test_output_pats = torch.tensor(np.array(new_y_val),dtype=torch.long)

N = input_pats.shape[0]
M = test_input_pats.shape[0]

In [633]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_pats.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 20)
        self.elu = torch.nn.LeakyReLU()
        self.dropout1 = torch.nn.Dropout(0.2)
        self.dropout2 = torch.nn.Dropout(0.2)
        self.batchnorm1 = torch.nn.BatchNorm1d(64,affine=False)
        self.batchnorm2 = torch.nn.BatchNorm1d(32,affine=False)
        self.final_activation = torch.nn.Softmax()

    def forward(self, x):
        x = x.view(-1, input_pats.shape[1])
        x = self.elu(self.fc1(x))
        x = self.dropout1(x)
        x = self.batchnorm1(x)
        x = self.elu(self.fc2(x))
        x = self.dropout2(x)
        x = self.batchnorm2(x)
        x = self.final_activation(self.fc3(x))
        return x.view(-1,20)


net = Net()
print(net)

Net(
  (fc1): Linear(in_features=106, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=20, bias=True)
  (elu): LeakyReLU(negative_slope=0.01)
  (dropout1): Dropout(p=0.2)
  (dropout2): Dropout(p=0.2)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (batchnorm2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (final_activation): Softmax()
)


In [634]:
def batch_train(mynet,epoch_count,nepochs,batch_size=16):
    for e in range(nepochs):
        mynet.train()
        error_epoch = 0
        permutation = torch.randperm(N)
        for i in range(0,N,batch_size):
            mynet.zero_grad()
            indices = permutation[i:i+batch_size]
            
            batch_x, batch_y = input_pats[indices], output_pats[indices]
            output = mynet(batch_x) 
            target = batch_y
            loss = criterion(output, target) 
            loss.backward() 
            optimizer.step()
            error_epoch += loss.item()
        with torch.no_grad():
            mynet.eval()
            val_loss = 0
            for t in range(M):
                output = mynet(test_input_pats[t]) 
                target = test_output_pats[t]
                val_loss += criterion(output, target.view(-1)) 
            val_loss /= float(M)
            scheduler.step(val_loss)
        error_epoch = error_epoch / float(N)        
        print('epoch ' + str(e) +' loss ' + str(round(error_epoch,3)) + ' val loss ' + str(round(val_loss.item(),3)))


In [636]:
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
mynet = Net()
optimizer = torch.optim.Adam(mynet.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',patience=1)
nepochs = 2
batch_train(mynet,scheduler,nepochs)

epoch 0 loss 0.186 val loss 2.976
epoch 1 loss 0.186 val loss 2.976


In [637]:
test_set = test_input_pats
T = len(test_set)
with torch.no_grad():
    pred_val = mynet(test_set)
final_pred = pred_val.numpy()
final_pred = [np.argmax(f) for f in final_pred]
from collections import Counter
print(Counter(final_pred))
stats.spearmanr(final_pred,new_y_val)

Counter({12: 2702, 3: 2006, 0: 1490, 10: 1439, 8: 1257, 1: 1244, 18: 958, 13: 402, 16: 400, 11: 377, 2: 59, 14: 35, 7: 33, 4: 6, 19: 5, 9: 5, 5: 2, 6: 1})


SpearmanrResult(correlation=0.03393905294663558, pvalue=0.00015482430183892505)

In [645]:
##Classification NN


input_pats = torch.tensor(np.array(new_x_train),dtype=torch.float)
output_pats = torch.tensor(np.array(new_y_train),dtype=torch.float)

test_input_pats = torch.tensor(np.array(new_x_val),dtype=torch.float)
test_output_pats = torch.tensor(np.array(new_y_val),dtype=torch.float)

N = input_pats.shape[0]
M = test_input_pats.shape[0]

In [646]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_pats.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)
        self.elu = torch.nn.LeakyReLU()
        self.dropout1 = torch.nn.Dropout(0.2)
        self.dropout2 = torch.nn.Dropout(0.2)
        self.dropout3 = torch.nn.Dropout(0.2)
        self.batchnorm1 = torch.nn.BatchNorm1d(64,affine=False)
        self.batchnorm2 = torch.nn.BatchNorm1d(32,affine=False)
        self.batchnorm3 = torch.nn.BatchNorm1d(16)
        self.final_activation = torch.nn.Softplus()

    def forward(self, x):
        x = x.view(-1, input_pats.shape[1])
        x = self.elu(self.fc1(x))
        x = self.dropout1(x)
        x = self.batchnorm1(x)
        x = self.elu(self.fc2(x))
        x = self.dropout2(x)
        x = self.batchnorm2(x)
        x = self.fc3(x)
        x = self.dropout3(x)
        x = self.batchnorm3(x)
        x = self.final_activation(self.fc4(x))
        return x.view(-1)


net = Net()
print(net)

def batch_train(mynet,epoch_count,nepochs,batch_size=16):
    for e in range(nepochs):
        mynet.train()
        error_epoch = 0
        permutation = torch.randperm(N)
        for i in range(0,N,batch_size):
            mynet.zero_grad()
            indices = permutation[i:i+batch_size]
            batch_x, batch_y = input_pats[indices], output_pats[indices]
            output = mynet(batch_x) 
            target = batch_y
            loss = criterion(output, target) 
            loss.backward() 
            optimizer.step()
            error_epoch += loss.item()
        with torch.no_grad():
            mynet.eval()
            val_loss = 0
            for t in range(M):
                output = mynet(test_input_pats[t]) 
                target = test_output_pats[t]
                val_loss += criterion(output, target.view(-1)) 
            val_loss /= float(M)
            scheduler.step(val_loss)
        error_epoch = error_epoch / float(N)        
        print('epoch ' + str(e) +' loss ' + str(round(error_epoch,3)) + ' val loss ' + str(round(val_loss.item(),3)))


Net(
  (fc1): Linear(in_features=106, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=1, bias=True)
  (elu): LeakyReLU(negative_slope=0.01)
  (dropout1): Dropout(p=0.2)
  (dropout2): Dropout(p=0.2)
  (dropout3): Dropout(p=0.2)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (batchnorm2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  (batchnorm3): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (final_activation): Softplus(beta=1, threshold=20)
)


In [647]:
learning_rate = 0.01
criterion = nn.L1Loss()
mynet = Net()
optimizer = torch.optim.Adam(mynet.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',patience=1)
nepochs = 10
batch_train(mynet,scheduler,nepochs)

epoch 0 loss 0.316 val loss 4.954
epoch 1 loss 0.308 val loss 4.974
epoch 2 loss 0.305 val loss 4.976
epoch 3 loss 0.302 val loss 5.007
epoch 4 loss 0.301 val loss 5.017
epoch 5 loss 0.3 val loss 5.005
epoch 6 loss 0.3 val loss 5.01
epoch 7 loss 0.3 val loss 4.999
epoch 8 loss 0.3 val loss 4.99
epoch 9 loss 0.299 val loss 5.024


In [649]:
test_set = test_input_pats
T = len(test_set)
with torch.no_grad():
    pred_val = mynet(test_set)
ranked_pred=pred_val.numpy().argsort()
bins = [i for i in np.arange(0,T,T/20)]
bins.append(T)

final_pred = []
for r in ranked_pred:
    for i in range(20):
        if bins[i+1]>r:
            final_pred.append(i)
            break
from collections import Counter
print(Counter(final_pred))
from scipy import stats
stats.spearmanr(final_pred,new_y_val)

Counter({0: 622, 2: 621, 3: 621, 12: 621, 6: 621, 15: 621, 9: 621, 4: 621, 17: 621, 18: 621, 13: 621, 7: 621, 16: 621, 19: 621, 10: 621, 14: 621, 1: 621, 11: 621, 8: 621, 5: 621})


SpearmanrResult(correlation=-0.009962479078470267, pvalue=0.2669001359592835)