# Deep Learning with Pytorch for the PCA dataset.

In [63]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('pcafilewlabels.csv', header = None)

In [3]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.555856,2.5e-05,0.000419,3.8e-05,-0.000716,-0.000726,0.001239,-0.000194,0.002798,0.012813,...,0.001206,0.000655,1e-06,-7.1e-05,-0.000121,-0.000157,-0.000213,-0.000554,p1,a01
1,-0.55631,2.8e-05,0.000418,3.6e-05,-0.000713,-0.000724,0.001234,-0.000199,0.002731,0.012509,...,0.001149,0.000652,-4.3e-05,-7.6e-05,-0.000117,-0.000103,-0.000173,-0.00049,p1,a01
2,-0.555609,1.8e-05,0.000419,2.1e-05,-0.000703,-0.000718,0.001231,-0.000156,0.002711,0.012504,...,0.000851,0.000605,-0.000239,-1.3e-05,-0.000219,-5.8e-05,-0.000412,-0.000616,p1,a01
3,-0.55599,2.9e-05,0.000417,3.9e-05,-0.000715,-0.000728,0.001247,-0.000215,0.002794,0.012732,...,0.001167,0.000664,-1.9e-05,-3.7e-05,-0.000138,-0.00013,-0.000105,-0.00049,p1,a01
4,-0.555658,1.9e-05,0.000425,2.4e-05,-0.000701,-0.000714,0.001223,-0.000171,0.002714,0.012477,...,0.00102,0.000639,-0.000154,-0.00013,-9.7e-05,-4.8e-05,-0.000214,-0.00056,p1,a01


In [4]:
X2 = df.iloc[:,30:32]
X2 = X2.apply(LabelEncoder().fit_transform)

In [5]:
X_2 = df.iloc[:,0:30].join(X2)
X_2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.555856,2.5e-05,0.000419,3.8e-05,-0.000716,-0.000726,0.001239,-0.000194,0.002798,0.012813,...,0.001206,0.000655,1e-06,-7.1e-05,-0.000121,-0.000157,-0.000213,-0.000554,0,0
1,-0.55631,2.8e-05,0.000418,3.6e-05,-0.000713,-0.000724,0.001234,-0.000199,0.002731,0.012509,...,0.001149,0.000652,-4.3e-05,-7.6e-05,-0.000117,-0.000103,-0.000173,-0.00049,0,0
2,-0.555609,1.8e-05,0.000419,2.1e-05,-0.000703,-0.000718,0.001231,-0.000156,0.002711,0.012504,...,0.000851,0.000605,-0.000239,-1.3e-05,-0.000219,-5.8e-05,-0.000412,-0.000616,0,0
3,-0.55599,2.9e-05,0.000417,3.9e-05,-0.000715,-0.000728,0.001247,-0.000215,0.002794,0.012732,...,0.001167,0.000664,-1.9e-05,-3.7e-05,-0.000138,-0.00013,-0.000105,-0.00049,0,0
4,-0.555658,1.9e-05,0.000425,2.4e-05,-0.000701,-0.000714,0.001223,-0.000171,0.002714,0.012477,...,0.00102,0.000639,-0.000154,-0.00013,-9.7e-05,-4.8e-05,-0.000214,-0.00056,0,0


In [6]:
X_2[31].value_counts()

15    480
18    480
8     480
16    480
1     480
9     480
17    480
2     480
10    480
3     480
7     480
11    480
4     480
12    480
5     480
13    480
6     480
14    480
0     480
Name: 31, dtype: int64

In [7]:
X_2.iloc[:,0:30].shape

(9120, 30)

Getting all the features in X, and the labels in y.

In [7]:
X = X_2.iloc[:,0:30].as_matrix()
y = X_2[31].as_matrix()

Splitting them into `X_train, X_test, y_train, y_test`

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 451)

In [17]:
y_pred

array([14, 14, 14, ..., 14, 11, 14])

Converting them into pytorch tensors from numpy arrays, because torch works with tensors.

In [58]:
X_train = torch.from_numpy(X_train)
X_test = torch.from_numpy(X_test)
y_train = torch.from_numpy(y_train)
y_test = torch.from_numpy(y_test)

Converting the training data into `long` data type

In [59]:
y_train = y_train.long()
y_test = y_test.long()

In [95]:
type(y_train)

torch.LongTensor

In [60]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

(torch.Size([6840, 30]),
 torch.Size([2280, 30]),
 torch.Size([6840]),
 torch.Size([2280]))

Making the training data ready for the iterations and epochs to occur, according to their `batch_size` and shuffling them as well.

In [61]:
training_samples = TensorDataset(X_train, y_train)
test_samples = TensorDataset(X_test, y_test)

train_loader = DataLoader(training_samples, batch_size=64, shuffle=True)
valid_loader = DataLoader(test_samples, batch_size=64, shuffle=True)

### With Adam Optimizer. 

* Optimizer : Adam 
* Learning Rate : 1e-2
* Number of Hidden layers : 1
* Number of neurons in hidden layers : 100
* Activation function(s) used : ReLU
* Batch size : 64
* Epochs : 500
* Final loss of the last batch / neuron : 0.297
* **Accuracy of the model : 0.8951**

In [108]:
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we construct three nn.Linear instances that we will use
        in the forward pass.
        """

        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 0, 1, 2, or 3
        and reuse the middle_linear Module that many times to compute hidden layer
        representations.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same Module many
        times when defining a computational graph. This is a big improvement from Lua
        Torch, where each Module could be used only once.
        """
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(np.random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
      
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 30, 100, 19

# Construct our model by instantiating the class defined above
model = DynamicNet(D_in, H, D_out)
# making our model operate at double precision.
model = model.double()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
# since it is multiclass classification problem, we are using CrossEntropyLoss, instead of MSEloss
criterion = torch.nn.CrossEntropyLoss()

learning_rate = 1e-2 # alpha
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Adam optimizers

for epoch in range(500):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]

    if (epoch + 1) % 100 == 0:
        print('Epoch : %d,  loss: %.3f' %
              (epoch + 1, running_loss / 64))
    running_loss = 0.0

print('Finished Training')

Epoch : 100,  loss: 0.568
Epoch : 200,  loss: 0.434
Epoch : 300,  loss: 0.368
Epoch : 400,  loss: 0.342
Epoch : 500,  loss: 0.297
Finished Training


In [109]:

# pass it through the model
X_test_var = Variable(X_test, volatile=True)
scores = model(X_test_var)
_, preds = torch.max(scores, dim=1)

accuracy_score(preds.data.numpy(), y_test.numpy())


0.89517543859649118

### With RMSProp optimizer.
* Optimizer : RMSProp 
* Learning Rate : 1e-2 
* Number of Hidden layers : 1
* Number of neurons in hidden layers : 100
* Activation function(s) used : ReLU
* Batch size : 64
* Epochs : 500
* Final loss of the last batch / neuron : 0.275
* **Accuracy of the model : 0.8877**

In [104]:
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):

        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):

        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(np.random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
      

N, D_in, H, D_out = 64, 30, 100, 19

model = DynamicNet(D_in, H, D_out)
model = model.double()

criterion = torch.nn.CrossEntropyLoss()

learning_rate = 1e-2
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for epoch in range(500):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
#         if i % 2000 == 1999:    # print every 2000 mini-batches
    if (epoch + 1) % 100 == 0:
        print('Epoch : %d,  loss: %.3f' %
              (epoch + 1, running_loss / 64))
    running_loss = 0.0

print('Finished Training')

Epoch : 100,  loss: 0.549
Epoch : 200,  loss: 0.400
Epoch : 300,  loss: 0.331
Epoch : 400,  loss: 0.292
Epoch : 500,  loss: 0.275
Finished Training


In [105]:

# pass it through the model
X_test_var = Variable(X_test, volatile=True)
scores = model(X_test_var)
_, preds = torch.max(scores, dim=1)

accuracy_score(preds.data.numpy(), y_test.numpy())


0.88771929824561402

### With SGD optimizer.

* Optimizer : SGD 
* Learning Rate : 1e-2
* Number of Hidden layers : 1
* Number of neurons in hidden layers : 100
* Activation function(s) used : ReLU
* Batch size : 64
* Epochs : 5000
* Final loss of the last batch / neuron : 0.940
* **Accuracy : 0.8451**

In [110]:
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):

        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):

        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(np.random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
      

N, D_in, H, D_out = 64, 30, 100, 19

model = DynamicNet(D_in, H, D_out)
model = model.double()

criterion = torch.nn.CrossEntropyLoss()

learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(5000):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
#         if i % 2000 == 1999:    # print every 2000 mini-batches
    if (epoch + 1) % 100 == 0:
        print('Epoch : %d,  loss: %.3f' %
              (epoch + 1, running_loss / 64))
    running_loss = 0.0

print('Finished Training')

Epoch : 100,  loss: 3.663
Epoch : 200,  loss: 3.420
Epoch : 300,  loss: 3.325
Epoch : 400,  loss: 3.279
Epoch : 500,  loss: 3.243
Epoch : 600,  loss: 3.223
Epoch : 700,  loss: 3.133
Epoch : 800,  loss: 2.928
Epoch : 900,  loss: 2.667
Epoch : 1000,  loss: 2.539
Epoch : 1100,  loss: 2.292
Epoch : 1200,  loss: 2.202
Epoch : 1300,  loss: 1.962
Epoch : 1400,  loss: 2.043
Epoch : 1500,  loss: 1.782
Epoch : 1600,  loss: 1.726
Epoch : 1700,  loss: 1.716
Epoch : 1800,  loss: 1.617
Epoch : 1900,  loss: 1.588
Epoch : 2000,  loss: 1.468
Epoch : 2100,  loss: 1.441
Epoch : 2200,  loss: 1.460
Epoch : 2300,  loss: 1.319
Epoch : 2400,  loss: 1.332
Epoch : 2500,  loss: 1.264
Epoch : 2600,  loss: 1.284
Epoch : 2700,  loss: 1.208
Epoch : 2800,  loss: 1.208
Epoch : 2900,  loss: 1.154
Epoch : 3000,  loss: 1.208
Epoch : 3100,  loss: 1.151
Epoch : 3200,  loss: 1.102
Epoch : 3300,  loss: 1.129
Epoch : 3400,  loss: 1.087
Epoch : 3500,  loss: 1.172
Epoch : 3600,  loss: 1.059
Epoch : 3700,  loss: 1.024
Epoch : 38

In [111]:

# pass it through the model
X_test_var = Variable(X_test, volatile=True)
scores = model(X_test_var)
_, preds = torch.max(scores, dim=1)

accuracy_score(preds.data.numpy(), y_test.numpy())


0.84517543859649125

### With Adam Optimizer , Karpathy constant

* Optimizer : Adam 
* Learning Rate : 3e-4 # karpathy constant
* Number of Hidden layers : 1
* Number of neurons in hidden layers : 100
* Activation function(s) used : ReLU
* Batch size : 64
* Epochs : 500
* Final loss of the last batch / neuron : 0.938
* **Accuracy of the model : 0.8078**

In [90]:
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):

        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):

        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(np.random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
      

N, D_in, H, D_out = 64, 30, 100, 19

model = DynamicNet(D_in, H, D_out)
model = model.double()

criterion = torch.nn.CrossEntropyLoss()

learning_rate = 3e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(500):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
#         if i % 2000 == 1999:    # print every 2000 mini-batches
    if (epoch + 1) % 100 == 0:
        print('Epoch : %d,  loss: %.3f' %
              (epoch + 1, running_loss / 64))
    running_loss = 0.0

print('Finished Training')

Epoch : 100,  loss: 1.821
Epoch : 200,  loss: 1.359
Epoch : 300,  loss: 1.150
Epoch : 400,  loss: 1.055
Epoch : 500,  loss: 0.938
Finished Training


In [91]:

# pass it through the model
X_test_var = Variable(X_test, volatile=True)
scores = model(X_test_var)
_, preds = torch.max(scores, dim=1)

accuracy_score(preds.data.numpy(), y_test.numpy())


0.80789473684210522

In [85]:
import pickle
import copy

saved_trainer = copy.deepcopy(model)
with open(r"bestmodel_object.pkl", "wb") as output_file:
    pickle.dump(saved_trainer, output_file)

In [86]:
torch.save(model, './model803.pth')

  "type " + obj.__name__ + ". It won't be checked "


# Bonus Task

## Prediction of Patient with signals and activity as input.

In [113]:
# selecting the first 30 columns of the pca data and the activity column, and storing it as a matrix in X
# getting the patient ID column from X_2 and storing it as a numpy array in y.

X = X_2.iloc[:, 0:30].join(X_2[31]).as_matrix()
y = X_2[30].as_matrix()

# train_test_split to create a testing set and a validation set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 451)

# converting all the numpy arrays into torch Double Tensors.
X_train = torch.from_numpy(X_train)
X_test = torch.from_numpy(X_test)
y_train = torch.from_numpy(y_train)
y_test = torch.from_numpy(y_test)

# For the net I've writtent takes, only long tensors for target data, so converting the labels into Long Tensors.
y_train = y_train.long()
y_test = y_test.long()

# putting them together , preparation for training.
training_samples = TensorDataset(X_train, y_train)

# makes division using batch_size = 64, and shuffle is enabled.
train_loader = DataLoader(training_samples, batch_size=64, shuffle=True)

In [118]:
class DynamicNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we construct three nn.Linear instances that we will use
        in the forward pass.
        """

        super(DynamicNet, self).__init__()
        self.input_linear = torch.nn.Linear(D_in, H)
        self.middle_linear = torch.nn.Linear(H, H)
        self.output_linear = torch.nn.Linear(H, D_out)


    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 0, 1, 2, or 3
        and reuse the middle_linear Module that many times to compute hidden layer
        representations.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same Module many
        times when defining a computational graph. This is a big improvement from Lua
        Torch, where each Module could be used only once.
        """
        h_relu = self.input_linear(x).clamp(min=0)
        for _ in range(np.random.randint(0, 3)):
            h_relu = self.middle_linear(h_relu).clamp(min=0)
        y_pred = self.output_linear(h_relu)
        return y_pred
      
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 31, 100, 8

# Construct our model by instantiating the class defined above
model = DynamicNet(D_in, H, D_out)
# making our model operate at double precision.
model = model.double()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
# since it is multiclass classification problem, we are using CrossEntropyLoss, instead of MSEloss
criterion = torch.nn.CrossEntropyLoss()

learning_rate = 1e-2 # alpha
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate) 

for epoch in range(500): # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]

    if (epoch + 1) % 100 == 0:
        print('Epoch : %d,  loss: %.3f' %
              (epoch + 1, running_loss / 64))
    running_loss = 0.0

print('Finished Training')

Epoch : 100,  loss: 2.876
Epoch : 200,  loss: 2.663
Epoch : 300,  loss: 2.607
Epoch : 400,  loss: 2.453
Epoch : 500,  loss: 2.409
Finished Training


In [119]:

# pass it through the model
X_test_var = Variable(X_test, volatile=True)
scores = model(X_test_var)
_, preds = torch.max(scores, dim=1)

accuracy_score(preds.data.numpy(), y_test.numpy())


0.50438596491228072

# Conclusion

# Prediction of Activity with the signal using NNs

#### With Pytorch


|**Model**                    |**Accuracy**|
|-----------------------------|------------|
|With Adam Optimizer  |0.8951|
|With Adam Optimizer, Karpathy constant | 0.8078 |
|With RMSProp optimizer  |0.8877|
|With SGD Optimizer | 0.8451 |

## Bonus Task

# Prediction of Patient with the signal and activity using NNs

#### With Pytorch


|**Model**                    |**Accuracy**|
|-----------------------------|------------|
|With RMSProp optimizer  |0.5043|