In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np

In [2]:
train_data = pd.read_csv("labeled_file.txt", sep="\t", header=None)
test_data = pd.read_csv("unlabeled_file.txt", sep="\t", header=None)

In [3]:
train_data = train_data.sample(frac=1).reset_index(drop=True)

In [4]:
layers_dim = [21, 50, 128]

In [5]:
class CNNNet(torch.nn.Module):
    def __init__(self):
        super(CNNNet, self).__init__()
        
        self.conv1 = torch.nn.Conv1d(1, 32, kernel_size=3, padding=1, stride=1)
        self.pool1 = torch.nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        self.drop = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(320, 384)
        self.fc2 = torch.nn.Linear(384, 128)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        
        x = self.pool1(x)
        
        x = x.view(-1, 320)
        
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = (self.fc2(x))
        
        return x
    
CNNnet = CNNNet()

In [6]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.fc1 = torch.nn.Linear(layers_dim[0], layers_dim[1])
        self.relu1 = torch.nn.ReLU()
        self.drop = torch.nn.Dropout(0.5)
        self.fc2 = torch.nn.Linear(layers_dim[1], layers_dim[2])
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.drop(x)
        x = self.fc2(x)
        
        return x
    
net = Net()

In [7]:
#USE THIS ONLY FOR LOADING THE MODEL

netLOADER = torch.nn.Sequential(
    torch.nn.Linear(layers_dim[0], layers_dim[1]),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(layers_dim[1], layers_dim[2]),
)

In [8]:
#CNNNetLoader = torch.nn.Sequential(
#    torch.nn.Conv1d(1, 8, kernel_size=3, padding=1, stride=1)
#    torch.nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
    
#)

In [9]:
train_x = train_data.iloc[:, 2:-1]
test_x = test_data.iloc[:, 2:-1]
train_y = train_data.iloc[:, -1]

In [10]:
print(train_x.shape, test_x.shape, train_y.shape)

(4636, 21) (19, 21) (4636,)


In [11]:
print(net.parameters)

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=21, out_features=50, bias=True)
  (relu1): ReLU()
  (drop): Dropout(p=0.5)
  (fc2): Linear(in_features=50, out_features=128, bias=True)
)>


In [12]:
print(CNNnet.parameters)

<bound method Module.parameters of CNNNet(
  (conv1): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop): Dropout(p=0.5)
  (fc1): Linear(in_features=320, out_features=384, bias=True)
  (fc2): Linear(in_features=384, out_features=128, bias=True)
)>


In [13]:
def one_hot(a, num_classes):
  return np.squeeze(np.eye(num_classes)[a.reshape(-1)])

In [14]:
train_x = np.array(train_x.values, dtype=np.float32)
train_y = np.array(train_y.values, dtype=np.long)
test_x = np.array(test_x.values, dtype=np.float32)
train_y = np.reshape(train_y, (4636, ))

#train_y = one_hot(train_y, 128)

train_x = np.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
test_x = np.reshape(test_x, (test_x.shape[0], 1, test_x.shape[1]))
#train_y = np.reshape(train_y, (train_y.shape[0], 1, train_y.shape[1]))

train_x = torch.from_numpy(train_x)
test_x = torch.from_numpy(test_x)
train_y = torch.from_numpy(train_y)

In [15]:
split_ratio = 0.9   #train_size/(train_size+val_size)
assert(split_ratio >= 0.0 and split_ratio <= 1.0)
index = (int)(split_ratio * train_x.shape[0])

In [16]:
train_val_x = train_x[index:, :, :]
train_val_y = train_y[index:]
train_x = train_x[:index, :, :]
train_y = train_y[:index]
print(train_x.shape, train_y.shape, train_val_x.shape, train_val_y.shape)

torch.Size([4172, 1, 21]) torch.Size([4172]) torch.Size([464, 1, 21]) torch.Size([464])


In [17]:
train_y = torch.tensor(train_y, dtype=torch.long)
train_val_y = torch.tensor(train_val_y, dtype=torch.long)

  """Entry point for launching an IPython kernel.
  


In [18]:
print(train_x.shape, test_x.shape, train_y.shape)

torch.Size([4172, 1, 21]) torch.Size([19, 1, 21]) torch.Size([4172])


In [None]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNNnet.parameters(), lr=0.005)
num_epochs = 1000
batch_size = 32


In [None]:
for epoch in range(num_epochs):  # number of epochs
    outputs = CNNnet(train_x)
    outputs = torch.squeeze(outputs)
    loss = criterion(outputs, train_y)
    
    #backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [1/1000], Loss: 87.1369
Epoch [2/1000], Loss: 148.1938
Epoch [3/1000], Loss: 160.4964
Epoch [4/1000], Loss: 117.6678
Epoch [5/1000], Loss: 74.8964
Epoch [6/1000], Loss: 35.5717
Epoch [7/1000], Loss: 11.7364
Epoch [8/1000], Loss: 5.5548
Epoch [9/1000], Loss: 5.0692
Epoch [10/1000], Loss: 4.9110
Epoch [11/1000], Loss: 4.8422
Epoch [12/1000], Loss: 4.8007
Epoch [13/1000], Loss: 4.7817
Epoch [14/1000], Loss: 4.7613
Epoch [15/1000], Loss: 4.7545
Epoch [16/1000], Loss: 4.7474
Epoch [17/1000], Loss: 4.7433
Epoch [18/1000], Loss: 4.7330
Epoch [19/1000], Loss: 4.7039
Epoch [20/1000], Loss: 4.6909
Epoch [21/1000], Loss: 4.6639
Epoch [22/1000], Loss: 4.6612
Epoch [23/1000], Loss: 4.6489
Epoch [24/1000], Loss: 4.6508
Epoch [25/1000], Loss: 4.6501
Epoch [26/1000], Loss: 4.6011
Epoch [27/1000], Loss: 4.6008
Epoch [28/1000], Loss: 4.5964
Epoch [29/1000], Loss: 4.5979
Epoch [30/1000], Loss: 4.5799
Epoch [31/1000], Loss: 4.5738
Epoch [32/1000], Loss: 4.5530
Epoch [33/1000], Loss: 4.5395
Epoch [34

Epoch [269/1000], Loss: 2.5180
Epoch [270/1000], Loss: 2.6260
Epoch [271/1000], Loss: 2.5539
Epoch [272/1000], Loss: 2.5933
Epoch [273/1000], Loss: 2.4956
Epoch [274/1000], Loss: 2.5040
Epoch [275/1000], Loss: 2.5190
Epoch [276/1000], Loss: 2.5386
Epoch [277/1000], Loss: 2.5090
Epoch [278/1000], Loss: 2.5189
Epoch [279/1000], Loss: 2.4862
Epoch [280/1000], Loss: 2.4839
Epoch [281/1000], Loss: 2.5245
Epoch [282/1000], Loss: 2.4634
Epoch [283/1000], Loss: 2.4850
Epoch [284/1000], Loss: 2.4936
Epoch [285/1000], Loss: 2.4756
Epoch [286/1000], Loss: 2.4727
Epoch [287/1000], Loss: 2.4763
Epoch [288/1000], Loss: 2.4312
Epoch [289/1000], Loss: 2.4460
Epoch [290/1000], Loss: 2.4639
Epoch [291/1000], Loss: 2.4378
Epoch [292/1000], Loss: 2.4179
Epoch [293/1000], Loss: 2.4428
Epoch [294/1000], Loss: 2.4415
Epoch [295/1000], Loss: 2.4646
Epoch [296/1000], Loss: 2.4284
Epoch [297/1000], Loss: 2.4219
Epoch [298/1000], Loss: 2.4450
Epoch [299/1000], Loss: 2.4240
Epoch [300/1000], Loss: 2.4296
Epoch [3

Epoch [534/1000], Loss: 2.4673
Epoch [535/1000], Loss: 2.4524
Epoch [536/1000], Loss: 2.4774
Epoch [537/1000], Loss: 2.4599
Epoch [538/1000], Loss: 2.4622
Epoch [539/1000], Loss: 2.4671
Epoch [540/1000], Loss: 2.4618
Epoch [541/1000], Loss: 2.4640
Epoch [542/1000], Loss: 2.4564
Epoch [543/1000], Loss: 2.4643
Epoch [544/1000], Loss: 2.4478
Epoch [545/1000], Loss: 2.4564
Epoch [546/1000], Loss: 2.4452
Epoch [547/1000], Loss: 2.4391
Epoch [548/1000], Loss: 2.4459
Epoch [549/1000], Loss: 2.4626
Epoch [550/1000], Loss: 2.4470
Epoch [551/1000], Loss: 2.4331
Epoch [552/1000], Loss: 2.4588
Epoch [553/1000], Loss: 2.4520
Epoch [554/1000], Loss: 2.4456
Epoch [555/1000], Loss: 2.4363
Epoch [556/1000], Loss: 2.4411
Epoch [557/1000], Loss: 2.4243
Epoch [558/1000], Loss: 2.4440
Epoch [559/1000], Loss: 2.4427
Epoch [560/1000], Loss: 2.4416
Epoch [561/1000], Loss: 2.4113
Epoch [562/1000], Loss: 2.4395
Epoch [563/1000], Loss: 2.4412
Epoch [564/1000], Loss: 2.4578
Epoch [565/1000], Loss: 2.4548
Epoch [5

Epoch [799/1000], Loss: 2.3068
Epoch [800/1000], Loss: 2.2826
Epoch [801/1000], Loss: 2.2793
Epoch [802/1000], Loss: 2.2633
Epoch [803/1000], Loss: 2.2742
Epoch [804/1000], Loss: 2.2655
Epoch [805/1000], Loss: 2.2682
Epoch [806/1000], Loss: 2.2633
Epoch [807/1000], Loss: 2.2599
Epoch [808/1000], Loss: 2.2607
Epoch [809/1000], Loss: 2.2636
Epoch [810/1000], Loss: 2.2380
Epoch [811/1000], Loss: 2.2442
Epoch [812/1000], Loss: 2.2572
Epoch [813/1000], Loss: 2.2436
Epoch [814/1000], Loss: 2.2552
Epoch [815/1000], Loss: 2.2340
Epoch [816/1000], Loss: 2.2504
Epoch [817/1000], Loss: 2.2331
Epoch [818/1000], Loss: 2.2374
Epoch [819/1000], Loss: 2.2332
Epoch [820/1000], Loss: 2.2386
Epoch [821/1000], Loss: 2.2272
Epoch [822/1000], Loss: 2.2225
Epoch [823/1000], Loss: 2.2189
Epoch [824/1000], Loss: 2.2145
Epoch [825/1000], Loss: 2.2232
Epoch [826/1000], Loss: 2.2243
Epoch [827/1000], Loss: 2.2325
Epoch [828/1000], Loss: 2.2160
Epoch [829/1000], Loss: 2.2249
Epoch [830/1000], Loss: 2.2187
Epoch [8

In [None]:
#num_batches = train_x.shape[0]//batch_size
#for epoch in range(num_epochs):  # number of epochs
#    for batch_no in range(num_batches):
#        outputs = net(train_x[batch_no*batch_size:(batch_size*(batch_no+1))])
#        loss = criterion(outputs, train_y[batch_no*batch_size:(batch_size*(batch_no+1))])
#    
#        #backward
#        optimizer.zero_grad()
#        loss.backward()
#        optimizer.step()
#
#    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [None]:
#outputs = net(train_x)

In [None]:
#_, predicted = torch.max(outputs, 1)

In [None]:
#print(predicted)

In [None]:
#correct = (predicted == train_y).sum().item()

In [None]:
#print("Accuracy = {}%".format(100 *(float(correct/train_data.shape[0]))))

In [None]:
def predict_accuracy(network, val_x, val_y):
    outputs = network(val_x)
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == val_y).sum().item()
    print("Accuracy = {}%".format(100 *(float(correct/val_x.shape[0]))))

In [None]:
predict_accuracy(CNNnet, train_x, train_y)
predict_accuracy(CNNnet, train_val_x, train_val_y)

In [None]:
def generate_labels(test_x):
    outputs = net(test_x)
    _, predictions = torch.max(outputs, 1)
    return predictions

In [None]:
!ls

In [None]:
torch.save(net, "trained_model_CNN_10000_epochs_2.pt")

In [None]:
CNNnet = torch.load("trained_model_CNN_10000_epochs_2.pt")

In [None]:
print(CNNnet)

In [None]:
test_labels = generate_labels(test_x)
print(test_labels)

In [None]:
test_data[[23]] = test_labels
test_data

In [None]:
test_data.to_csv("Labelled_prev_unlabelBCLL_CNN_M1.txt", sep=" ", header=False, index=False)