### Import libraries


In [1]:
from google.colab import drive 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt


### Load data from Google Drive

In [2]:
# Mount Google Drive at /content/drive so the project files stored there
# are reachable through the normal filesystem.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Folder on the mounted Drive that holds the project CSV files.
DATA_DIR = '/content/drive/MyDrive/Project1-Group 46'

# Full paths of the two CSV files inside that folder.
train_path = DATA_DIR + '/train.csv'
test_path = DATA_DIR + '/test.csv'

In [4]:
# header=None: the first line of the file is treated as data, so the
# columns are labelled with integers (0, 1, 2, ...) rather than names.
train_data = pd.read_csv(train_path, header = None)

In [5]:
def DataSplit(data, r1 = 5, r2 = 10, split_size = 0.2):
  """Split a loaded table into train / validation / test arrays.

  Columns 1-960 are used as features and column 961 as the label. Column 0
  is not selected (the original note describes it as an id column).

  Args:
    data: table that can be indexed with a list of integer column labels
      and whose selections expose ``.values`` (e.g. a DataFrame read with
      ``header=None``).
    r1: ``random_state`` of the first split (train vs. held-out rows).
    r2: ``random_state`` of the second split (validation vs. test rows).
    split_size: fraction of all rows that is held out of the training set;
      the held-out rows are then divided in half between validation and test.

  Returns:
    Tuple ``(x_train, x_validation, x_test, y_train, y_validation, y_test)``
    of ``.values`` arrays. The label is selected with a one-element column
    list, so each label array keeps a trailing axis of length 1.
  """
  feature_columns = list(range(1, 961))
  label_columns = [961]

  # First cut: training rows vs. everything that is held out.
  kept_x, held_x, kept_y, held_y = train_test_split(
      data[feature_columns], data[label_columns],
      test_size = split_size, random_state = r1)

  # Second cut: the held-out rows are shared equally by validation and test.
  val_x, test_x, val_y, test_y = train_test_split(
      held_x, held_y, test_size = 0.5, random_state = r2)

  ordered_parts = (kept_x, val_x, test_x, kept_y, val_y, test_y)
  return tuple(part.values for part in ordered_parts)


In [6]:
# With the default arguments this is an 80% / 10% / 10% train / validation / test split.
x_train, x_validation, x_test, y_train, y_validation, y_test = DataSplit(train_data)

In [7]:
def To2DFeatures(array):
  """Regroup every flat row into consecutive triples of values.

  A row ``[a0, a1, a2, b0, b1, b2, ...]`` becomes
  ``[[a0, a1, a2], [b0, b1, b2], ...]``. Values left over at the end of a
  row that do not fill a complete triple are dropped.

  Args:
    array: iterable of rows, each row an iterable of values.

  Returns:
    ``np.array`` built from the nested list of triples.
  """
  def _triples(flat_row):
    # Cut the row into back-to-back slices of three, ignoring any remainder.
    values = list(flat_row)
    usable = len(values) - len(values) % 3
    return [values[start:start + 3] for start in range(0, usable, 3)]

  return np.array([_triples(row) for row in array])


In [8]:
# Regroup every flat feature row into (positions, 3) value triples.
x_train = To2DFeatures(x_train)
x_validation = To2DFeatures(x_validation)
x_test = To2DFeatures(x_test)

# Convert to tensors: float features for the convolutions, integer (long)
# class labels for the classification loss.
x_train = torch.FloatTensor(x_train)
x_validation = torch.FloatTensor(x_validation)
x_test = torch.FloatTensor(x_test)
y_train = torch.LongTensor(y_train)
y_validation = torch.LongTensor(y_validation)
y_test = torch.LongTensor(y_test)

# Conv1d consumes (batch, channels, length), so move the 3 values of each
# triple onto the channel axis. The tensor's own transpose is used instead of
# np.swapaxes, which only hands back a tensor through NumPy's duck-typing
# fallback and is not guaranteed to do so.
x_train = x_train.transpose(1, 2)
x_validation = x_validation.transpose(1, 2)
x_test = x_test.transpose(1, 2)

# DataSplit returns labels as (N, 1) columns; flatten them to (N,) vectors.
y_train = torch.flatten(y_train)
y_validation = torch.flatten(y_validation)
y_test = torch.flatten(y_test)


# Batched loaders for the training and development (validation) sets.
train_dataset = TensorDataset(x_train, y_train)
dev_dataset = TensorDataset(x_validation, y_validation)
train_dataloader = DataLoader(train_dataset, batch_size = 100)
dev_dataloader = DataLoader(dev_dataset, batch_size = 100)

### Feed the data directly to the CNN network

In [9]:
# The simplest 1-D CNN model: the original dataset is fed straight into 1-D convolution layers to get the result.
# Define the model
class CNN1d_v1(nn.Module):
  """Three-stage 1-D convolutional classifier.

  Each stage is Conv1d(kernel 3, padding 1) -> ReLU -> MaxPool1d(2), which
  keeps the sequence length through the convolution and halves it in the
  pool. For a batched input of shape (batch, 3, 320) the shapes are:

    conv1: (batch, 16, 160)
    conv2: (batch, 32, 80)
    conv3: (batch, 64, 40)
    fc1  : (batch, 50)

  Args:
    in_channels: accepted for interface compatibility but currently unused;
      the first convolution is fixed at 3 input channels.
    num_classes: accepted for interface compatibility but currently unused;
      the output layer is fixed at 50 units.
  """
  def __init__(self, in_channels = 1, num_classes = 10):
    super(CNN1d_v1, self).__init__()

    # (batch, 3, 320) -> (batch, 16, 160)
    self.conv1 = nn.Sequential(
        nn.Conv1d(in_channels=3,
                  out_channels=16,
                  kernel_size=3,
                  stride = 1,
                  padding = 1
        ),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size = 2,stride = 2)
    )

    # (batch, 16, 160) -> (batch, 32, 80)
    self.conv2 = nn.Sequential(
         nn.Conv1d(in_channels=16,
                  out_channels=32,
                  kernel_size=3,
                  stride = 1,
                  padding = 1
        ),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size = 2,stride = 2)
    )

    # (batch, 32, 80) -> (batch, 64, 40)
    self.conv3 = nn.Sequential(
        nn.Conv1d(in_channels=32,
                  out_channels=64,
                  kernel_size=3,
                  stride = 1,
                  padding = 1
        ),
        nn.ReLU(),
        nn.MaxPool1d(kernel_size = 2,stride = 2)
    )

    # 64 channels * 40 positions flattened -> 50 output scores
    self.fc1 = nn.Linear(40 * 64, 50)


  def forward(self, x):
    """Return per-class log-probabilities of shape (batch, 50).

    Args:
      x: batched float tensor of shape (batch, 3, 320).
    """
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    # Flatten per sample while keeping the batch axis fixed. With
    # view(-1, 40 * 64) an input of the wrong length was silently folded into
    # extra batch rows; now the mismatch surfaces as an error in fc1.
    x = torch.flatten(x, start_dim = 1)
    x = self.fc1(x)

    return F.log_softmax(x, dim=1)


In [10]:
def train(model, criterion, opti, train_loader, dev_loader, max_eps ):
  """Train ``model`` and checkpoint it whenever development accuracy improves.

  For every epoch the function runs one optimisation pass over
  ``train_loader`` (printing loss / batch accuracy every 30 iterations),
  then evaluates on ``dev_loader`` without gradients. When the development
  accuracy beats the best value seen so far, ``model.state_dict()`` is saved
  to ``sstcls_<epoch>.dat`` in the current working directory.

  Args:
    model: module called as ``model(x)`` that returns per-class scores.
    criterion: loss called as ``criterion(scores, labels)``.
    opti: optimizer that updates ``model``'s parameters.
    train_loader: iterable of ``(inputs, labels)`` training batches.
    dev_loader: iterable of ``(inputs, labels)`` development batches.
    max_eps: number of epochs to run.

  Returns:
    None. Results are reported through printed output and saved checkpoints.
  """
  best_acc = 0
  st = time.time()
  # Remember the caller's mode so it can be restored when training ends.
  was_training = model.training

  for ep in range(max_eps):
    model.train()
    for step, (x_batch, y_batch) in enumerate(train_loader, start = 1):
      y_pred = model(x_batch)
      loss = criterion(y_pred, y_batch)

      # Use the optimizer that was passed in, not a same-named global.
      opti.zero_grad()
      loss.backward()
      opti.step()

      if step % 30 == 0:
        batch_acc = (y_pred.detach().argmax(dim = 1) == y_batch).float().mean().item()
        print(("Iteration {} of epoch {} complete. Loss: {}; Acc =: {}, Time taken (s): {}".format(step, ep, loss.item(), batch_acc, (time.time()-st))))

    # Development pass: plain Python numbers are accumulated so no tensors
    # are kept alive across batches.
    dev_hits = 0
    dev_seen = 0
    dev_batches = 0
    dev_loss_sum = 0.0
    model.eval()
    with torch.no_grad():
      for x_dev, y_dev in dev_loader:
        y_val = model(x_dev)
        dev_hits += (y_val.argmax(dim = 1) == y_dev).sum().item()
        dev_seen += len(y_dev)
        dev_batches += 1
        dev_loss_sum += criterion(y_val, y_dev).item()

    # An empty development loader must not crash the run with a division by zero.
    acc_dev = dev_hits / dev_seen if dev_seen else 0.0
    mean_loss = dev_loss_sum / dev_batches if dev_batches else float('nan')

    print("Epoch {} complete! Development Accuracy: {}; Development Loss: {}".format(ep, acc_dev, mean_loss))
    if acc_dev > best_acc:
        print("Best development accuracy improved from {} to {}, saving model...".format(best_acc, acc_dev))
        best_acc = acc_dev
        torch.save(model.state_dict(), 'sstcls_{}.dat'.format(ep))

  model.train(was_training)

  


In [11]:
# Build the network with its default constructor arguments.
model = CNN1d_v1()

In [12]:
# Classification loss used for both training and evaluation.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 0.01.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
# Maximum number of training epochs.
max_eps = 15

In [13]:
# Run for the configured epoch budget (max_eps) rather than a hard-coded
# literal, so the number of epochs is controlled from the configuration cell.
train(model, criterion, optimizer, train_dataloader, dev_dataloader, max_eps)

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


Iteration 30 of epoch 0 complete. Loss: 3.5193357467651367; Acc =: 0.10000000149011612, Time taken (s): 0.7660806179046631
Iteration 60 of epoch 0 complete. Loss: 3.1081693172454834; Acc =: 0.23000000417232513, Time taken (s): 1.4943788051605225
Epoch 0 complete! Development Accuracy: 0.22257721424102783; Development Loss: 3.043186902999878
Best development accuracy improved from 0 to 0.22257721424102783, saving model...
Iteration 30 of epoch 1 complete. Loss: 2.97920298576355; Acc =: 0.2199999988079071, Time taken (s): 2.6770379543304443
Iteration 60 of epoch 1 complete. Loss: 2.7782061100006104; Acc =: 0.2800000011920929, Time taken (s): 3.4056272506713867
Epoch 1 complete! Development Accuracy: 0.23003195226192474; Development Loss: 2.923140525817871
Best development accuracy improved from 0.22257721424102783 to 0.23003195226192474, saving model...
Iteration 30 of epoch 2 complete. Loss: 2.8735873699188232; Acc =: 0.25999999046325684, Time taken (s): 4.57209038734436
Iteration 60 of

In [16]:
# Rebuild the architecture and restore the weights from a saved checkpoint.
# NOTE(review): the epoch number in the file name is hard-coded; it has to
# match a checkpoint that the training run actually wrote.
predict_model = CNN1d_v1()
predict_model.load_state_dict(torch.load('sstcls_7.dat'))
# Switch to inference mode before scoring the held-out test set.
predict_model.eval()
with torch.no_grad():
  # Call the module itself (not .forward) so registered hooks are honoured.
  y_eval = predict_model(x_test)
  loss = criterion(y_eval, y_test)
  prediction = y_eval.argmax(dim = 1)

In [17]:
# Accuracy of the restored model's predictions on the held-out test labels.
accuracy_score(y_test, prediction)

0.2598509052183174