In [1]:
import os 
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
data_dir='../data/'

In [2]:
train=pd.read_csv(data_dir+'train_features.csv')
train_labels=pd.read_csv(data_dir+'train_labels.csv')
test=pd.read_csv(data_dir+'test_features.csv')
submission=pd.read_csv(data_dir+'sample_submission.csv')

In [44]:
val_start_id = int(len(train.id.unique())* (0.8))
# Split
val_df = train[train['id']>val_start_id]
val_labels = train_labels[train_labels['id']>val_start_id]
train_df = train[train['id']<=val_start_id]
train_labels = train_labels[train_labels['id']<=val_start_id]

In [51]:
train_df['id'].iloc[0]

0

In [42]:
int(len(train.id.unique())* 0.8)

2500

In [41]:
train[train['id']<600]

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,0,0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112
1,0,1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216
2,0,2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629
3,0,3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013
4,0,4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234
...,...,...,...,...,...,...,...,...
359995,599,595,-0.760772,-0.183207,-0.270722,0.475636,-4.328264,-15.421589
359996,599,596,-0.809180,-0.192749,-0.222375,5.915133,-6.604455,-16.137791
359997,599,597,-0.790256,-0.171147,-0.207490,-14.382488,-9.310891,-13.168375
359998,599,598,-0.797502,-0.226748,-0.215349,-34.430392,-3.482236,-13.677362


In [4]:
train_labels

Unnamed: 0,id,label,label_desc
0,0,37,Shoulder Press (dumbbell)
1,1,26,Non-Exercise
2,2,3,Biceps Curl (band)
3,3,26,Non-Exercise
4,4,26,Non-Exercise
...,...,...,...
3120,3120,26,Non-Exercise
3121,3121,26,Non-Exercise
3122,3122,15,Dynamic Stretch (at your own pace)
3123,3123,26,Non-Exercise


In [5]:
train[train['id']==0].iloc[:,1:]

Unnamed: 0,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112
1,1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216
2,2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629
3,3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013
4,4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234
...,...,...,...,...,...,...,...
595,595,0.985242,-0.326122,-0.354528,-14.903280,20.172339,22.973018
596,596,1.052837,-0.220710,-0.413472,-10.857025,19.786856,23.174597
597,597,1.025643,-0.227845,-0.354516,-2.334243,25.768654,18.932070
598,598,1.031553,-0.387862,-0.277857,-9.710746,28.697694,20.631577


In [6]:
submission

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,51,52,53,54,55,56,57,58,59,60
0,3125,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,3126,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3127,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3128,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,3129,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
778,3903,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
779,3904,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
780,3905,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
test['id'][0]-1

3124

In [8]:
class HealthDataset(Dataset):
    def __init__(self,feature_df,label=None,train_mode=True):
        self.X = feature_df
        self.train_mode= train_mode
        if self.train_mode:
            self.y = label
        else:
            self.first_test_id = self.X['id'][0]
        
    def __len__(self):
        return len(self.X.id.unique())
    
    def __getitem__(self,idx):
        if self.train_mode:
            return (np.array(self.X[self.X['id']==idx].iloc[:,1:]),np.array(self.y[self.y['id']==idx]['label']))
        else:
            return np.array(self.X[self.X['id']==idx+self.first_test_id].iloc[:,1:])

In [25]:
class LSTMNet(nn.Module):
    def __init__(self,device,input_size,hidden_size=30,num_layers=1,output_size=61):
        super(LSTMNet, self).__init__()
        self.device=device
        
        self.input_size = input_size
        self.hidden_size= hidden_size
        self.num_layers = num_layers
        
        self.dropout = nn.Dropout(0.6)
        self.softmax = nn.Softmax
        
        self.lstm= nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
        self.fc1 = nn.Linear(hidden_size,hidden_size)
        self.fc2 = nn.Linear(hidden_size,output_size)
        
    def init_hidden_and_cell(self, batch_size):
        h0 = torch.zeros(self.num_layers,batch_size, self.hidden_size, requires_grad=True).to(self.device)
        c0 = torch.zeros(self.num_layers,batch_size, self.hidden_size, requires_grad=True).to(self.device)
        return h0, c0
        
    def forward(self,X,batch_size):
        h0,c0 = self.init_hidden_and_cell(batch_size) 
        print('h0,c0',(h0.shape,c0.shape))
        print(X.shape)
        out,_ = self.lstm(X,(h0,c0))
        print(out)
        out = out[:,-1,:]
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out
        
    

In [26]:
train_dataset = HealthDataset(train,train_labels)
test_dataset = HealthDataset(test,train_mode=False)

In [27]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 10
batch_size = 120
learning_rate = 0.001

input_size = 7
sequence_length=600

In [28]:
device

device(type='cuda')

In [32]:
model = LSTMNet(device,input_size).cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 



In [33]:
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           drop_last=True,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=1, 
                                          shuffle=False)

In [34]:
# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (X, labels) in enumerate(train_loader):  
        X = X.float().reshape(batch_size,sequence_length,-1).to(device)
        print(X.shape)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(X,batch_size)
        loss = criterion(outputs, labels.view(-1))
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 5 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')


torch.Size([120, 600, 7])
h0,c0 (torch.Size([1, 120, 30]), torch.Size([1, 120, 30]))
torch.Size([120, 600, 7])
tensor([[[ 3.6372e-09,  5.0634e-05,  1.0145e-04,  ..., -5.5884e-02,
          -6.7725e-01,  3.1826e-02],
         [ 3.1123e-09,  6.0120e-05,  1.2405e-04,  ..., -4.7010e-01,
          -6.6033e-01,  3.0317e-02],
         [ 7.6430e-09,  6.9682e-05,  1.5088e-04,  ..., -5.6981e-01,
          -6.5298e-01,  3.9489e-02],
         ...,
         [-3.3762e-01, -1.2394e-40,  7.6156e-01,  ...,  0.0000e+00,
          -1.0000e+00,  1.0978e-37],
         [-3.3762e-01, -1.8563e-40,  7.6147e-01,  ...,  0.0000e+00,
          -1.0000e+00,  4.9903e-38],
         [-3.3762e-01, -6.4287e-41,  7.6123e-01,  ...,  0.0000e+00,
          -1.0000e+00,  1.0453e-38]],

        [[ 2.1083e-05,  5.5160e-02, -2.1914e-01,  ...,  4.4683e-01,
          -6.9251e-01,  3.5790e-03],
         [ 1.8673e-05,  1.0860e-01, -4.4917e-01,  ...,  3.5548e-01,
          -7.1406e-01,  2.7414e-03],
         [ 2.6562e-05,  1.2737e-0

KeyboardInterrupt: 

In [None]:
submission.iloc[1,1:]

In [None]:
outputs.cpu().detach().numpy()[0]

In [None]:
len(submission.iloc[i,:])

In [None]:
submission.iloc[i,:]= outputs.cpu().detach().numpy()[0]

In [None]:
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    for i, X in enumerate(test_loader):
        X = X.float().reshape(sequence_length,1,-1).to(device)
        outputs = model(X,1)
        submission.iloc[i,1:]= outputs.cpu().detach().numpy()[0]
        _, predicted = torch.max(outputs.data, 1)
