In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv('Aggregated User Data.csv')
data.head()

Unnamed: 0,UUID,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,...,label:STAIRS_-_GOING_DOWN,label:ELEVATOR,label:OR_standing,label:AT_SCHOOL,label:PHONE_IN_HAND,label:PHONE_IN_BAG,label:PHONE_ON_TABLE,label:WITH_CO-WORKERS,label:WITH_FRIENDS,label_source
0,00EABED2-271D-49D8-B599-1D4A09240601,1444079161,0.996815,0.003529,-0.002786,0.006496,0.995203,0.996825,0.998502,1.748756,...,,,0.0,,,,1.0,1.0,,2
1,00EABED2-271D-49D8-B599-1D4A09240601,1444079221,0.996864,0.004172,-0.00311,0.00705,0.994957,0.996981,0.998766,1.935573,...,,,0.0,,,,1.0,1.0,,2
2,00EABED2-271D-49D8-B599-1D4A09240601,1444079281,0.996825,0.003667,0.003094,0.006076,0.994797,0.996614,0.998704,2.03178,...,,,0.0,,,,1.0,1.0,,2
3,00EABED2-271D-49D8-B599-1D4A09240601,1444079341,0.996874,0.003541,0.000626,0.006059,0.99505,0.996907,0.99869,1.865318,...,,,0.0,,,,1.0,1.0,,2
4,00EABED2-271D-49D8-B599-1D4A09240601,1444079431,0.997371,0.037653,0.043389,0.102332,0.995548,0.99686,0.998205,0.460806,...,,,0.0,,,,1.0,1.0,,2


In [35]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [36]:
def interpolation(df):
    col_to_avg = list(df.columns) #Start with keeping all the columns as columns to use an average interpolation on
    for k in range(len(list(df.columns))):
        if list(df.columns)[k].startswith(('discrete', 'label')): #Remove label and discrete columns from col_to_avg
            col_to_avg.remove(list(df.columns)[k])
    
    df_with_avg = df[col_to_avg].fillna(df[col_to_avg].mean()) #Interpolate nan columns for all continuous-valued columns with average
    
    col_to_zero = list(df.columns)
    for k in range(len(list(df.columns))):
        if not list(df.columns)[k].startswith(('discrete', 'label')): #Remove all columns except label and discrete
            col_to_zero.remove(list(df.columns)[k])
    
    df_with_zero = df[col_to_zero].fillna(0) #Interpolate nan values for label and discrete columns with 0
    
    return pd.concat([df_with_avg, df_with_zero], axis = 1)

In [37]:
X = interpolation(data.iloc[:,2:28])
y = interpolation(data.iloc[:,227:-1])

X = X.values
y = y.values

In [38]:
sc = StandardScaler()
X = sc.fit_transform(X)
X

array([[-0.06795078, -0.36746172, -0.35842925, ...,  0.39622642,
         1.84726953,  0.95347118],
       [-0.06733515, -0.36076884, -0.36129259, ..., -0.257227  ,
         1.27502995,  0.22785565],
       [-0.06782514, -0.3660253 , -0.30646495, ...,  0.45060521,
         1.66037285,  0.8438587 ],
       ...,
       [ 0.30153292,  1.62669325,  1.92186691, ..., -0.74879204,
         1.25038783,  0.4289404 ],
       [ 1.52388973,  2.44210021,  2.43071056, ..., -0.8930017 ,
        -0.76180252,  2.05206506],
       [ 0.23118678,  1.00799237,  0.58609267, ..., -0.43935668,
        -0.02730479,  0.09026333]])

In [39]:
def classifier_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.LeakyReLU(0.05)
    )

class Classifier(nn.Module):
    def __init__(self, feature_dim = 26):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            classifier_block(feature_dim, 50),
            classifier_block(50, 100),
            classifier_block(100, 75),
            nn.Linear(75, 51),
            nn.Sigmoid()
            
        )
    def forward(self, x):
        return self.network(x)

In [40]:
criterion = nn.BCELoss()
n_epochs = 200
batch_size = 200
feature_dim = y.shape[1]
lr = 0.01

train_features = torch.tensor(X).to(device)
train_labels = torch.tensor(y).to(device)
train_data = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)

model = Classifier().to(device)
optimizer = optim.Adam(model.parameters(), lr = lr)

In [42]:
for epoch in range(n_epochs):
    total_loss = 0
    for batch in train_loader:
        features, label = batch
        
        optimizer.zero_grad()
        preds = model(features.float())
        loss = criterion(preds, label.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f'Epoch {epoch}, Loss: {total_loss}, Final Batch Loss: {loss.item()}')

Epoch 0, Loss: 276.8947856426239, Final Batch Loss: 0.1366567462682724
Epoch 1, Loss: 266.0412206053734, Final Batch Loss: 0.12896481156349182
Epoch 2, Loss: 263.09387032687664, Final Batch Loss: 0.12644906342029572
Epoch 3, Loss: 261.3870267048478, Final Batch Loss: 0.14045093953609467
Epoch 4, Loss: 259.83589163422585, Final Batch Loss: 0.13299328088760376
Epoch 5, Loss: 259.2597752660513, Final Batch Loss: 0.1463182419538498
Epoch 6, Loss: 258.27568669617176, Final Batch Loss: 0.1462501734495163
Epoch 7, Loss: 257.7430736646056, Final Batch Loss: 0.13134463131427765
Epoch 8, Loss: 257.2638116851449, Final Batch Loss: 0.15012580156326294
Epoch 9, Loss: 257.02130801975727, Final Batch Loss: 0.13630637526512146
Epoch 10, Loss: 256.5592309087515, Final Batch Loss: 0.14262062311172485
Epoch 11, Loss: 256.3784095197916, Final Batch Loss: 0.13690762221813202
Epoch 12, Loss: 256.17999620735645, Final Batch Loss: 0.1315571367740631
Epoch 13, Loss: 256.19172935932875, Final Batch Loss: 0.1306

Epoch 112, Loss: 252.78821858763695, Final Batch Loss: 0.13389229774475098
Epoch 113, Loss: 252.9370829537511, Final Batch Loss: 0.13194067776203156
Epoch 114, Loss: 252.7101497799158, Final Batch Loss: 0.13738591969013214
Epoch 115, Loss: 252.80763110518456, Final Batch Loss: 0.1245899498462677
Epoch 116, Loss: 252.80158676952124, Final Batch Loss: 0.14057523012161255
Epoch 117, Loss: 252.72383458912373, Final Batch Loss: 0.1480824500322342
Epoch 118, Loss: 252.87635570764542, Final Batch Loss: 0.13836610317230225
Epoch 119, Loss: 252.68414817005396, Final Batch Loss: 0.14318394660949707
Epoch 120, Loss: 252.50865723192692, Final Batch Loss: 0.1266876757144928
Epoch 121, Loss: 252.64927408844233, Final Batch Loss: 0.13406777381896973
Epoch 122, Loss: 252.74670501053333, Final Batch Loss: 0.11842267960309982
Epoch 123, Loss: 252.5834176018834, Final Batch Loss: 0.13811764121055603
Epoch 124, Loss: 252.5010293647647, Final Batch Loss: 0.1282484382390976
Epoch 125, Loss: 252.669623345136

In [46]:
features, label = next(iter(train_loader))

In [48]:
preds = model(features.float())

In [64]:
torch.round(preds[0])

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0', grad_fn=<RoundBackward>)

In [63]:
label[0]

tensor([1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0', dtype=torch.float64)