In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import numpy as np
import pandas as pd
import torch.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from focal_loss.focal_loss import FocalLoss

In [2]:
data = pd.read_csv('Aggregated User Data.csv')
data.head()

Unnamed: 0,UUID,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,...,label:STAIRS_-_GOING_DOWN,label:ELEVATOR,label:OR_standing,label:AT_SCHOOL,label:PHONE_IN_HAND,label:PHONE_IN_BAG,label:PHONE_ON_TABLE,label:WITH_CO-WORKERS,label:WITH_FRIENDS,label_source
0,00EABED2-271D-49D8-B599-1D4A09240601,1444079161,0.996815,0.003529,-0.002786,0.006496,0.995203,0.996825,0.998502,1.748756,...,,,0.0,,,,1.0,1.0,,2
1,00EABED2-271D-49D8-B599-1D4A09240601,1444079221,0.996864,0.004172,-0.00311,0.00705,0.994957,0.996981,0.998766,1.935573,...,,,0.0,,,,1.0,1.0,,2
2,00EABED2-271D-49D8-B599-1D4A09240601,1444079281,0.996825,0.003667,0.003094,0.006076,0.994797,0.996614,0.998704,2.03178,...,,,0.0,,,,1.0,1.0,,2
3,00EABED2-271D-49D8-B599-1D4A09240601,1444079341,0.996874,0.003541,0.000626,0.006059,0.99505,0.996907,0.99869,1.865318,...,,,0.0,,,,1.0,1.0,,2
4,00EABED2-271D-49D8-B599-1D4A09240601,1444079431,0.997371,0.037653,0.043389,0.102332,0.995548,0.99686,0.998205,0.460806,...,,,0.0,,,,1.0,1.0,,2


In [3]:
data = data[(data['UUID'] == '0BFC35E2-4817-4865-BFA7-764742302A2D') | (data['UUID'] == '0A986513-7828-4D53-AA1F-E02D6DF9561B') | (data['UUID'] == '00EABED2-271D-49D8-B599-1D4A09240601')] 
data.drop(columns = ['timestamp'], inplace = True)

# Creating a new dataframe with only sitting, walking, and sleeping data

In [4]:
only_walking = data[(data['label:FIX_walking'] == 1) & (data['label:SITTING'] == 0) & (data['label:SLEEPING'] == 0)]
only_walking = only_walking.iloc[:,1:27]
only_walking['label'] = "WALKING"

only_sitting = data[(data['label:FIX_walking'] == 0) & (data['label:SITTING'] == 1) & (data['label:SLEEPING'] == 0)]
only_sitting = only_sitting.iloc[:,1:27]
only_sitting['label'] = "SITTING"

only_sleeping = data[(data['label:FIX_walking'] == 0) & (data['label:SITTING'] == 0) & (data['label:SLEEPING'] == 1)]
only_sleeping = only_sleeping.iloc[:,1:27]
only_sleeping['label'] = "SLEEPING"

df = pd.concat([only_walking, only_sitting, only_sleeping], axis = 0)
df.head()

Unnamed: 0,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,raw_acc:magnitude_spectrum:log_energy_band0,...,raw_acc:3d:mean_x,raw_acc:3d:mean_y,raw_acc:3d:mean_z,raw_acc:3d:std_x,raw_acc:3d:std_y,raw_acc:3d:std_z,raw_acc:3d:ro_xy,raw_acc:3d:ro_xz,raw_acc:3d:ro_yz,label
155,1.003052,0.139768,0.115192,0.273656,0.99192,0.996238,0.999979,1.328158,6.674634,5.027699,...,-0.051719,-0.168622,-0.866269,0.250667,0.343037,0.252212,-0.03083,0.143926,-0.530033,WALKING
156,1.005091,0.232927,0.190056,0.292741,0.826441,0.94863,1.176819,2.745002,6.658351,5.013784,...,0.25574,-0.433965,-0.801786,0.276395,0.190099,0.235251,0.105712,0.01635,0.027738,WALKING
157,0.988339,0.025706,-0.012189,0.044177,0.97999,0.987836,0.996713,1.78355,6.684273,5.043187,...,-0.0289,-0.594037,-0.772274,0.136729,0.065971,0.065611,-0.603787,0.630796,-0.799983,WALKING
158,1.005461,0.105208,0.128558,0.199233,0.992822,0.99477,0.997684,1.459081,6.679381,5.039396,...,0.152049,-0.164488,-0.791628,0.272913,0.297443,0.426869,-0.372997,0.652161,-0.524372,WALKING
159,1.001464,0.121184,0.129053,0.209801,0.942805,0.99616,1.052143,2.060366,6.677517,5.029112,...,0.149694,-0.014688,-0.913222,0.29931,0.237561,0.122455,0.601753,-0.072091,-0.003273,WALKING


# Interpolating acceleration columns with average values

In [5]:
def interpolation(df):
    col_to_avg = list(df.columns) #Start with keeping all the columns as columns to use an average interpolation on
    for k in range(len(list(df.columns))):
        if list(df.columns)[k].startswith(('discrete', 'label')): #Remove label and discrete columns from col_to_avg
            col_to_avg.remove(list(df.columns)[k])
    
    df_with_avg = df[col_to_avg].fillna(df[col_to_avg].mean()) #Interpolate nan columns for all continuous-valued columns with average
    
    col_to_zero = list(df.columns)
    for k in range(len(list(df.columns))):
        if not list(df.columns)[k].startswith(('discrete', 'label')): #Remove all columns except label and discrete
            col_to_zero.remove(list(df.columns)[k])
    
    df_with_zero = df[col_to_zero].fillna(0) #Interpolate nan values for label and discrete columns with 0
    
    return pd.concat([df_with_avg, df_with_zero], axis = 1)

In [6]:
df.iloc[:,:-1] = interpolation(df.iloc[:,:-1]) #Interpolate acceleration columns
df = df.values
df

array([[1.003052, 0.139768, 0.115192, ..., 0.143926, -0.530033,
        'WALKING'],
       [1.005091, 0.232927, 0.190056, ..., 0.01635, 0.027738, 'WALKING'],
       [0.988339, 0.025706, -0.012189, ..., 0.630796, -0.799983,
        'WALKING'],
       ...,
       [1.000489, 0.001597, 0.000608, ..., -0.131635, 0.020957,
        'SLEEPING'],
       [1.000168, 0.003089, -0.0016879999999999998, ...,
        -0.08845900000000001, -0.090014, 'SLEEPING'],
       [1.019187, 0.143322, 0.24915, ..., 0.19007, -0.548567, 'SLEEPING']],
      dtype=object)

# One-Hot Encoding the labels

In [7]:
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [-1])], remainder = 'passthrough')
df = np.array(ct.fit_transform(df))
df

array([[0.0, 0.0, 1.0, ..., -0.03083, 0.143926, -0.530033],
       [0.0, 0.0, 1.0, ..., 0.105712, 0.01635, 0.027738],
       [0.0, 0.0, 1.0, ..., -0.603787, 0.630796, -0.799983],
       ...,
       [0.0, 1.0, 0.0, ..., -0.036237, -0.131635, 0.020957],
       [0.0, 1.0, 0.0, ..., -0.153961, -0.08845900000000001, -0.090014],
       [0.0, 1.0, 0.0, ..., -0.33720100000000003, 0.19007, -0.548567]],
      dtype=object)

In [8]:
X = df[:,3:]
y = df[:,:3]

# Train/Test Split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train

array([[-0.15318826,  0.17868897, -0.57553311, ..., -0.328621  ,
         3.56939516,  0.15235929],
       [-0.41974312, -0.32761746, -0.30091875, ..., -0.14314379,
         0.21891196,  0.06965899],
       [-0.16352668, -0.32911315, -0.30147572, ..., -0.19254031,
        -0.91191421,  0.48393729],
       ...,
       [-0.05276937, -0.33121933, -0.30076558, ..., -0.06681618,
         0.1276062 ,  0.46506161],
       [ 0.04300125, -0.31668977, -0.27504746, ...,  2.27662378,
        -0.15462445,  1.23336846],
       [-0.13050786, -0.32961681, -0.28820589, ..., -0.17347456,
        -0.03012983,  0.22394108]])

In [10]:
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Defining the model

In [11]:
def classifier_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.LeakyReLU(0.05)
    )

class Classifier(nn.Module):
    def __init__(self, feature_dim = 26):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            classifier_block(feature_dim, 20),
            classifier_block(20, 15),
            classifier_block(15, 10),
            classifier_block(10, 5),
            nn.Linear(5, 3)
        )
    def forward(self, x):
        softmax = nn.Softmax(dim = 1)
        return softmax(self.network(x))

# Hyperparameters

In [12]:
model = Classifier()
lr = 0.001
n_epochs = 500
batch_size = 50

### 2000 epochs 75 batch size 0.001 learning rate
criterion = FocalLoss(alpha = 0.2, gamma = 5)
#criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = lr)

train_features = torch.tensor(X_train)
train_labels = torch.tensor(y_train)
test_features = torch.tensor(X_test)
test_labels = torch.tensor(y_test)

train_data = torch.utils.data.TensorDataset(train_features, train_labels)
test_data = torch.utils.data.TensorDataset(test_features, test_labels)

train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = len(test_labels), shuffle = True)

# Training Loop

In [13]:
for epoch in range(n_epochs):
    total_loss = 0
    for batch in train_loader:
        features, labels = batch
        #_, encoded_labels = torch.max(labels, dim = 1) 
        
        optimizer.zero_grad()
        preds = model(features.float())
        
        loss = criterion(preds, labels) 
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = 1.0)
        optimizer.step()
        total_loss += loss.item()
        
    print(f'Epoch {epoch + 1}, Loss: {total_loss}, Final Batch Loss: {loss.item()}')
#     softmax = nn.Softmax(dim = 1)
#     _, preds = torch.max(softmax(model(features.float())), dim = 1)
#     _, answers = torch.max(labels, dim = 1)
#     print(metrics.confusion_matrix(answers, preds))

Epoch 1, Loss: 0.7624682877212763, Final Batch Loss: 0.007937115617096424
Epoch 2, Loss: 0.5324705438688397, Final Batch Loss: 0.006677409168332815
Epoch 3, Loss: 0.4756819186732173, Final Batch Loss: 0.003642037510871887
Epoch 4, Loss: 0.45176530512981117, Final Batch Loss: 0.004411949310451746
Epoch 5, Loss: 0.42898940620943904, Final Batch Loss: 0.004724546335637569
Epoch 6, Loss: 0.4168614714872092, Final Batch Loss: 0.004989458713680506
Epoch 7, Loss: 0.4097594802733511, Final Batch Loss: 0.005821514874696732
Epoch 8, Loss: 0.39625218766741455, Final Batch Loss: 0.004292347468435764
Epoch 9, Loss: 0.39227309986017644, Final Batch Loss: 0.003743818961083889
Epoch 10, Loss: 0.38318439829163253, Final Batch Loss: 0.003234102390706539
Epoch 11, Loss: 0.3797982183750719, Final Batch Loss: 0.008116241544485092
Epoch 12, Loss: 0.37111475644633174, Final Batch Loss: 0.0030514553654938936
Epoch 13, Loss: 0.36384261841885746, Final Batch Loss: 0.006530009675770998
Epoch 14, Loss: 0.35234665

Epoch 110, Loss: 0.2605456280289218, Final Batch Loss: 0.0017752861604094505
Epoch 111, Loss: 0.2602618501987308, Final Batch Loss: 0.0028246373403817415
Epoch 112, Loss: 0.2482633392792195, Final Batch Loss: 0.003582874545827508
Epoch 113, Loss: 0.2556194810895249, Final Batch Loss: 0.007642595563083887
Epoch 114, Loss: 0.25454450747929513, Final Batch Loss: 0.0023572903592139482
Epoch 115, Loss: 0.24982838914729655, Final Batch Loss: 0.002959892386570573
Epoch 116, Loss: 0.24532923137303442, Final Batch Loss: 0.0015633179573342204
Epoch 117, Loss: 0.2505088694160804, Final Batch Loss: 0.0020705433562397957
Epoch 118, Loss: 0.2602511664154008, Final Batch Loss: 0.008992914110422134
Epoch 119, Loss: 0.25697991671040654, Final Batch Loss: 0.002706927014514804
Epoch 120, Loss: 0.2491494407877326, Final Batch Loss: 0.0036182578187435865
Epoch 121, Loss: 0.24952153849881142, Final Batch Loss: 0.002094357507303357
Epoch 122, Loss: 0.24937488976866007, Final Batch Loss: 0.004922178573906422


Epoch 216, Loss: 0.22143799415789545, Final Batch Loss: 0.002902839332818985
Epoch 217, Loss: 0.23193302127765492, Final Batch Loss: 0.002312529133632779
Epoch 218, Loss: 0.21259832801297307, Final Batch Loss: 0.0023833431769162416
Epoch 219, Loss: 0.2348760743625462, Final Batch Loss: 0.0009951414540410042
Epoch 220, Loss: 0.22511456755455583, Final Batch Loss: 0.0019230649340897799
Epoch 221, Loss: 0.21682122815400362, Final Batch Loss: 0.002590067218989134
Epoch 222, Loss: 0.21718913794029504, Final Batch Loss: 0.00374411279335618
Epoch 223, Loss: 0.21865685458760709, Final Batch Loss: 0.003663575742393732
Epoch 224, Loss: 0.21434394363313913, Final Batch Loss: 0.00455178739503026
Epoch 225, Loss: 0.20873909606598318, Final Batch Loss: 0.002308481140062213
Epoch 226, Loss: 0.2077296235365793, Final Batch Loss: 0.002978629432618618
Epoch 227, Loss: 0.21335419651586562, Final Batch Loss: 0.003520447760820389
Epoch 228, Loss: 0.20551384787540883, Final Batch Loss: 0.001485534361563623


Epoch 323, Loss: 0.20671535737346858, Final Batch Loss: 0.002301756991073489
Epoch 324, Loss: 0.19528748176526278, Final Batch Loss: 0.0018511466914787889
Epoch 325, Loss: 0.2003816991345957, Final Batch Loss: 0.0016221503028646111
Epoch 326, Loss: 0.20049984788056463, Final Batch Loss: 0.003564555896446109
Epoch 327, Loss: 0.19712448865175247, Final Batch Loss: 0.0037062629126012325
Epoch 328, Loss: 0.2092071472434327, Final Batch Loss: 0.0023169792257249355
Epoch 329, Loss: 0.20033253147266805, Final Batch Loss: 0.0031040948815643787
Epoch 330, Loss: 0.20039840310346335, Final Batch Loss: 0.0019205068238079548
Epoch 331, Loss: 0.20291962276678532, Final Batch Loss: 0.0020440544467419386
Epoch 332, Loss: 0.19985499104950577, Final Batch Loss: 0.0026715027634054422
Epoch 333, Loss: 0.2054916974157095, Final Batch Loss: 0.0044033280573785305
Epoch 334, Loss: 0.19887482712510973, Final Batch Loss: 0.0006875409744679928
Epoch 335, Loss: 0.22134141117567196, Final Batch Loss: 0.00095297145

Epoch 430, Loss: 0.1964554728474468, Final Batch Loss: 0.0034637723583728075
Epoch 431, Loss: 0.2021623527398333, Final Batch Loss: 0.00089608458802104
Epoch 432, Loss: 0.20581460266839713, Final Batch Loss: 0.003685278119519353
Epoch 433, Loss: 0.21167204604716972, Final Batch Loss: 0.000683001650031656
Epoch 434, Loss: 0.21043368126265705, Final Batch Loss: 0.003914377186447382
Epoch 435, Loss: 0.2267371986526996, Final Batch Loss: 0.002888280898332596
Epoch 436, Loss: 0.20962845045141876, Final Batch Loss: 0.0024080348666757345
Epoch 437, Loss: 0.2122871621977538, Final Batch Loss: 0.0022521463688462973
Epoch 438, Loss: 0.2197145795216784, Final Batch Loss: 0.0029941832181066275
Epoch 439, Loss: 0.19185004371684045, Final Batch Loss: 0.0030278675258159637
Epoch 440, Loss: 0.20397809229325503, Final Batch Loss: 0.002391932299360633
Epoch 441, Loss: 0.19436105445493013, Final Batch Loss: 0.001369359204545617
Epoch 442, Loss: 0.20485488523263484, Final Batch Loss: 0.002406779443845153


# Evaluation on test data

In [14]:
softmax = nn.Softmax(dim = 1)
for batch in test_loader:
    features, labels = batch
    _, preds = torch.max(softmax(model(features.float())), dim = 1)
    _, answers = torch.max(labels, dim = 1)
    print(metrics.confusion_matrix(answers, preds))
    print(metrics.classification_report(answers, preds, digits = 3))

[[619  36  13]
 [ 22 310   0]
 [ 32   3  32]]
              precision    recall  f1-score   support

           0      0.920     0.927     0.923       668
           1      0.888     0.934     0.910       332
           2      0.711     0.478     0.571        67

    accuracy                          0.901      1067
   macro avg      0.840     0.779     0.802      1067
weighted avg      0.897     0.901     0.897      1067

