# Overview

This program uses a standard fully-connected neural network with ReLU activations to predict whether a person is sitting based on their accelerometer data.

In [1]:
import pandas as pd
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

## Loading the data into a Pandas dataframe

In [2]:
# Location of the features/labels CSV, relative to this notebook.
# Edit this path if the raw data lives somewhere else on your machine.
data_path = '../raw_data/0A986513-7828-4D53-AA1F-E02D6DF9561B.features_labels.csv'

data = pd.read_csv(data_path)
data.head()

Unnamed: 0,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,...,label:STAIRS_-_GOING_DOWN,label:ELEVATOR,label:OR_standing,label:AT_SCHOOL,label:PHONE_IN_HAND,label:PHONE_IN_BAG,label:PHONE_ON_TABLE,label:WITH_CO-WORKERS,label:WITH_FRIENDS,label_source
0,1449601597,1.000371,0.007671,-0.016173,0.02786,0.998221,1.000739,1.003265,0.891038,6.684582,...,,,,,,,,,,-1
1,1449601657,1.000243,0.003782,-0.002713,0.007046,0.998463,1.000373,1.002088,1.647929,6.684605,...,,,,,,,,,,-1
2,1449601717,1.000811,0.002082,-0.001922,0.003575,0.999653,1.000928,1.002032,1.960286,6.68461,...,,,,,,,,,,-1
3,1449601777,1.001245,0.004715,-0.002895,0.008881,0.999188,1.001425,1.0035,1.614524,6.684601,...,,,,,,,,,,-1
4,1449601855,1.001354,0.065186,-0.09652,0.165298,1.000807,1.002259,1.003631,0.83779,6.682252,...,0.0,,0.0,1.0,,,,,0.0,2


## Filling missing values: column means for continuous features, zeros for discrete/label columns

In [3]:
def interpolation(df):
    """Return a copy of ``df`` with its missing values filled in.

    Columns whose name starts with ``'discrete'`` or ``'label'`` have their NaNs
    replaced by 0. Every other column is treated as continuous and has its NaNs
    replaced by that column's mean.

    The input frame is not modified. In the result the continuous columns come
    first, followed by the discrete/label columns; within each group the
    original column order is kept.
    """
    zero_fill_prefixes = ('discrete', 'label')
    all_names = list(df.columns)

    # Partition the column names by how their gaps should be filled.
    mean_fill_names = [name for name in all_names if not name.startswith(zero_fill_prefixes)]
    zero_fill_names = [name for name in all_names if name.startswith(zero_fill_prefixes)]

    # Continuous columns: substitute each column's own mean.
    continuous = df[mean_fill_names]
    continuous_filled = continuous.fillna(continuous.mean())

    # Discrete/label columns: a missing entry becomes 0.
    categorical_filled = df[zero_fill_names].fillna(0)

    return pd.concat([continuous_filled, categorical_filled], axis = 1)

In [4]:
# Features: the positional column slice 1:27 of `data`; target: the SITTING label column.
# Both go through interpolation() to fill missing values and are then converted
# to NumPy arrays.
# NOTE(review): interpolation() fills missing label values with 0, so rows that
# have no SITTING annotation are treated as "not sitting" -- confirm this is
# intended rather than dropping the unlabeled rows.
X = interpolation(data.iloc[:, 1:27]).values
y = interpolation(data[['label:SITTING']]).values

X, y

(array([[ 1.000371,  0.007671, -0.016173, ..., -0.329743,  0.382219,
         -0.121107],
        [ 1.000243,  0.003782, -0.002713, ...,  0.20286 ,  0.335481,
          0.10547 ],
        [ 1.000811,  0.002082, -0.001922, ...,  0.111225,  0.48802 ,
          0.154312],
        ...,
        [ 1.002523,  0.028048,  0.027043, ...,  0.524328,  0.286613,
          0.012429],
        [ 1.00259 ,  0.005246, -0.001691, ..., -0.081698,  0.466467,
          0.545858],
        [ 1.002413,  0.003424,  0.004579, ...,  0.175483, -0.0208  ,
         -0.128086]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]]))

## Splitting the data and loading it into a PyTorch dataloader

In [5]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top so all dependencies are visible in one place.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set, using a fixed random_state for the split.
split_options = dict(test_size = 0.2, random_state = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [6]:
# Convert the NumPy splits to tensors, casting to float32 once up front.
# Without an explicit dtype, torch.tensor() keeps NumPy's float64, which does not
# match the default float32 parameters of nn.Linear and would require a .float()
# cast on every batch. Existing .float() calls downstream remain valid (no-ops).
train_features = torch.tensor(X_train, dtype = torch.float32)
train_labels = torch.tensor(y_train, dtype = torch.float32)
test_features = torch.tensor(X_test, dtype = torch.float32)
test_labels = torch.tensor(y_test, dtype = torch.float32)

In [7]:
# Pair each feature row with its label so the loaders yield (features, labels) batches.
train_data = torch.utils.data.TensorDataset(train_features, train_labels)
test_data = torch.utils.data.TensorDataset(test_features, test_labels)

# Single source of truth for the mini-batch size used by both loaders.
batch_size = 50

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
# The evaluation metrics are sums over the whole test set and do not depend on
# sample order, so the test loader iterates in a fixed order.
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = False)

## Network

In [8]:
class Classifier(nn.Module):
    """Fully-connected network: three ReLU hidden layers (200, 100 and 50 units)
    followed by a linear output layer squashed through a sigmoid."""

    def __init__(self, input_layer, output_layer):
        """Build the layers.

        input_layer: number of input features per sample.
        output_layer: number of output units.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features = input_layer, out_features = 200)
        self.fc2 = nn.Linear(in_features = 200, out_features = 100)
        self.fc3 = nn.Linear(in_features = 100, out_features = 50)
        self.output = nn.Linear(in_features = 50, out_features = output_layer)

    def forward(self, x):
        """Run ``x`` through the hidden layers and return sigmoid outputs in [0, 1]."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return torch.sigmoid(self.output(hidden))

In [9]:
# Seed PyTorch's global RNG before any weights are created so that the weight
# initialisation is the same on every Restart & Run All. The value mirrors the
# random_state used for the train/test split.
torch.manual_seed(1)

input_layer = train_features.shape[1]  # one input unit per feature column
output_layer = 1  # a single sigmoid output unit

model = Classifier(input_layer, output_layer)

In [10]:
# Binary cross-entropy on the model's sigmoid outputs, minimised with Adam.
learning_rate = 0.001

criterion = nn.BCELoss()
optimizer = optim.Adam(params = model.parameters(), lr = learning_rate)

## Training loop

In [11]:
epochs = 200
for epoch in range(epochs):
    # Running sum of the per-batch losses for this epoch.
    total_loss = 0
    for features, labels in train_loader:
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss; inputs are cast to float32 for the model.
        y_preds = model(features.float())
        loss = criterion(y_preds, labels.float())

        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f'Epoch: {epoch + 1} | Total Batch Loss: {total_loss}')

Epoch: 1 | Total Batch Loss: 42.02975445985794
Epoch: 2 | Total Batch Loss: 35.77012872695923
Epoch: 3 | Total Batch Loss: 33.23016256093979
Epoch: 4 | Total Batch Loss: 32.07504230737686
Epoch: 5 | Total Batch Loss: 31.372679203748703
Epoch: 6 | Total Batch Loss: 29.845425486564636
Epoch: 7 | Total Batch Loss: 29.079235583543777
Epoch: 8 | Total Batch Loss: 28.515376150608063
Epoch: 9 | Total Batch Loss: 28.098766565322876
Epoch: 10 | Total Batch Loss: 26.746616810560226
Epoch: 11 | Total Batch Loss: 26.832252383232117
Epoch: 12 | Total Batch Loss: 27.156780689954758
Epoch: 13 | Total Batch Loss: 26.984860360622406
Epoch: 14 | Total Batch Loss: 25.61850230395794
Epoch: 15 | Total Batch Loss: 25.622392877936363
Epoch: 16 | Total Batch Loss: 24.556797668337822
Epoch: 17 | Total Batch Loss: 24.272471517324448
Epoch: 18 | Total Batch Loss: 24.860372230410576
Epoch: 19 | Total Batch Loss: 23.90154267847538
Epoch: 20 | Total Batch Loss: 23.894635915756226
Epoch: 21 | Total Batch Loss: 23.75

## Evaluating the model

In [12]:
# Evaluate the trained model on the held-out test set and report accuracy,
# precision, recall and F1 for the positive class.
model.eval()  # inference mode; harmless for this architecture but the safe default

total_wrong = 0
positive_preds = 0 #tp + fp
true_positives = 0
false_negatives = 0
total_samples = 0

with torch.no_grad():
    # The loop variables are deliberately NOT named X_test / y_test, so the NumPy
    # arrays produced by the train/test split cell are not overwritten.
    for batch_features, batch_labels in test_loader:
        # Round the sigmoid outputs to hard 0/1 predictions.
        batch_preds = torch.round(model(batch_features.float()))
        batch_labels = batch_labels.float()

        predicted_positive = batch_preds == 1
        predicted_negative = batch_preds == 0
        actual_positive = batch_labels == 1

        # Vectorised confusion counts for the whole batch.
        positive_preds += predicted_positive.sum().item()
        true_positives += (predicted_positive & actual_positive).sum().item()
        false_negatives += (predicted_negative & actual_positive).sum().item()
        total_wrong += (batch_preds != batch_labels).sum().item()
        total_samples += len(batch_labels)

# Guard every ratio against an empty denominator (e.g. no positive predictions,
# or no positive labels in the test set) instead of raising ZeroDivisionError.
class_acc = (total_samples - total_wrong) / total_samples * 100 if total_samples else 0.0
precision = true_positives / positive_preds if positive_preds else 0.0
actual_positives = true_positives + false_negatives
recall = true_positives / actual_positives if actual_positives else 0.0
f1_score = 2 * (precision * recall / (precision + recall)) if (precision + recall) else 0.0

print(f'Classification Accuracy: {class_acc:.2f}')
print(f'Precision: {precision:.2f}') #Fraction of the model's positive predictions that were actually positive
print(f'Recall: {recall:.2f}') #Fraction of the actual positives that the model identified
print(f'F-1 Score: {f1_score:.2f}')

Classification Accuracy: 84.72
Precision: 0.87
Recall: 0.87
F-1 Score: 0.87
