In [81]:
import pandas as pd
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [82]:
###Change the data file directory below appropriately
data = pd.read_csv('data/0A986513-7828-4D53-AA1F-E02D6DF9561B.features_labels.csv')
data.head()

Unnamed: 0,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,...,label:STAIRS_-_GOING_DOWN,label:ELEVATOR,label:OR_standing,label:AT_SCHOOL,label:PHONE_IN_HAND,label:PHONE_IN_BAG,label:PHONE_ON_TABLE,label:WITH_CO-WORKERS,label:WITH_FRIENDS,label_source
0,1449601597,1.000371,0.007671,-0.016173,0.02786,0.998221,1.000739,1.003265,0.891038,6.684582,...,,,,,,,,,,-1
1,1449601657,1.000243,0.003782,-0.002713,0.007046,0.998463,1.000373,1.002088,1.647929,6.684605,...,,,,,,,,,,-1
2,1449601717,1.000811,0.002082,-0.001922,0.003575,0.999653,1.000928,1.002032,1.960286,6.68461,...,,,,,,,,,,-1
3,1449601777,1.001245,0.004715,-0.002895,0.008881,0.999188,1.001425,1.0035,1.614524,6.684601,...,,,,,,,,,,-1
4,1449601855,1.001354,0.065186,-0.09652,0.165298,1.000807,1.002259,1.003631,0.83779,6.682252,...,0.0,,0.0,1.0,,,,,0.0,2


# Interpolating acceleration columns with average values

In [83]:
def interpolation(df):
    col_to_avg = list(df.columns) #Start with keeping all the columns as columns to use an average interpolation on
    for k in range(len(list(df.columns))):
        if list(df.columns)[k].startswith(('discrete', 'label')): #Remove label and discrete columns from col_to_avg
            col_to_avg.remove(list(df.columns)[k])
    
    df_with_avg = df[col_to_avg].fillna(df[col_to_avg].mean()) #Interpolate nan columns for all continuous-valued columns with average
    
    col_to_zero = list(df.columns)
    for k in range(len(list(df.columns))):
        if not list(df.columns)[k].startswith(('discrete', 'label')): #Remove all columns except label and discrete
            col_to_zero.remove(list(df.columns)[k])
    
    df_with_zero = df[col_to_zero].fillna(0) #Interpolate nan values for label and discrete columns with 0
    
    return pd.concat([df_with_avg, df_with_zero], axis = 1)

# Splitting the data and loading it into a PyTorch dataloader

In [84]:
X = data.iloc[:,1:27]
y = data[['label:SITTING']]

X = interpolation(X).values
y = interpolation(y).values

X, y

(array([[ 1.000371,  0.007671, -0.016173, ..., -0.329743,  0.382219,
         -0.121107],
        [ 1.000243,  0.003782, -0.002713, ...,  0.20286 ,  0.335481,
          0.10547 ],
        [ 1.000811,  0.002082, -0.001922, ...,  0.111225,  0.48802 ,
          0.154312],
        ...,
        [ 1.002523,  0.028048,  0.027043, ...,  0.524328,  0.286613,
          0.012429],
        [ 1.00259 ,  0.005246, -0.001691, ..., -0.081698,  0.466467,
          0.545858],
        [ 1.002413,  0.003424,  0.004579, ...,  0.175483, -0.0208  ,
         -0.128086]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]]))

In [85]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
X

array([[-2.03792305e-01, -2.77241041e-01, -5.23171386e-01, ...,
        -1.33596908e+00,  1.64427842e+00, -2.13828571e-01],
       [-2.13218961e-01, -3.31655898e-01, -3.60036412e-01, ...,
         6.15081160e-01,  1.46906384e+00,  5.40337546e-01],
       [-1.71388173e-01, -3.55442285e-01, -3.50449506e-01, ...,
         2.79400571e-01,  2.04091238e+00,  7.02909129e-01],
       ...,
       [-4.53066441e-02,  7.87378517e-03,  6.05806340e-04, ...,
         1.79269417e+00,  1.28586417e+00,  2.30648700e-01],
       [-4.03723787e-02, -3.11171622e-01, -3.47649790e-01, ...,
        -4.27321821e-01,  1.96011303e+00,  2.00617789e+00],
       [-5.34076770e-02, -3.36665032e-01, -2.71657495e-01, ...,
         5.14792761e-01,  1.33413481e-01, -2.37058313e-01]])

In [86]:
train_features = torch.tensor(X)
train_labels = torch.tensor(y)

train_data = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 50, shuffle = True)

In [100]:
def generator_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace = True)
    )
def get_noise(n_samples, z_dim):
    return torch.randn(n_samples, z_dim)

class Generator(nn.Module):
    def __init__(self, z_dim = 10, feature_dim = 26, hidden_dim = 128):
        super(Generator, self).__init__()
        self.gen = nn.Sequential(
            generator_block(z_dim, hidden_dim * 2),
            generator_block(hidden_dim * 2, hidden_dim),
            generator_block(hidden_dim, int(hidden_dim * 0.5)),
            generator_block(int(hidden_dim * 0.5), feature_dim),
            nn.Tanh()
        )
    def forward(self, noise):
        return self.gen(noise)

def discriminator_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.LeakyReLU(0.2),
        nn.Dropout()
    )

class Discriminator(nn.Module):
    def __init__(self, feature_dim = 26, hidden_dim = 128):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            discriminator_block(feature_dim, hidden_dim),
            discriminator_block(hidden_dim, int(hidden_dim * 0.5)),
            discriminator_block(int(hidden_dim * 0.5), hidden_dim),
            nn.Linear(hidden_dim, 1)
        )
    def forward(self, feature_vector):
        return self.disc(feature_vector)

def get_disc_loss(gen, disc, criterion, real_features, batch_size, z_dim):
    latent_vectors = get_noise(batch_size, z_dim)
    fake_features = gen(latent_vectors)
    pred_fake = disc(fake_features.detach())
    
    ground_truth = torch.zeros_like(pred_fake)
    loss_fake = criterion(pred_fake, ground_truth)
    
    pred_real = disc(real_features)
    ground_truth = torch.ones_like(pred_real)
    loss_real = criterion(pred_real, ground_truth)
    
    disc_loss = (loss_fake + loss_real) / 2
    return disc_loss
def get_gen_loss(gen, disc, criterion, batch_size, z_dim):
    latent_vectors = get_noise(batch_size, z_dim)
    fake_features = gen(latent_vectors)
    pred = disc(fake_features)
    gen_loss = criterion(pred, torch.ones_like(pred))
    return gen_loss

In [101]:
criterion = nn.BCEWithLogitsLoss()
n_epochs = 100
z_dim = 10
lr = 0.000001

disc = Discriminator()
gen = Generator(z_dim)

opt_disc = optim.Adam(disc.parameters(), lr = lr)
opt_gen = optim.Adam(gen.parameters(), lr = lr)

# Training Loop

In [102]:
for epoch in range(n_epochs):
    for batch_idx, (real_features, _) in enumerate(train_loader):
        batch_size = len(real_features)
        
        ### Training Discriminator
        opt_disc.zero_grad()
        disc_loss = get_disc_loss(gen, disc, criterion, real_features.float(), batch_size, z_dim)
        disc_loss.backward(retain_graph = True)
        opt_disc.step()
        
        ### Training Generator
        opt_gen.zero_grad()
        gen_loss = get_gen_loss(gen, disc, criterion, batch_size, z_dim)
        gen_loss.backward()
        opt_gen.step()
        
        if batch_idx == 0:
            print(
                f'Epoch [{epoch + 1} / {n_epochs}] Loss D: {disc_loss.item():.4f}, Loss G: {gen_loss.item():.4f} '
            )

Epoch [1 / 100] Loss D: 0.6871, Loss G: 0.7045 
Epoch [2 / 100] Loss D: 0.6933, Loss G: 0.7107 
Epoch [3 / 100] Loss D: 0.6889, Loss G: 0.7192 
Epoch [4 / 100] Loss D: 0.6808, Loss G: 0.7029 
Epoch [5 / 100] Loss D: 0.6785, Loss G: 0.7121 
Epoch [6 / 100] Loss D: 0.6843, Loss G: 0.7053 
Epoch [7 / 100] Loss D: 0.6880, Loss G: 0.7108 
Epoch [8 / 100] Loss D: 0.6939, Loss G: 0.7065 
Epoch [9 / 100] Loss D: 0.7012, Loss G: 0.7043 
Epoch [10 / 100] Loss D: 0.6895, Loss G: 0.7133 
Epoch [11 / 100] Loss D: 0.6748, Loss G: 0.7101 
Epoch [12 / 100] Loss D: 0.6818, Loss G: 0.7070 
Epoch [13 / 100] Loss D: 0.6878, Loss G: 0.7102 
Epoch [14 / 100] Loss D: 0.6879, Loss G: 0.7004 
Epoch [15 / 100] Loss D: 0.6885, Loss G: 0.7074 
Epoch [16 / 100] Loss D: 0.6885, Loss G: 0.7060 
Epoch [17 / 100] Loss D: 0.6824, Loss G: 0.7099 
Epoch [18 / 100] Loss D: 0.6839, Loss G: 0.7026 
Epoch [19 / 100] Loss D: 0.6958, Loss G: 0.6978 
Epoch [20 / 100] Loss D: 0.6794, Loss G: 0.7041 
Epoch [21 / 100] Loss D: 0.68