In [1]:
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch
from sklearn import metrics
from sklearn.model_selection import train_test_split
import numpy as np
import seaborn as sns
import csv

In [2]:
name_dataframe = pd.read_csv('../data/features.txt', delimiter = '\n', header = None)
names = name_dataframe.values.tolist()
names = [k for row in names for k in row] #List of column names

data = pd.read_csv('../data/X_train.txt', delim_whitespace = True, header = None) #Read in train dataframe
data.columns = names #Setting column names

X_train = data.loc[:,'1 tBodyAcc-mean()-X':'40 tBodyAcc-correlation()-Y,Z'] #Selecting only acceleration columns

y_train_activity = pd.read_csv('../data/y_train.txt', header = None)
y_train_activity.columns = ['Activity']

y_train_subject = pd.read_csv('../data/subject_train.txt', header = None)
y_train_subject.columns = ['Subject']

GAN_data = pd.concat([X_train, y_train_activity, y_train_subject], axis = 1)
GAN_data.head()

Unnamed: 0,1 tBodyAcc-mean()-X,2 tBodyAcc-mean()-Y,3 tBodyAcc-mean()-Z,4 tBodyAcc-std()-X,5 tBodyAcc-std()-Y,6 tBodyAcc-std()-Z,7 tBodyAcc-mad()-X,8 tBodyAcc-mad()-Y,9 tBodyAcc-mad()-Z,10 tBodyAcc-max()-X,...,"33 tBodyAcc-arCoeff()-Y,4","34 tBodyAcc-arCoeff()-Z,1","35 tBodyAcc-arCoeff()-Z,2","36 tBodyAcc-arCoeff()-Z,3","37 tBodyAcc-arCoeff()-Z,4","38 tBodyAcc-correlation()-X,Y","39 tBodyAcc-correlation()-X,Z","40 tBodyAcc-correlation()-Y,Z",Activity,Subject
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.095246,0.278851,-0.465085,0.491936,-0.190884,0.376314,0.435129,0.66079,5,1
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.281211,0.085988,-0.022153,-0.016657,-0.220643,-0.013429,-0.072692,0.579382,5,1
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.332564,0.239281,-0.136204,0.173863,-0.299493,-0.124698,-0.181105,0.6089,5,1
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.170813,0.294938,-0.306081,0.482148,-0.470129,-0.305693,-0.362654,0.507459,5,1
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.315375,0.439744,-0.269069,0.179414,-0.088952,-0.155804,-0.189763,0.599213,5,1


In [3]:
GAN_data = GAN_data.groupby('Subject').apply(pd.DataFrame.sample, frac=0.03).reset_index(drop=True)
GAN_data

Unnamed: 0,1 tBodyAcc-mean()-X,2 tBodyAcc-mean()-Y,3 tBodyAcc-mean()-Z,4 tBodyAcc-std()-X,5 tBodyAcc-std()-Y,6 tBodyAcc-std()-Z,7 tBodyAcc-mad()-X,8 tBodyAcc-mad()-Y,9 tBodyAcc-mad()-Z,10 tBodyAcc-max()-X,...,"33 tBodyAcc-arCoeff()-Y,4","34 tBodyAcc-arCoeff()-Z,1","35 tBodyAcc-arCoeff()-Z,2","36 tBodyAcc-arCoeff()-Z,3","37 tBodyAcc-arCoeff()-Z,4","38 tBodyAcc-correlation()-X,Y","39 tBodyAcc-correlation()-X,Z","40 tBodyAcc-correlation()-Y,Z",Activity,Subject
0,0.325886,0.000817,0.016658,-0.982403,-0.946708,-0.736155,-0.983673,-0.945060,-0.727475,-0.916762,...,-0.240399,-0.000734,-0.241857,0.085815,0.295091,0.449622,0.474105,0.934028,6,1
1,0.277510,-0.016334,-0.109895,-0.997329,-0.994007,-0.997461,-0.997473,-0.992929,-0.997822,-0.940043,...,-0.087491,0.470310,-0.093276,-0.000678,0.148952,-0.255320,-0.064980,-0.035453,4,1
2,0.273027,-0.010197,-0.111153,-0.993125,-0.990001,-0.995408,-0.993589,-0.989888,-0.994165,-0.936510,...,-0.139590,0.561777,-0.243836,0.318708,-0.114301,0.016113,-0.051272,-0.162163,6,1
3,0.275676,-0.021264,-0.110801,-0.997862,-0.990091,-0.994593,-0.998333,-0.989473,-0.994485,-0.944567,...,0.045064,0.490784,-0.204408,0.227647,-0.196758,-0.097759,0.084406,0.107562,5,1
4,0.303689,-0.004243,-0.150850,-0.956503,-0.838672,-0.943010,-0.962006,-0.844821,-0.937195,-0.916064,...,-0.115909,-0.046818,-0.036225,-0.032126,0.011889,-0.910582,-0.706014,0.373016,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,0.279998,-0.008851,-0.103882,-0.996803,-0.976461,-0.981593,-0.997044,-0.976664,-0.980501,-0.941751,...,0.015619,0.211139,-0.052256,-0.103982,0.008793,0.122814,-0.125760,0.476450,5,30
214,0.270066,-0.050618,-0.092672,-0.989959,-0.854669,-0.969782,-0.991565,-0.841260,-0.969198,-0.931457,...,-0.141419,0.073816,-0.172261,0.255886,-0.030827,0.497752,-0.443817,-0.474042,4,30
215,0.272359,-0.001138,-0.111587,-0.988056,-0.831742,-0.932375,-0.990062,-0.853840,-0.939390,-0.924010,...,0.542931,-0.046517,-0.033900,0.234856,-0.231697,-0.412537,0.053695,-0.046689,5,30
216,0.325338,-0.026044,-0.122840,-0.967250,-0.928780,-0.957976,-0.969361,-0.937497,-0.954042,-0.905316,...,-0.052980,0.105778,-0.062396,-0.000555,-0.163072,-0.660981,-0.198059,-0.551580,4,30


In [4]:
def get_train_test_split(data, current_user_tested):
    """
    data: DataFrame
    current_user_tested: int
    
    Returns: numpy arrays of X_train, y_train, X_test, y_test
    """
    data = data.copy(deep = True)
    
    #Getting only acceleration columns
    X_train = data.loc[:, "1 tBodyAcc-mean()-X": "40 tBodyAcc-correlation()-Y,Z"].values 
    #Selecting all activity labels
    y_train = data.loc[:, "Activity"].values
    
    #X_test is the acceleration data for the current user being tested on
    X_test = data[data['Subject'] == current_user_tested].loc[:, "1 tBodyAcc-mean()-X": "40 tBodyAcc-correlation()-Y,Z"].values 
    #y_test is the activity label for the current user being tested on
    y_test = data[data['Subject'] == current_user_tested].loc[:, "Activity"].values 
    
    #Zero indexing all activity labels since they start at 1
    y_train -= 1 
    y_test -=1

    return X_train, y_train, X_test, y_test

In [5]:
def classifier_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.LeakyReLU(0.05)
    )

class Classifier(nn.Module):
    def __init__(self, feature_dim = 40):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            classifier_block(feature_dim, 25),
            classifier_block(25, 20),
            classifier_block(20, 15),
            classifier_block(15, 10),
            nn.Linear(10, 6)
        )
    def forward(self, x):
        return self.network(x)

In [6]:
def initialize_params(X_train, y_train, X_test, y_test):
    lr = 0.001
    n_epochs = 1000
    batch_size = 250
    
    model = Classifier()
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr = lr)

    train_features = torch.tensor(X_train)
    train_labels = torch.tensor(y_train)
    test_features = torch.tensor(X_test)
    test_labels = torch.tensor(y_test)
    
    train_data = torch.utils.data.TensorDataset(train_features, train_labels)
    test_data = torch.utils.data.TensorDataset(test_features, test_labels)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size = len(test_labels), shuffle = True)
    
    return model, train_loader, test_loader, optimizer, criterion

def training_loop(model, train_loader, test_loader, optimizer, criterion, n_epochs = 1000):
    for epoch in range(n_epochs):
        total_loss = 0
        for batch in train_loader:
            features, labels = batch

            optimizer.zero_grad()
            preds = model(features.float())

            loss = criterion(preds, labels) 
            loss.backward()

            optimizer.step()
            total_loss += loss.item()

        print(f'Epoch {epoch + 1}, Loss: {total_loss}, Final Batch Loss: {loss.item()}')
    
    return model, test_loader

In [7]:
def evaluation(model, test_loader):
    softmax = nn.Softmax(dim = 1)
    for batch in test_loader: #Runs once since the batch is the entire testing data
        features, labels = batch
        _, preds = torch.max(softmax(model(features.float())), dim = 1) #Getting the model's predictions
        report = metrics.classification_report(labels, preds, digits = 3, output_dict = True, zero_division = 0)
        f1_score = pd.DataFrame(report).transpose().loc['weighted avg', :]['f1-score']
    return f1_score

In [8]:
subject_numbers = list(GAN_data['Subject'].unique()) #list of all unique subject numbers
n_subjects = len(subject_numbers) #Number of unique subjects
n_iters = 100

X_train, y_train, X_test, y_test = get_train_test_split(GAN_data, subject_numbers[0]) #Subject_numbers[0] is a placeholder

model, train_loader, test_loader, optimizer, criterion = initialize_params(X_train, y_train, X_test, y_test)
#Train model on all user data ONCE
model, _ = training_loop(model, train_loader, test_loader, optimizer, criterion) 

Epoch 1, Loss: 1.7878259420394897, Final Batch Loss: 1.7878259420394897
Epoch 2, Loss: 1.786778211593628, Final Batch Loss: 1.786778211593628
Epoch 3, Loss: 1.7858480215072632, Final Batch Loss: 1.7858480215072632
Epoch 4, Loss: 1.7847460508346558, Final Batch Loss: 1.7847460508346558
Epoch 5, Loss: 1.7807590961456299, Final Batch Loss: 1.7807590961456299
Epoch 6, Loss: 1.7825628519058228, Final Batch Loss: 1.7825628519058228
Epoch 7, Loss: 1.782076120376587, Final Batch Loss: 1.782076120376587
Epoch 8, Loss: 1.7821141481399536, Final Batch Loss: 1.7821141481399536
Epoch 9, Loss: 1.780045986175537, Final Batch Loss: 1.780045986175537
Epoch 10, Loss: 1.7767857313156128, Final Batch Loss: 1.7767857313156128
Epoch 11, Loss: 1.7735542058944702, Final Batch Loss: 1.7735542058944702
Epoch 12, Loss: 1.7753028869628906, Final Batch Loss: 1.7753028869628906
Epoch 13, Loss: 1.7754052877426147, Final Batch Loss: 1.7754052877426147
Epoch 14, Loss: 1.7732810974121094, Final Batch Loss: 1.7732810974

Epoch 130, Loss: 1.130394458770752, Final Batch Loss: 1.130394458770752
Epoch 131, Loss: 1.1336355209350586, Final Batch Loss: 1.1336355209350586
Epoch 132, Loss: 1.1307388544082642, Final Batch Loss: 1.1307388544082642
Epoch 133, Loss: 1.0637767314910889, Final Batch Loss: 1.0637767314910889
Epoch 134, Loss: 1.0808111429214478, Final Batch Loss: 1.0808111429214478
Epoch 135, Loss: 1.0978724956512451, Final Batch Loss: 1.0978724956512451
Epoch 136, Loss: 1.0825631618499756, Final Batch Loss: 1.0825631618499756
Epoch 137, Loss: 1.1002819538116455, Final Batch Loss: 1.1002819538116455
Epoch 138, Loss: 1.0967830419540405, Final Batch Loss: 1.0967830419540405
Epoch 139, Loss: 1.080553650856018, Final Batch Loss: 1.080553650856018
Epoch 140, Loss: 1.0935121774673462, Final Batch Loss: 1.0935121774673462
Epoch 141, Loss: 1.1125088930130005, Final Batch Loss: 1.1125088930130005
Epoch 142, Loss: 1.1127363443374634, Final Batch Loss: 1.1127363443374634
Epoch 143, Loss: 1.085721492767334, Final 

Epoch 263, Loss: 0.8700008988380432, Final Batch Loss: 0.8700008988380432
Epoch 264, Loss: 0.8296788334846497, Final Batch Loss: 0.8296788334846497
Epoch 265, Loss: 0.8286680579185486, Final Batch Loss: 0.8286680579185486
Epoch 266, Loss: 0.9003423452377319, Final Batch Loss: 0.9003423452377319
Epoch 267, Loss: 0.8050321340560913, Final Batch Loss: 0.8050321340560913
Epoch 268, Loss: 0.8887962102890015, Final Batch Loss: 0.8887962102890015
Epoch 269, Loss: 0.8116957545280457, Final Batch Loss: 0.8116957545280457
Epoch 270, Loss: 0.8567867279052734, Final Batch Loss: 0.8567867279052734
Epoch 271, Loss: 0.8199712038040161, Final Batch Loss: 0.8199712038040161
Epoch 272, Loss: 0.8527567982673645, Final Batch Loss: 0.8527567982673645
Epoch 273, Loss: 0.8488361239433289, Final Batch Loss: 0.8488361239433289
Epoch 274, Loss: 0.833950936794281, Final Batch Loss: 0.833950936794281
Epoch 275, Loss: 0.832815408706665, Final Batch Loss: 0.832815408706665
Epoch 276, Loss: 0.835023820400238, Final 

Epoch 402, Loss: 0.7579106092453003, Final Batch Loss: 0.7579106092453003
Epoch 403, Loss: 0.7333508133888245, Final Batch Loss: 0.7333508133888245
Epoch 404, Loss: 0.749610185623169, Final Batch Loss: 0.749610185623169
Epoch 405, Loss: 0.7211530208587646, Final Batch Loss: 0.7211530208587646
Epoch 406, Loss: 0.7254535555839539, Final Batch Loss: 0.7254535555839539
Epoch 407, Loss: 0.7395862936973572, Final Batch Loss: 0.7395862936973572
Epoch 408, Loss: 0.7093861699104309, Final Batch Loss: 0.7093861699104309
Epoch 409, Loss: 0.7257923483848572, Final Batch Loss: 0.7257923483848572
Epoch 410, Loss: 0.7230382561683655, Final Batch Loss: 0.7230382561683655
Epoch 411, Loss: 0.700825572013855, Final Batch Loss: 0.700825572013855
Epoch 412, Loss: 0.7247857451438904, Final Batch Loss: 0.7247857451438904
Epoch 413, Loss: 0.7408142685890198, Final Batch Loss: 0.7408142685890198
Epoch 414, Loss: 0.6852356195449829, Final Batch Loss: 0.6852356195449829
Epoch 415, Loss: 0.7017635107040405, Final

Epoch 538, Loss: 0.6605439782142639, Final Batch Loss: 0.6605439782142639
Epoch 539, Loss: 0.6506525278091431, Final Batch Loss: 0.6506525278091431
Epoch 540, Loss: 0.6259909272193909, Final Batch Loss: 0.6259909272193909
Epoch 541, Loss: 0.6362252831459045, Final Batch Loss: 0.6362252831459045
Epoch 542, Loss: 0.6614155769348145, Final Batch Loss: 0.6614155769348145
Epoch 543, Loss: 0.6271560192108154, Final Batch Loss: 0.6271560192108154
Epoch 544, Loss: 0.672586977481842, Final Batch Loss: 0.672586977481842
Epoch 545, Loss: 0.6484466791152954, Final Batch Loss: 0.6484466791152954
Epoch 546, Loss: 0.6360553503036499, Final Batch Loss: 0.6360553503036499
Epoch 547, Loss: 0.6340427994728088, Final Batch Loss: 0.6340427994728088
Epoch 548, Loss: 0.6492859721183777, Final Batch Loss: 0.6492859721183777
Epoch 549, Loss: 0.6542497277259827, Final Batch Loss: 0.6542497277259827
Epoch 550, Loss: 0.6498773097991943, Final Batch Loss: 0.6498773097991943
Epoch 551, Loss: 0.6434166431427002, Fin

Epoch 673, Loss: 0.5920640230178833, Final Batch Loss: 0.5920640230178833
Epoch 674, Loss: 0.6133078336715698, Final Batch Loss: 0.6133078336715698
Epoch 675, Loss: 0.6488122344017029, Final Batch Loss: 0.6488122344017029
Epoch 676, Loss: 0.6288819909095764, Final Batch Loss: 0.6288819909095764
Epoch 677, Loss: 0.5799027681350708, Final Batch Loss: 0.5799027681350708
Epoch 678, Loss: 0.6035512685775757, Final Batch Loss: 0.6035512685775757
Epoch 679, Loss: 0.5979938507080078, Final Batch Loss: 0.5979938507080078
Epoch 680, Loss: 0.5811524987220764, Final Batch Loss: 0.5811524987220764
Epoch 681, Loss: 0.6083056330680847, Final Batch Loss: 0.6083056330680847
Epoch 682, Loss: 0.5654834508895874, Final Batch Loss: 0.5654834508895874
Epoch 683, Loss: 0.6509070992469788, Final Batch Loss: 0.6509070992469788
Epoch 684, Loss: 0.6038361191749573, Final Batch Loss: 0.6038361191749573
Epoch 685, Loss: 0.5658625960350037, Final Batch Loss: 0.5658625960350037
Epoch 686, Loss: 0.5648903250694275, F

Epoch 811, Loss: 0.5236705541610718, Final Batch Loss: 0.5236705541610718
Epoch 812, Loss: 0.5198904871940613, Final Batch Loss: 0.5198904871940613
Epoch 813, Loss: 0.5164024233818054, Final Batch Loss: 0.5164024233818054
Epoch 814, Loss: 0.5083781480789185, Final Batch Loss: 0.5083781480789185
Epoch 815, Loss: 0.4874233603477478, Final Batch Loss: 0.4874233603477478
Epoch 816, Loss: 0.6120728254318237, Final Batch Loss: 0.6120728254318237
Epoch 817, Loss: 0.5623838901519775, Final Batch Loss: 0.5623838901519775
Epoch 818, Loss: 0.5083295702934265, Final Batch Loss: 0.5083295702934265
Epoch 819, Loss: 0.5788191556930542, Final Batch Loss: 0.5788191556930542
Epoch 820, Loss: 0.5307239294052124, Final Batch Loss: 0.5307239294052124
Epoch 821, Loss: 0.5185619592666626, Final Batch Loss: 0.5185619592666626
Epoch 822, Loss: 0.5045744776725769, Final Batch Loss: 0.5045744776725769
Epoch 823, Loss: 0.5168071389198303, Final Batch Loss: 0.5168071389198303
Epoch 824, Loss: 0.5653396248817444, F

Epoch 929, Loss: 0.5085602402687073, Final Batch Loss: 0.5085602402687073
Epoch 930, Loss: 0.4599432945251465, Final Batch Loss: 0.4599432945251465
Epoch 931, Loss: 0.5227734446525574, Final Batch Loss: 0.5227734446525574
Epoch 932, Loss: 0.518881618976593, Final Batch Loss: 0.518881618976593
Epoch 933, Loss: 0.4664376378059387, Final Batch Loss: 0.4664376378059387
Epoch 934, Loss: 0.4742336869239807, Final Batch Loss: 0.4742336869239807
Epoch 935, Loss: 0.46605536341667175, Final Batch Loss: 0.46605536341667175
Epoch 936, Loss: 0.48427248001098633, Final Batch Loss: 0.48427248001098633
Epoch 937, Loss: 0.491139680147171, Final Batch Loss: 0.491139680147171
Epoch 938, Loss: 0.48199141025543213, Final Batch Loss: 0.48199141025543213
Epoch 939, Loss: 0.4862942397594452, Final Batch Loss: 0.4862942397594452
Epoch 940, Loss: 0.4766271412372589, Final Batch Loss: 0.4766271412372589
Epoch 941, Loss: 0.4814637303352356, Final Batch Loss: 0.4814637303352356
Epoch 942, Loss: 0.4813186228275299,

In [9]:
all_scores = np.zeros((n_subjects, n_iters + 1))

for k in range(n_subjects):
    for j in range(n_iters):
        #Get the train test split of the current user (train data is always all users, test data changes)
        X_train, y_train, X_test, y_test = get_train_test_split(GAN_data, subject_numbers[k])
        #Change the data in the test loader to reflect the current user
        _, _, test_loader, _, _ = initialize_params(X_train, y_train, X_test, y_test)
        #Get the performance on the current user
        f1_score = evaluation(model, test_loader)
        #Modify the zero matrix with the current f1 score
        all_scores[k, j] = f1_score

In [10]:
for k in range(n_subjects):
    all_scores[k, -1] = subject_numbers[k]
    
all_scores

array([[ 0.55      ,  0.58      ,  0.47619048, ...,  0.65      ,
         0.65      ,  1.        ],
       [ 0.86      ,  0.86      ,  0.8       , ...,  0.86      ,
         0.76666667,  3.        ],
       [ 0.73148148,  0.67195767,  0.87830688, ...,  0.73148148,
         0.77777778,  5.        ],
       ...,
       [ 0.62809917,  0.62809917,  0.58181818, ...,  0.62809917,
         0.62809917, 28.        ],
       [ 0.7       ,  0.69      ,  0.8       , ...,  0.8       ,
         0.71666667, 29.        ],
       [ 0.62012987,  0.62012987,  0.62012987, ...,  0.62012987,
         0.70649351, 30.        ]])

In [17]:
heading = list(np.arange(0, n_iters, 1)) + ['Subject Number']
heading

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 'Subject Number']

In [18]:
with open("../model_outputs/All User F-1 Scores.csv", "w") as csvfile:
    csvwriter = csv.writer(csvfile) 
    csvwriter.writerow(heading)
    csvwriter.writerows(all_scores)

print("Model results saved to All User F-1 Scores.csv")

Model results saved to All User F-1 Scores.csv
