In [321]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [322]:
sub_features = ['42 tGravityAcc-mean()-Y',
 '43 tGravityAcc-mean()-Z',
 '51 tGravityAcc-max()-Y',
 '52 tGravityAcc-max()-Z',
 '54 tGravityAcc-min()-Y',
 '55 tGravityAcc-min()-Z',
 '56 tGravityAcc-sma()',
 '59 tGravityAcc-energy()-Z',
 '125 tBodyGyro-std()-Y',
 '128 tBodyGyro-mad()-Y',
 '138 tBodyGyro-energy()-Y',
 '165 tBodyGyroJerk-std()-Y',
 '168 tBodyGyroJerk-mad()-Y',
 '178 tBodyGyroJerk-energy()-Y',
 '181 tBodyGyroJerk-iqr()-Y',
 '425 fBodyGyro-mean()-Y',
 '428 fBodyGyro-std()-Y',
 '431 fBodyGyro-mad()-Y',
 '441 fBodyGyro-energy()-Y',
 '475 fBodyGyro-bandsEnergy()-1,8',
 '478 fBodyGyro-bandsEnergy()-25,32',
 '483 fBodyGyro-bandsEnergy()-1,16',
 '487 fBodyGyro-bandsEnergy()-1,24',
 '559 angle(X,gravityMean)',
 '560 angle(Y,gravityMean)',
 '561 angle(Z,gravityMean)']

act_features = ['4 tBodyAcc-std()-X',
 '7 tBodyAcc-mad()-X',
 '10 tBodyAcc-max()-X',
 '17 tBodyAcc-energy()-X',
 '202 tBodyAccMag-std()',
 '204 tBodyAccMag-max()',
 '215 tGravityAccMag-std()',
 '217 tGravityAccMag-max()',
 '266 fBodyAcc-mean()-X',
 '269 fBodyAcc-std()-X',
 '272 fBodyAcc-mad()-X',
 '275 fBodyAcc-max()-X',
 '282 fBodyAcc-energy()-X',
 '303 fBodyAcc-bandsEnergy()-1,8',
 '311 fBodyAcc-bandsEnergy()-1,16',
 '315 fBodyAcc-bandsEnergy()-1,24',
 '504 fBodyAccMag-std()',
 '505 fBodyAccMag-mad()',
 '506 fBodyAccMag-max()',
 '509 fBodyAccMag-energy()']

input_shape = len(sub_features) + len(act_features)

In [323]:
#label is a list of integers specifying which labels to filter by
#users is a list of integers specifying which users to filter by
#y_label is a string, either "Activity" or "Subject" depending on what y output needs to be returned
def start_data(label, users, y_label, sub_features, act_features):
    #get the dataframe column names
    name_dataframe = pd.read_csv('../data/features.txt', delimiter = '\n', header = None)
    names = name_dataframe.values.tolist()
    names = [k for row in names for k in row] #List of column names

    data = pd.read_csv('../data/X_train.txt', delim_whitespace = True, header = None) #Read in dataframe
    data.columns = names #Setting column names

    #X_train = data.loc[:,'1 tBodyAcc-mean()-X':'40 tBodyAcc-correlation()-Y,Z'] #Selecting only acceleration columns
    
    #X_train_1 = data.loc[:,'1 tBodyAcc-mean()-X':'40 tBodyAcc-correlation()-Y,Z']
    #X_train_2 = data.loc[:,'81 tBodyAccJerk-mean()-X':'160 tBodyGyro-correlation()-Y,Z']
    X_train_1 = data[sub_features]
    X_train_2 = data[act_features]
    X_train = pd.concat([X_train_1, X_train_2], axis = 1)
    
    y_train_activity = pd.read_csv('../data/y_train.txt', header = None)
    y_train_activity.columns = ['Activity']
    
    y_train_subject = pd.read_csv('../data/subject_train.txt', header = None)
    y_train_subject.columns = ['Subject']

    GAN_data = pd.concat([X_train, y_train_activity, y_train_subject], axis = 1)
    GAN_data = GAN_data[GAN_data['Activity'].isin(label)]
    GAN_data = GAN_data[GAN_data['Subject'].isin(users)]
    
    X = GAN_data.iloc[:,:-2].values
    #X = GAN_data.loc[:,'1 tBodyAcc-mean()-X':'160 tBodyGyro-correlation()-Y,Z'].values
    y = GAN_data[[y_label]].values
    
    return X, y

In [324]:
#defines each generator layer
#input and output dimensions needed
def generator_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace = True)
    )

#returns n_samples of z_dim (number of dimensions of latent space) noise
def get_noise(n_samples, z_dim):
    return torch.randn(n_samples, z_dim)

#defines generator class
class Generator(nn.Module):
    def __init__(self, z_dim = 10, feature_dim = input_shape, hidden_dim = 128):
        super(Generator, self).__init__()
        self.gen = nn.Sequential(
            generator_block(z_dim, 80),
            generator_block(80, 60),
            generator_block(60, 50),
            nn.Linear(50, feature_dim),
            nn.Tanh()
        )
    def forward(self, noise):
        return self.gen(noise)

def get_act_matrix(batch_size, a_dim):
    indexes = np.random.randint(a_dim, size = batch_size)
    
    one_hot = np.zeros((len(indexes), indexes.max()+1))
    one_hot[np.arange(len(indexes)),indexes] = 1
    return torch.Tensor(indexes).long(), torch.Tensor(one_hot)
    
def get_usr_matrix(batch_size, u_dim):
    indexes = np.random.randint(u_dim, size = batch_size)
    
    one_hot = np.zeros((indexes.size, indexes.max()+1))
    one_hot[np.arange(indexes.size),indexes] = 1
    return torch.Tensor(indexes).long(), torch.Tensor(one_hot)

def load_model(model, model_name):
    model.load_state_dict(torch.load(f'../saved_models/{model_name}'))

In [325]:
gen = Generator(z_dim = 106)
load_model(gen, "cGAN_UCI_30k_TEST_gen.param")
gen.eval()

Generator(
  (gen): Sequential(
    (0): Sequential(
      (0): Linear(in_features=106, out_features=80, bias=True)
      (1): Dropout(p=0.1, inplace=False)
      (2): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Linear(in_features=80, out_features=60, bias=True)
      (1): Dropout(p=0.1, inplace=False)
      (2): BatchNorm1d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU(inplace=True)
    )
    (2): Sequential(
      (0): Linear(in_features=60, out_features=50, bias=True)
      (1): Dropout(p=0.1, inplace=False)
      (2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU(inplace=True)
    )
    (3): Linear(in_features=50, out_features=46, bias=True)
    (4): Tanh()
  )
)

# Train On Real, Test On Real

In [326]:
activities = [1, 3, 4]
users = [1, 3, 5]
X, y = start_data(activities, users, "Activity", sub_features, act_features)

In [327]:
#Transforming activity labels (1, 3, 4 --> 0, 1, 2)
for k in range(len(y)): 
    if y[k] == 1:
        y[k] = 0
    elif y[k] == 3:
        y[k] = 1
    else:
        y[k] = 2

In [328]:
X_train, X_test, y_train, y_test = train_test_split(X, y.flatten(), test_size = 0.2, shuffle = True)

classifier_real_act = LogisticRegression(max_iter = 300)
classifier_real_act.fit(X_train, y_train)

y_pred = classifier_real_act.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[44  0  0]
 [ 1 27  0]
 [ 0  0 28]]
              precision    recall  f1-score   support

           0      0.978     1.000     0.989        44
           1      1.000     0.964     0.982        28
           2      1.000     1.000     1.000        28

    accuracy                          0.990       100
   macro avg      0.993     0.988     0.990       100
weighted avg      0.990     0.990     0.990       100



# Train On Fake, Test On Real

In [329]:
latent_vectors = get_noise(len(X_train), 100)
act_vectors = get_act_matrix(len(X_train), 3)
usr_vectors = get_usr_matrix(len(X_train), 3)

to_gen = torch.cat((latent_vectors, act_vectors[1], usr_vectors[1]), 1)
fake_features = gen(to_gen).detach().numpy()

classifier_fake = LogisticRegression(max_iter = 300)
classifier_fake.fit(fake_features, act_vectors[0])

y_pred = classifier_fake.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[43  1  0]
 [ 2 26  0]
 [ 0  0 28]]
              precision    recall  f1-score   support

           0      0.956     0.977     0.966        44
           1      0.963     0.929     0.945        28
           2      1.000     1.000     1.000        28

    accuracy                          0.970       100
   macro avg      0.973     0.969     0.971       100
weighted avg      0.970     0.970     0.970       100



# Subject Test

In [330]:
X, y = start_data(activities, users, "Subject", sub_features, act_features)

In [331]:
for k in range(len(y)): 
    if y[k] == 1:
        y[k] = 0
    elif y[k] == 3:
        y[k] = 1
    else:
        y[k] = 2

In [334]:
X_train, X_test, y_train, y_test = train_test_split(X, y.flatten(), test_size = 0.2, shuffle = True)

classifier_real_user = LogisticRegression(max_iter = 300)
classifier_real_user.fit(X_train, y_train)

y_pred = classifier_real_user.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[26  1  0]
 [ 3 34  7]
 [ 0  1 28]]
              precision    recall  f1-score   support

           0      0.897     0.963     0.929        27
           1      0.944     0.773     0.850        44
           2      0.800     0.966     0.875        29

    accuracy                          0.880       100
   macro avg      0.880     0.900     0.885       100
weighted avg      0.890     0.880     0.878       100



In [333]:
latent_vectors = get_noise(len(X_train), 100)
act_vectors = get_act_matrix(len(X_train), 3)
usr_vectors = get_usr_matrix(len(X_train), 3)

to_gen = torch.cat((latent_vectors, act_vectors[1], usr_vectors[1]), 1)
fake_features = gen(to_gen).detach().numpy()

classifier_fake = LogisticRegression(max_iter = 300)
classifier_fake.fit(fake_features, usr_vectors[0])

y_pred = classifier_fake.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[36  0  2]
 [ 3 23  4]
 [ 5  0 27]]
              precision    recall  f1-score   support

           0      0.818     0.947     0.878        38
           1      1.000     0.767     0.868        30
           2      0.818     0.844     0.831        32

    accuracy                          0.860       100
   macro avg      0.879     0.853     0.859       100
weighted avg      0.873     0.860     0.860       100



# ALL ACTIVITIES

In [277]:
### ACTIVITY 0
gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_1 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_2 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_3 = gen(latent_vectors).detach().numpy()

y_test_1 = np.zeros(300)

### ACTIVITY 1

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_4 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_5 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_6 = gen(latent_vectors).detach().numpy()

y_test_2 = np.ones(300)

### ACTIVITY 2

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_7 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_8 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_9 = gen(latent_vectors).detach().numpy()

y_test_3 = np.ones(300) + 1

X_train = np.concatenate((fake_features_1, fake_features_2, fake_features_3, fake_features_4, fake_features_5, fake_features_6,
                         fake_features_7, fake_features_8, fake_features_9))
y_train = np.concatenate((y_test_1, y_test_2, y_test_3))

activities = [1, 3, 4]
users = [1, 3, 5]
X, y = start_data(activities, users, "Activity", sub_features, act_features)

for k in range(len(y)): 
    if y[k] == 1:
        y[k] = 0
    elif y[k] == 3:
        y[k] = 1
    else:
        y[k] = 2

_, X_test, _, y_test = train_test_split(X, y.flatten(), test_size = 0.2, shuffle = True)

classifier = LogisticRegression(max_iter = 300)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[41  0  0]
 [ 0 29  0]
 [ 0  0 30]]
              precision    recall  f1-score   support

           0      1.000     1.000     1.000        41
           1      1.000     1.000     1.000        29
           2      1.000     1.000     1.000        30

    accuracy                          1.000       100
   macro avg      1.000     1.000     1.000       100
weighted avg      1.000     1.000     1.000       100



In [279]:
### SUBJECT 0
gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_1 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_2 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U0A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_3 = gen(latent_vectors).detach().numpy()

y_test_1 = np.zeros(300)

### SUBJECT 1

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_4 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_5 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U1A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_6 = gen(latent_vectors).detach().numpy()

y_test_2 = np.ones(300)

### SUBJECT 2

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A0 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_7 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A1 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_8 = gen(latent_vectors).detach().numpy()

gen = Generator(z_dim = 100)
gen.eval()
load_model(gen, "U2A2 Solo GAN_gen.param")
latent_vectors = get_noise(len(X_test), 100)
fake_features_9 = gen(latent_vectors).detach().numpy()

y_test_3 = np.ones(300) + 1


X_train = np.concatenate((fake_features_1, fake_features_2, fake_features_3, fake_features_4, fake_features_5, fake_features_6,
                         fake_features_7, fake_features_8, fake_features_9))
y_train = np.concatenate((y_test_1, y_test_2, y_test_3))

activities = [1, 3, 4]
users = [1, 3, 5]
X, y = start_data(activities, users, "Subject", sub_features, act_features)

for k in range(len(y)): 
    if y[k] == 1:
        y[k] = 0
    elif y[k] == 3:
        y[k] = 1
    else:
        y[k] = 2

_, X_test, _, y_test = train_test_split(X, y.flatten(), test_size = 0.2, shuffle = True)

classifier = LogisticRegression(max_iter = 300)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3)) 

[[34  1  0]
 [ 4 30  4]
 [ 0  5 22]]
              precision    recall  f1-score   support

           0      0.895     0.971     0.932        35
           1      0.833     0.789     0.811        38
           2      0.846     0.815     0.830        27

    accuracy                          0.860       100
   macro avg      0.858     0.859     0.858       100
weighted avg      0.858     0.860     0.858       100

