# Import Modules

In [11]:
import torch
import time
import sys
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
##load and scale data
#returns scaled data (X) and labels (Y)
#file_name: string, file with data to be used
#label: array, list of activities to use
#users: array, list of users whose data is to be used

def start_data(file_name, label, users):
    #read csv into dataframe
    data = pd.read_csv(file_name)
    data = data[data['UUID'].isin(users)]

    #seperate only acceleration data
    X = data.loc[:,'raw_acc:magnitude_stats:mean':'raw_acc:3d:ro_yz']    
    y = data[label]

    #seperate only "on" labels
    X = X[(y!=0).any(axis=1)]
    y = y[(y!=0).any(axis=1)]
    
    #interpolate averages per column
    X = interpolation(X).values
    y = interpolation(y).values
    
    #scale the data
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X = sc.fit_transform(X)
    
    return X, y

# Import the Data

In [3]:
def interpolation(df):
    col_to_avg = list(df.columns) #Start with keeping all the columns as columns to use an average interpolation on
    for k in range(len(list(df.columns))):
        if list(df.columns)[k].startswith(('discrete', 'label')): #Remove label and discrete columns from col_to_avg
            col_to_avg.remove(list(df.columns)[k])
    
    df_with_avg = df[col_to_avg].fillna(df[col_to_avg].mean()) #Interpolate nan columns for all continuous-valued columns with average
    
    col_to_zero = list(df.columns)
    for k in range(len(list(df.columns))):
        if not list(df.columns)[k].startswith(('discrete', 'label')): #Remove all columns except label and discrete
            col_to_zero.remove(list(df.columns)[k])
    
    df_with_zero = df[col_to_zero].fillna(0) #Interpolate nan values for label and discrete columns with 0
    
    return pd.concat([df_with_avg, df_with_zero], axis = 1)

# Prepare Data

# The Generator

In [15]:
#defines each generator layer
#input and output dimensions needed
def generator_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace = True)
    )

#returns n_samples of z_dim (number of dimensions of latent space) noise
def get_noise(n_samples, z_dim):
    #torch.manual_seed(0)
    return torch.randn(n_samples, z_dim)

#defines generator class
class Generator(nn.Module):
    def __init__(self, z_dim = 10, feature_dim = 26, hidden_dim = 128):
        super(Generator, self).__init__()
        self.gen = nn.Sequential(
            generator_block(z_dim, int(hidden_dim/2)),
            generator_block(int(hidden_dim/2), int(hidden_dim/4)),
            generator_block(int(hidden_dim/4), 30),
            generator_block(30, 28),
            nn.Linear(28, feature_dim)
        )
    def forward(self, noise):
        return self.gen(noise)

##calculates generator loss
#gen: generator
#disc: discriminator
#criterion1: loss function1
#criterion2: loss function2
#batch_size: batch size
#z_dim: number of dimensions in the latent space
def get_gen_loss(gen, disc, act, usr, criterion1, criterion2, batch_size, z_dim, activities, users):
    latent_vectors = get_noise(batch_size, z_dim)
    act_vectors = get_act_matrix(batch_size, activities)
    usr_vectors = get_usr_matrix(batch_size, users)
    
    to_gen = torch.cat((latent_vectors, act_vectors[1].to(device), usr_vectors[1].to(device)), 1)
    fake_features = gen(to_gen)
    
    pred_disc = disc(fake_features.to(device))
    pred_act = act(fake_features.to(device)) ### CrossEntropyLoss Criterion automatically applies softmax and torch.max
    pred_usr = usr(fake_features.to(device))
    
    d_loss = criterion1(pred_disc, torch.ones_like(pred_disc))
    act_loss = criterion2(pred_act, act_vectors[0].to(device))
    usr_loss = criterion2(pred_usr, usr_vectors[0].to(device))
    
    gen_loss = d_loss + act_loss + usr_loss
    return gen_loss
    
def get_act_matrix(batch_size, a_dim):
    indexes = np.random.randint(a_dim, size = batch_size)
    #print(indexes)
    
    one_hot = np.zeros((len(indexes), indexes.max()+1))
    one_hot[np.arange(len(indexes)),indexes] = 1
    
    return torch.Tensor(indexes).long(), torch.Tensor(one_hot)
    
def get_usr_matrix(batch_size, u_dim):
    indexes = np.random.randint(u_dim, size = batch_size)
    
    one_hot = np.zeros((indexes.size, indexes.max()+1))
    one_hot[np.arange(indexes.size),indexes] = 1
    
    return torch.Tensor(indexes).long(), torch.Tensor(one_hot)

def load_model(model, model_name):
    model.load_state_dict(torch.load(f'../saved_models/{model_name}'))

In [27]:
#Sample from real sitting data
X, _ = start_data("../aggregated_data/aggregated_data.csv", ["label:SITTING", "label:FIX_walking", "label:SLEEPING"], ["0BFC35E2-4817-4865-BFA7-764742302A2D", "0A986513-7828-4D53-AA1F-E02D6DF9561B", "00EABED2-271D-49D8-B599-1D4A09240601"])
y = np.ones(len(X))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = True)
print(np.shape(X_train))
print(np.shape(y_train))

gen = Generator(z_dim = 106)
load_model(gen, "TEST_cGAN_FIX_1_gen")

latent_vectors = get_noise(len(X_train), 100)
act_vectors = get_act_matrix(len(X_train), 3)
usr_vectors = get_usr_matrix(len(X_train), 3)
    
to_gen = torch.cat((latent_vectors, act_vectors[1], usr_vectors[1]), 1)
fake_features = gen(to_gen).detach().numpy()
y_fake = np.zeros(len(X_train))

X_train = np.concatenate((fake_features, X_train), axis = 0)
y_train = np.concatenate((y_fake, y_train), axis = None)
print(np.shape(X_train))
print(np.shape(y_train))

(6952, 26)
(6952,)
(13904, 26)
(13904,)


# Train on Real + Fake, Evaluate on Fake 

In [31]:
classifier = LogisticRegression(penalty = 'l2', C = 0.7)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print(metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred, digits = 3))                        

[[   0    0]
 [   2 1737]]
              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         0
         1.0      1.000     0.999     0.999      1739

    accuracy                          0.999      1739
   macro avg      0.500     0.499     0.500      1739
weighted avg      1.000     0.999     0.999      1739



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
to_print = "Hello There"
empty = " "
print(to_print, end = "\r")
for i in range(10):
    time.sleep(1)
    print(empty * len(to_print), end = "\r")
    to_print = f"New {i}"
    print(to_print, end = "\r")

New 9      