In [1]:
import pandas as pd
import numpy as np
import random
import csv
from prettytable import PrettyTable
from pylab import *
from scipy.stats import wasserstein_distance
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the feature/label table for a single recording.
# Change the file path below to match where the data lives on your machine.
data = pd.read_csv('../raw_data/0A986513-7828-4D53-AA1F-E02D6DF9561B.features_labels.csv')
# Alternative input (disabled): the aggregated dataset.
#data = pd.read_csv('../aggregated_data/aggregated_data.csv')
data.head()

Unnamed: 0,timestamp,raw_acc:magnitude_stats:mean,raw_acc:magnitude_stats:std,raw_acc:magnitude_stats:moment3,raw_acc:magnitude_stats:moment4,raw_acc:magnitude_stats:percentile25,raw_acc:magnitude_stats:percentile50,raw_acc:magnitude_stats:percentile75,raw_acc:magnitude_stats:value_entropy,raw_acc:magnitude_stats:time_entropy,...,label:STAIRS_-_GOING_DOWN,label:ELEVATOR,label:OR_standing,label:AT_SCHOOL,label:PHONE_IN_HAND,label:PHONE_IN_BAG,label:PHONE_ON_TABLE,label:WITH_CO-WORKERS,label:WITH_FRIENDS,label_source
0,1449601597,1.000371,0.007671,-0.016173,0.02786,0.998221,1.000739,1.003265,0.891038,6.684582,...,,,,,,,,,,-1
1,1449601657,1.000243,0.003782,-0.002713,0.007046,0.998463,1.000373,1.002088,1.647929,6.684605,...,,,,,,,,,,-1
2,1449601717,1.000811,0.002082,-0.001922,0.003575,0.999653,1.000928,1.002032,1.960286,6.68461,...,,,,,,,,,,-1
3,1449601777,1.001245,0.004715,-0.002895,0.008881,0.999188,1.001425,1.0035,1.614524,6.684601,...,,,,,,,,,,-1
4,1449601855,1.001354,0.065186,-0.09652,0.165298,1.000807,1.002259,1.003631,0.83779,6.682252,...,0.0,,0.0,1.0,,,,,0.0,2


# Interpolating columns with average values

In [3]:
def interpolation(df):
    """
    Fill missing values in a dataframe, using a different rule per column family.

    Columns whose name starts with 'discrete' or 'label' have their NaNs replaced with 0.
    Every other column has its NaNs replaced with that column's mean.

    Parameters:
        df: pandas DataFrame to fill. It is not modified.

    Returns:
        A new DataFrame holding the mean-filled columns first (in their original relative
        order) followed by the zero-filled 'discrete'/'label' columns (also in their
        original relative order). Note that this can reorder columns relative to df.
    """
    zero_prefixes = ('discrete', 'label')

    # Partition the columns in a single pass (str() keeps non-string labels from crashing)
    col_to_avg = []
    col_to_zero = []
    for col in df.columns:
        if str(col).startswith(zero_prefixes):
            col_to_zero.append(col)
        else:
            col_to_avg.append(col)

    # Continuous-valued columns: NaN -> column mean (a column that is entirely NaN stays NaN)
    continuous = df[col_to_avg]
    df_with_avg = continuous.fillna(continuous.mean())

    # Label and discrete columns: NaN -> 0
    df_with_zero = df[col_to_zero].fillna(0)

    return pd.concat([df_with_avg, df_with_zero], axis = 1)

# Splitting the data and loading it into a PyTorch dataloader

In [4]:
# Boolean mask of the rows whose SITTING label equals 1
is_sitting = data['label:SITTING'] == 1

# Columns 1..26 are the model's input features; keep only the sitting rows
sitting_features = data.iloc[:, 1:27][is_sitting]
sitting_labels = data[['label:SITTING']][is_sitting]

# Fill missing values, then drop down to plain numpy arrays
X = interpolation(sitting_features).values
y = interpolation(sitting_labels).values

print(len(X), len(y))

2253 2253


In [5]:
# Standardize every feature column of X with a StandardScaler fitted on X itself.
# NOTE(review): this import lives outside the notebook's top import cell.
from sklearn.preprocessing import StandardScaler
# The fitted scaler is kept in `sc`; presumably it is what later gets passed as the
# `scaler` argument of the dataset helpers further down — confirm against the caller.
sc = StandardScaler()
X = sc.fit_transform(X)
#X

# Helper Functions for Training

In [6]:
def generator_block(input_dim, output_dim):
    """
    Build one hidden stage of the generator.

    The stage is: Linear(input_dim -> output_dim), Dropout(p = 0.1),
    BatchNorm1d(output_dim), in-place ReLU, wrapped in an nn.Sequential.
    """
    layers = [
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace = True),
    ]
    return nn.Sequential(*layers)
def get_noise(n_samples, z_dim):
    """Sample an (n_samples x z_dim) tensor of standard-normal latent vectors."""
    noise_shape = (n_samples, z_dim)
    return torch.randn(noise_shape)

class Generator(nn.Module):
    """
    Fully connected generator mapping latent vectors to feature vectors.

    Layer widths are z_dim -> 80 -> 60 -> 40 -> 28 (each a generator_block),
    followed by a plain Linear(28 -> feature_dim) output layer.

    Parameters:
        z_dim: width of the latent input vector.
        feature_dim: width of the generated feature vector.
        hidden_dim: accepted but not used by this architecture (the widths are fixed).
    """
    def __init__(self, z_dim = 10, feature_dim = 26, hidden_dim = 128):
        super().__init__()
        widths = [z_dim, 80, 60, 40, 28]
        hidden_stages = [
            generator_block(n_in, n_out)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]
        self.gen = nn.Sequential(*hidden_stages, nn.Linear(28, feature_dim))

    def forward(self, noise):
        """Run a batch of latent vectors through the network."""
        return self.gen(noise)

def discriminator_block(input_dim, output_dim):
    """
    Build one hidden stage of the discriminator.

    The stage is: Linear(input_dim -> output_dim), Dropout(p = 0.1),
    LeakyReLU(negative slope 0.05), wrapped in an nn.Sequential.
    """
    layers = [
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.LeakyReLU(0.05),
    ]
    return nn.Sequential(*layers)

class Discriminator(nn.Module):
    """
    Fully connected discriminator scoring feature vectors.

    Layer widths are feature_dim -> hidden_dim -> hidden_dim/2 -> hidden_dim/4
    (each a discriminator_block), then Linear(hidden_dim/4 -> 1) and a Sigmoid,
    so the output is one value in (0, 1) per input row.

    Parameters:
        feature_dim: width of the incoming feature vector.
        hidden_dim: width of the first hidden layer; later layers halve it (truncated to int).
    """
    def __init__(self, feature_dim = 26, hidden_dim = 16):
        super().__init__()
        half_dim = int(hidden_dim/2)
        quarter_dim = int(hidden_dim/4)
        stages = [
            discriminator_block(feature_dim, hidden_dim),
            discriminator_block(hidden_dim, half_dim),
            discriminator_block(half_dim, quarter_dim),
            nn.Linear(quarter_dim, 1),
            nn.Sigmoid(),
        ]
        self.disc = nn.Sequential(*stages)

    def forward(self, feature_vector):
        """Score a batch of feature vectors."""
        return self.disc(feature_vector)

def get_disc_loss(gen, disc, criterion, real_features, batch_size, z_dim):
    """
    Discriminator loss for one batch.

    Generates batch_size fake rows, scores them (target 0) and the real rows
    (target 1) with the discriminator, and returns the average of the two
    criterion values. The fake rows are detached, so no gradient reaches the generator.
    """
    # Fake half: the generator output is detached before being scored
    generated = gen(get_noise(batch_size, z_dim))
    fake_scores = disc(generated.detach())
    fake_term = criterion(fake_scores, torch.zeros_like(fake_scores))

    # Real half
    real_scores = disc(real_features)
    real_term = criterion(real_scores, torch.ones_like(real_scores))

    return (fake_term + real_term) / 2

def get_gen_loss(gen, disc, criterion, batch_size, z_dim):
    """
    Generator loss for one batch.

    Generates batch_size fake rows and returns the criterion value of the
    discriminator's scores against an all-ones target (i.e. "judged real").
    """
    scores = disc(gen(get_noise(batch_size, z_dim)))
    wanted = torch.ones_like(scores)
    return criterion(scores, wanted)

def _show_batch_heatmap(values, title):
    """Draw a 2D tensor as a 'Reds' heat map (one row per sample) with a colour bar and title."""
    # Colour limits are handed to matplotlib as plain floats rather than tensors
    lowest = torch.min(values).item()
    highest = torch.max(values).item()
    grid = values.cpu().detach().numpy()
    image = plt.imshow(grid, cmap ='Reds', vmin = lowest, vmax = highest,
                        interpolation ='nearest', origin ='upper')
    plt.colorbar(image)
    plt.title(title, fontweight ="bold")
    plt.show()

def visualize_gen_batch(gen, b_size, epochs = -1):
    """
    Generate b_size fake rows with gen and show them as a heat map.

    The latent width is read from the notebook-level variable z_dim, so the
    hyperparameter cell must have been run first.

    Parameters:
        gen: generator module.
        b_size: number of rows to generate.
        epochs: epoch number shown in the plot title.
    """
    latent_vectors = get_noise(b_size, z_dim)
    fake_features = gen(latent_vectors)
    _show_batch_heatmap(fake_features, 'Generated Batch at Epoch ' + str(epochs))

def visualize_real_batch(features):
    """Show a 2D tensor of real feature rows as a heat map."""
    _show_batch_heatmap(features, 'Real Batch of Data')
    
def performance_stats(gen, disc, b_size, batch = None):
    """
    Score the discriminator on generated (label 0) and, optionally, real (label 1) rows.

    When batch is None, b_size fake rows are generated and all labelled 0.
    Otherwise int(b_size/2) fake rows are generated and compared against the
    first int(b_size/2) rows of batch, which are labelled 1.
    Discriminator outputs are rounded to 0/1 before counting.

    The latent width is read from the notebook-level variable z_dim. The modules are
    used in whatever train/eval mode the caller left them in.

    Parameters:
        gen: generator module.
        disc: discriminator module.
        b_size: total number of rows to evaluate.
        batch: optional 2D tensor of real feature rows.

    Returns:
        (accuracy, precision, recall, false positive rate, F1). A ratio whose
        denominator is zero is reported as 0; F1 uses a 0.0001 term in its
        denominator to avoid dividing by zero.

    Raises:
        ValueError: if a rounded prediction is neither 0 nor 1 (for example NaN).
    """
    with torch.no_grad():
        if batch is None:
            fake_features = gen(get_noise(b_size, z_dim))
            y_hat = torch.round(disc(fake_features))
            n_fake = y_hat.shape[0]
            n_real = 0
        else:
            half = int(b_size/2)
            fake_features = gen(get_noise(half, z_dim))
            fake_y_hat = torch.round(disc(fake_features))
            real_y_hat = torch.round(disc(batch[:half]))
            n_fake = fake_y_hat.shape[0]
            n_real = real_y_hat.shape[0]
            y_hat = torch.cat((fake_y_hat, real_y_hat), dim = 0)

        # One prediction per row, fakes first, then reals
        y_hat = y_hat.reshape(-1)
        y_label = torch.cat((torch.zeros(n_fake, device = y_hat.device),
                             torch.ones(n_real, device = y_hat.device)))

        said_real = y_hat == 1
        said_fake = y_hat == 0
        if not bool((said_real | said_fake).all()):
            # Previously this printed "Error" and called exit(), which kills the kernel
            raise ValueError("Discriminator prediction is neither 0 nor 1 after rounding")

        is_real = y_label == 1
        is_fake = y_label == 0

        tp = int((is_real & said_real).sum())
        fn = int((is_real & said_fake).sum())
        tn = int((is_fake & said_fake).sum())
        fp = int((is_fake & said_real).sum())

        total = tp + tn + fp + fn
        class_acc = (tp + tn)/total if total else 0

        # What fraction of the rows predicted real were actually real
        precision = tp / (tp + fp) if tp + fp else 0
        # What fraction of the real rows were predicted real
        recall = tp / (tp + fn) if tp + fn else 0
        # What fraction of the fake rows were predicted real
        fpR = fp / (fp + tn) if fp + tn else 0

        return class_acc, precision, recall, fpR, 2*(precision * recall / (precision + recall + 0.0001))
    
    
def density_curves(reals, fakes):
    """
    Plot real-vs-fake kernel density estimates for four feature columns in a 2x2 grid.

    Parameters:
        reals: 2D array of real feature rows (indexed as reals[:, column]).
        fakes: 2D torch tensor of generated rows; it must support .numpy(),
               so detach it before passing it in.

    The plotted column indices (0, 18, 19, 20) and their axis labels are fixed;
    presumably they match the feature order used to build the training matrix —
    verify if the feature selection changes.
    """
    # (column index, x-axis label) for each panel, in plotting order
    panels = [
        (0, 'Mean Acceleration'),
        (18, 'Mean X-Acceleration'),
        (19, 'Mean Y-Acceleration'),
        (20, 'Mean Z-Acceleration'),
    ]
    fake_values = fakes.numpy()  # converted once instead of once per panel

    plt.figure(figsize = (15, 15))
    for position, (column, axis_label) in enumerate(panels, start = 1):
        plt.subplot(2, 2, position)
        # NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`
        sns.kdeplot(fake_values[:, column], color = 'r', shade = True, label = 'Fake Distribution')
        sns.kdeplot(reals[:, column], color = 'b', shade = True, label = 'Real Distribution')
        plt.xlabel(axis_label)
        plt.ylabel('Density')
        plt.legend()
    plt.show()
    
def get_fake_samples(gen, batch_size, z_dim):
    """
    Generate a batch of fake feature rows with a (trained) generator.

    Parameters:
        gen: generator module.
        batch_size: number of rows to generate.
        z_dim: width of the latent vectors fed to the generator.

    Returns:
        The generator output for batch_size random latent vectors: a 2D tensor with
        batch_size rows, whose width is whatever the generator emits (not z_dim).
        The result is still attached to the autograd graph.
    """
    return gen(get_noise(batch_size, z_dim))

def all_Wasserstein_dists(gen, z_dim, feature_dim, sample):
    """
    Per-feature Wasserstein distance between generated rows and a real sample.

    Generates len(sample) fake rows and, for each of the first feature_dim columns,
    computes scipy's wasserstein_distance between the fake column and the real column.

    Parameters:
        gen: generator module.
        z_dim: width of the latent vectors fed to the generator.
        feature_dim: number of columns to compare.
        sample: 2D tensor of real feature rows.

    Returns:
        A 1D tensor with feature_dim distances, one per column.
    """
    # Convert both sides to numpy once, rather than once per column
    fake_values = gen(get_noise(len(sample), z_dim)).detach().numpy()
    real_values = sample.detach().numpy()

    wasser_dim = [
        wasserstein_distance(fake_values[:, k], real_values[:, k])
        for k in range(feature_dim)
    ]
    return torch.tensor(wasser_dim)

# Model Hyperparameters (Always Run Again Before Starting Training Loop) 

In [7]:
# Loss function for model
criterion = nn.BCELoss()

#GAN Name (used for saving model and its output)
gan_id = "20k_5_2_constant_GAN"

# Digit Precision for printouts
dig = 5

# Max epochs to run
n_epochs = 20000

# Number of dimensions of output from generator
feature_dim = 26

# Number of dimensions of latent vector
z_dim = 100

# Batch Size
batch_size = 100

# Learning Rates for Generator (Gen) and Discriminator (Disc)
gen_lr =  0.0001
disc_lr = 0.0001

# Constant epochs approach to train Discriminator, Generator
constant_train_flag = True # Set to true to train based on constant # of epochs per machine 
                            # Set to false to train dynamically based on machine performance
disc_epochs = 5             # Number of consecutive epochs to train discriminator before epoch threshold
gen_epochs = 2              # Number of consecutive epochs to train generator before epoch threshold
epoch_threshold = 50        # Epoch number to change training epoch ratio
disc_epochs_change = 5      # New number of consecutive epochs to train discriminator
gen_epochs_change = 2      # New number of consecutive epochs to train generator
rel_epochs = 0              # Epochs passed since last switch (always set to 0)


# Dynamic number of epochs to train Discriminator, Generator
static_threshold = 77   # Epoch number to change from static ratio to dynamic
static_disc_epochs = 5  # Number of consecutive epochs to train discriminator before epoch threshold
static_gen_epochs = 2   # Number of consecutive epochs to train generator before epoch threshold
pull_threshold = 0.2    # Accuracy threshold for switching machine training when the generator is no longer competitive
push_threshold = 0.8    # Accuracy threshold for switching machine training when the discriminator is no longer competitive
recall_threshold = 0.80 # Minimum recall required (together with pull_threshold) before switching to the generator

# Which machine to train (0 for Generator, 1 for Discriminator) !!!(do not change unless for good reason)!!!
GENERATOR = 0
DISCRIMINATOR = 1
to_train = DISCRIMINATOR
train_string = "DISC"

# Show model performance per batch (will always show summary for each epoch)
print_batches = False

# Moving corpus data into a pyTorch format !!!(do not change unless for good reason)!!!
train_features = torch.tensor(X)
train_labels = torch.tensor(y)
train_data = torch.utils.data.TensorDataset(train_features, train_labels)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)

# Initializing the Machines !!!(do not change unless for good reason)!!!
# feature_dim is passed explicitly so that changing the hyperparameter above
# actually resizes both networks (previously they silently used their default of 26)
disc = Discriminator(feature_dim)
gen = Generator(z_dim, feature_dim)
opt_disc = optim.Adam(disc.parameters(), lr = disc_lr)
opt_gen = optim.Adam(gen.parameters(), lr = gen_lr)

# Snapshot of the freshly initialised (untrained) weights
torch.save(gen.state_dict(), "../saved_models/20k_5_2_constant_gan")
torch.save(disc.state_dict(), "../saved_models/20k_5_2_constant_disc")

# Training Loop

In [None]:
# Training loop: alternates between updating the discriminator and the generator,
# logs one summary row per epoch, and finally writes the log to a .txt table and a .csv file.
# Relies on the hyperparameter cell having been run immediately before
# (it reads and updates rel_epochs, to_train, train_string, gen_epochs and disc_epochs).

#For saving prettyTable.txt file
heading = ["Epoch", "Machine Training", "Discriminator Loss", "Generator Loss", "FPR", "Recall", "Median Wasserstein", "Mean Wasserstein"]
table = PrettyTable()
table.field_names = heading

#For saving .csv file
rows = []

last_real_features = []  # Not read anywhere in this cell; kept in case other cells rely on it
switch_flag = False      # True once the dynamic schedule has announced itself
switch_count = 0         # Number of performance-triggered machine swaps (dynamic schedule only)


def _logged_losses(training_disc, trained_loss, real_batch):
    """
    Return (discriminator loss, generator loss) as plain floats for logging.

    The loss of the machine that was just trained is taken from trained_loss;
    the other machine's loss is evaluated once, without building a graph.
    """
    n_rows = len(real_batch)
    with torch.no_grad():
        if training_disc:
            other_loss = get_gen_loss(gen, disc, criterion, n_rows, z_dim)
            return trained_loss.item(), other_loss.item()
        other_loss = get_disc_loss(gen, disc, criterion, real_batch, n_rows, z_dim)
        return other_loss.item(), trained_loss.item()


for epoch in range(n_epochs):  
    if constant_train_flag:
        # Fixed schedule: disc_epochs discriminator epochs, then gen_epochs generator epochs
        if to_train == DISCRIMINATOR and rel_epochs >= disc_epochs:
            rel_epochs = 0
            to_train = GENERATOR
            train_string = "GEN"

        elif to_train == GENERATOR and rel_epochs >= gen_epochs:
            rel_epochs = 0
            to_train = DISCRIMINATOR
            train_string = "DISC"
        
        # Change epoch ratio after initial 'leveling out'
        if epoch == epoch_threshold:
            rel_epochs = 0
            to_train = GENERATOR
            train_string = "GEN"  # Same tag as every other generator epoch

            old_ratio = gen_epochs / disc_epochs
            gen_epochs = gen_epochs_change
            disc_epochs = disc_epochs_change
            new_ratio = gen_epochs / disc_epochs
            print(f'\n\nTraining ratio of G/D switched from {old_ratio:.{dig}f} to {new_ratio:.{dig}f}\n\n')
    else:
        if epoch < static_threshold:
            # Warm-up: fixed schedule using the static_* epoch counts
            if to_train == DISCRIMINATOR and rel_epochs >= static_disc_epochs:
                rel_epochs = 0
                to_train = GENERATOR
                train_string = "GEN"

            elif to_train == GENERATOR and rel_epochs >= static_gen_epochs:
                rel_epochs = 0
                to_train = DISCRIMINATOR
                train_string = "DISC"
        
        else:
            # Dynamic schedule: swap machines based on the metrics of the last evaluated batch
            if not switch_flag:
                print("\nSwitching to Dynamic Training\n")
                switch_flag = True
                to_train = DISCRIMINATOR
                train_string = "DISC"
            if to_train == DISCRIMINATOR and fpR <= pull_threshold and R >= recall_threshold:
                to_train = GENERATOR
                train_string = "GEN"
                print("\nPull Generator\n")
                switch_count += 1
            if to_train == GENERATOR and fpR >= push_threshold:
                to_train = DISCRIMINATOR
                train_string = "DISC"
                print("\nPush Generator\n")
                switch_count += 1

    training_disc = to_train == DISCRIMINATOR

    print(f'Epoch [{epoch + 1}/{n_epochs}] Training: {train_string} ', end ='')
    for batch_idx, (real_features, _) in enumerate(train_loader):
        real_batch = real_features.float()
        n_rows = len(real_features)
    
        if print_batches:
            print(f'\n\tBatch [{batch_idx + 1} / {len(train_loader)}] |', end ='')
    
        if training_disc:
            ### Training Discriminator
            opt_disc.zero_grad()
            disc_loss = get_disc_loss(gen, disc, criterion, real_batch, n_rows, z_dim)
            disc_loss.backward()  # The graph is not reused, so retain_graph is not needed
            opt_disc.step()
            trained_loss = disc_loss
        else:
            ### Training Generator
            opt_gen.zero_grad()
            gen_loss = get_gen_loss(gen, disc, criterion, n_rows, z_dim)
            gen_loss.backward()
            opt_gen.step()
            trained_loss = gen_loss

        # Metrics after the update step; the values of the last batch are what gets logged per epoch
        acc, P, R, fpR, F1 = performance_stats(gen, disc, n_rows, batch = real_batch)
        w_dist = all_Wasserstein_dists(gen, z_dim, feature_dim, real_batch)
        median_w_dist = torch.median(w_dist)
        mean_w_dist = torch.mean(w_dist)

        if print_batches:
            loss_d, loss_g = _logged_losses(training_disc, trained_loss, real_batch)
            print(f'Loss D: {loss_d:.{dig}f}, Loss G: {loss_g:.{dig}f} | Accuracy: {acc:.{dig}f} | fpR: {fpR:.{dig}f} P: {P:.{dig}f} | R: {R:.{dig}f} | F1: {F1:.{dig}f} | Median Wasserstein: {median_w_dist:.{dig}f} | Mean Wasserstein: {mean_w_dist:.{dig}f}')
        
    # Per-epoch summary: always printed and recorded, using the last batch of the epoch.
    # Both losses are evaluated once so the printout and the saved row agree.
    loss_d, loss_g = _logged_losses(training_disc, trained_loss, real_batch)
    summary_prefix = '\tEpoch Summary ' if print_batches else ''
    print(f'{summary_prefix}| Loss D: {loss_d:.{dig}f}, Loss G: {loss_g:.{dig}f} | Accuracy: {acc:.{dig}f} | fpR: {fpR:.{dig}f} | P: {P:.{dig}f} | R: {R:.{dig}f} | F1: {F1:.{dig}f} | Mean Wasserstein: {mean_w_dist:.{dig}f}')
    machine_name = "Discriminator" if training_disc else "Generator"
    row_to_add = [f"{epoch + 1}", machine_name, f"{loss_d:.{dig}f}", f"{loss_g:.{dig}f}", f"{fpR:.{dig}f}", f"{R:.{dig}f}", f"{median_w_dist:.{dig}f}", f"{mean_w_dist:.{dig}f}"]
    table.add_row(row_to_add)
    rows.append(row_to_add)
    rel_epochs += 1
    
    # Early stop once the mean per-feature Wasserstein distance of the last batch is small enough
    if mean_w_dist <= 0.1:
        break

    
print("\n\nTraining Session Finished")
print(f"Encountered {switch_count} non-trivial training swaps")

with open("../model_outputs/" + gan_id + ".txt", "w") as f:
    f.write(table.get_string())
print("Model Results Sucessfully Saved to \"../model_outputs/" + gan_id + ".txt\"")

# newline = "" lets the csv module control line endings (avoids blank rows on some platforms)
with open("../model_outputs/" + gan_id + ".csv", "w", newline = "") as csvfile: 
    # creating a csv writer object 
    csvwriter = csv.writer(csvfile) 
    # writing the fields 
    csvwriter.writerow(heading)
    # writing the data rows 
    csvwriter.writerows(rows)
print("Model Results Sucessfully Saved to \"../model_outputs/" + gan_id + ".csv\"")

Epoch [1/20000] Training: DISC | Loss D: 0.73119, Loss G: 0.98274 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.54281
Epoch [2/20000] Training: DISC | Loss D: 0.73307, Loss G: 0.96530 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.58612
Epoch [3/20000] Training: DISC | Loss D: 0.72313, Loss G: 0.96933 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.49133
Epoch [4/20000] Training: DISC | Loss D: 0.72340, Loss G: 0.95834 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.50525
Epoch [5/20000] Training: DISC | Loss D: 0.72169, Loss G: 0.95163 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.52981
Epoch [6/20000] Training: GEN | Loss D: 0.72340, Loss G: 0.96332 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserst

Epoch [50/20000] Training: DISC | Loss D: 0.68092, Loss G: 0.86181 | Accuracy: 0.51923 | fpR: 0.00000 | P: 1.00000 | R: 0.03846 | F1: 0.07407 | Mean Wasserstein: 0.38685


Training ratio of G/D switched from 0.40000 to 0.40000


Epoch [51/20000] Training: GENERATOR | Loss D: 0.69232, Loss G: 0.84509 | Accuracy: 0.48077 | fpR: 0.03846 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.44547
Epoch [52/20000] Training: GENERATOR | Loss D: 0.70258, Loss G: 0.84437 | Accuracy: 0.50000 | fpR: 0.00000 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.40234
Epoch [53/20000] Training: DISC | Loss D: 0.68847, Loss G: 0.83204 | Accuracy: 0.48077 | fpR: 0.03846 | P: 0.00000 | R: 0.00000 | F1: 0.00000 | Mean Wasserstein: 0.41508
Epoch [54/20000] Training: DISC | Loss D: 0.69688, Loss G: 0.83297 | Accuracy: 0.51923 | fpR: 0.00000 | P: 1.00000 | R: 0.03846 | F1: 0.07407 | Mean Wasserstein: 0.53960
Epoch [55/20000] Training: DISC | Loss D: 0.69080, Loss G: 0.83633 | Accuracy: 0.

Epoch [98/20000] Training: DISC | Loss D: 0.67571, Loss G: 0.81101 | Accuracy: 0.61538 | fpR: 0.07692 | P: 0.80000 | R: 0.30769 | F1: 0.44440 | Mean Wasserstein: 0.37795
Epoch [99/20000] Training: DISC | Loss D: 0.67182, Loss G: 0.82304 | Accuracy: 0.59615 | fpR: 0.00000 | P: 1.00000 | R: 0.19231 | F1: 0.32255 | Mean Wasserstein: 0.43631
Epoch [100/20000] Training: GEN | Loss D: 0.68257, Loss G: 0.81857 | Accuracy: 0.57692 | fpR: 0.11538 | P: 0.70000 | R: 0.26923 | F1: 0.38885 | Mean Wasserstein: 0.46740
Epoch [101/20000] Training: GEN | Loss D: 0.70040, Loss G: 0.78544 | Accuracy: 0.57692 | fpR: 0.19231 | P: 0.64286 | R: 0.34615 | F1: 0.44995 | Mean Wasserstein: 0.33667
Epoch [102/20000] Training: DISC | Loss D: 0.68378, Loss G: 0.77679 | Accuracy: 0.59615 | fpR: 0.19231 | P: 0.66667 | R: 0.38462 | F1: 0.48776 | Mean Wasserstein: 0.33885
Epoch [103/20000] Training: DISC | Loss D: 0.67083, Loss G: 0.81344 | Accuracy: 0.53846 | fpR: 0.15385 | P: 0.60000 | R: 0.23077 | F1: 0.33329 | Mean

Epoch [146/20000] Training: DISC | Loss D: 0.65002, Loss G: 0.76480 | Accuracy: 0.65385 | fpR: 0.23077 | P: 0.70000 | R: 0.53846 | F1: 0.60865 | Mean Wasserstein: 0.30108
Epoch [147/20000] Training: DISC | Loss D: 0.63343, Loss G: 0.82427 | Accuracy: 0.65385 | fpR: 0.23077 | P: 0.70000 | R: 0.53846 | F1: 0.60865 | Mean Wasserstein: 0.37065
Epoch [148/20000] Training: DISC | Loss D: 0.62322, Loss G: 0.86281 | Accuracy: 0.61538 | fpR: 0.11538 | P: 0.75000 | R: 0.34615 | F1: 0.47364 | Mean Wasserstein: 0.32154
Epoch [149/20000] Training: GEN | Loss D: 0.61979, Loss G: 0.79511 | Accuracy: 0.80769 | fpR: 0.19231 | P: 0.80769 | R: 0.80769 | F1: 0.80764 | Mean Wasserstein: 0.37676
Epoch [150/20000] Training: GEN | Loss D: 0.64276, Loss G: 0.73144 | Accuracy: 0.57692 | fpR: 0.46154 | P: 0.57143 | R: 0.61538 | F1: 0.59254 | Mean Wasserstein: 0.45257
Epoch [151/20000] Training: DISC | Loss D: 0.62606, Loss G: 0.75992 | Accuracy: 0.63462 | fpR: 0.23077 | P: 0.68421 | R: 0.50000 | F1: 0.57773 | Me

Epoch [194/20000] Training: DISC | Loss D: 0.58876, Loss G: 0.87844 | Accuracy: 0.63462 | fpR: 0.15385 | P: 0.73333 | R: 0.42308 | F1: 0.53654 | Mean Wasserstein: 0.43954
Epoch [195/20000] Training: DISC | Loss D: 0.60147, Loss G: 0.90389 | Accuracy: 0.78846 | fpR: 0.03846 | P: 0.94118 | R: 0.61538 | F1: 0.74414 | Mean Wasserstein: 0.39631
Epoch [196/20000] Training: DISC | Loss D: 0.61108, Loss G: 0.90869 | Accuracy: 0.57692 | fpR: 0.11538 | P: 0.70000 | R: 0.26923 | F1: 0.38885 | Mean Wasserstein: 0.34362
Epoch [197/20000] Training: DISC | Loss D: 0.57280, Loss G: 0.91262 | Accuracy: 0.65385 | fpR: 0.23077 | P: 0.70000 | R: 0.53846 | F1: 0.60865 | Mean Wasserstein: 0.37779
Epoch [198/20000] Training: GEN | Loss D: 0.60259, Loss G: 0.82631 | Accuracy: 0.69231 | fpR: 0.15385 | P: 0.77778 | R: 0.53846 | F1: 0.63632 | Mean Wasserstein: 0.37227
Epoch [199/20000] Training: GEN | Loss D: 0.65060, Loss G: 0.73455 | Accuracy: 0.53846 | fpR: 0.46154 | P: 0.53846 | R: 0.53846 | F1: 0.53841 | Me

# Saving the Model

In [None]:
# Change path and name of the Generator and Discriminator accordingly
# Persist the current weights of both networks.
# Change the path and name of the Generator and Discriminator files accordingly;
# these names are hard-coded and are not derived from gan_id.
torch.save(gen.state_dict(), "../saved_models/20k_5_2_trained_constant_gan")
torch.save(disc.state_dict(), "../saved_models/20k_5_2_trained_constant_disc")

# Visualizing Generation Quality

In [None]:
#Number of datum to visualize
# Number of rows to visualize (here: every row of X)
sample_size = len(X)
# Real rows to compare against
reals = X[0:sample_size, :]
# The same number of generated rows, detached so they can be converted to numpy
fakes = get_fake_samples(gen, sample_size, z_dim).detach()
density_curves(reals, fakes)

In [None]:
# Load a saved per-epoch training log for plotting.
# NOTE(review): this reads 'Test Gan.csv', whereas the training cell writes
# "../model_outputs/" + gan_id + ".csv" — confirm this is the intended run.
model_output = pd.read_csv('../model_outputs/Test Gan.csv')
model_output.head()

def plot_metrics(data, vanilla = True):
    """
    Plot training metrics against the 'Epoch' column of a saved training log.

    Parameters:
        data: DataFrame with an 'Epoch' column plus the metric columns plotted below.
        vanilla: when True, draw a 2x2 grid with scatter plots of 'FPR' and 'Recall'
                 and regression plots of 'Median Wasserstein' and 'Mean Wasserstein';
                 when False, draw only the two Wasserstein regression plots side by side.
    """
    sns.set(style = 'whitegrid', context = 'talk', palette = 'rainbow')

    # Each panel is (metric column, draw a regression line?)
    wasserstein_panels = [('Median Wasserstein', True), ('Mean Wasserstein', True)]
    if vanilla:
        figure_size = (15, 15)
        n_rows, n_cols = 2, 2
        panels = [('FPR', False), ('Recall', False)] + wasserstein_panels
    else:
        figure_size = (15, 8)
        n_rows, n_cols = 1, 2
        panels = wasserstein_panels

    plt.figure(figsize = figure_size)
    for position, (metric, with_trend) in enumerate(panels, start = 1):
        plt.subplot(n_rows, n_cols, position)
        if with_trend:
            axis = sns.regplot(x = 'Epoch', y = metric, data = data, line_kws = {'color': 'orange'})
        else:
            axis = sns.scatterplot(x = 'Epoch', y = metric, data = data)
        axis.set(xlim = (0, None))
        sns.despine()
    plt.show()

In [None]:
# Draw all four metric panels (vanilla = True) for the loaded training log
plot_metrics(model_output, True)

# Real/Fake Train Evaluation Metric

In [None]:
def generate_fake_real_dataset(gen, data, scaler):
    """
    Build a train/test split mixing real non-sitting rows with generated sitting rows.

    Real rows (class 0) are the rows of the interpolated dataframe whose
    'label:SITTING' equals 0, restricted to columns 1..26 and passed through
    scaler.transform. An equal number of rows (class 1) is produced by the generator;
    they are used as-is, i.e. the generator is assumed to already emit scaled features.

    The latent width is read from the notebook-level variable z_dim.

    Parameters:
        gen: generator module.
        data: raw dataframe containing a 'label:SITTING' column.
        scaler: fitted object exposing transform().

    Returns:
        X_train, y_train, X_test, y_test from a shuffled 80/20 split.
    """
    filled = interpolation(data)

    # Real, non-sitting rows -> class 0
    negatives = filled[filled['label:SITTING'] == 0]
    real_features = scaler.transform(negatives.iloc[:, 1:27].values)
    n_samples = real_features.shape[0]

    # Generated sitting rows -> class 1 (same count as the real rows)
    fake_features = gen(get_noise(n_samples, z_dim)).detach().numpy()

    # Stack real rows on top of fake rows, with labels in the matching order
    all_features = np.concatenate((real_features, fake_features), axis = 0)
    all_labels = np.concatenate((np.asarray([0] * n_samples), np.asarray([1] * n_samples)), axis = 0)

    X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size = 0.2, shuffle = True)
    return X_train, y_train, X_test, y_test

def generate_real_dataset(data, scaler):
    """
    Build a train/test split from real data only.

    Features are columns 1..26 of the interpolated dataframe passed through
    scaler.transform; labels are the interpolated 'label:SITTING' column.

    Parameters:
        data: raw dataframe containing a 'label:SITTING' column.
        scaler: fitted object exposing transform().

    Returns:
        X_train, y_train, X_test, y_test from a shuffled 80/20 split.
    """
    filled = interpolation(data)
    features = scaler.transform(filled.iloc[:, 1:27].values)
    labels = filled['label:SITTING'].values

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.2, shuffle = True)
    return X_train, y_train, X_test, y_test

def _fit_logistic_and_score(X_train, y_train, X_test, y_test):
    """Fit an L2-regularised logistic regression (C = 0.8) and score it on the test split."""
    classifier = LogisticRegression(penalty = 'l2', C = 0.8)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return classifier_performance(y_pred, y_test)

def train_test_real_fake(X_train, y_train, X_test, y_test):
    """
    Train and evaluate a logistic regression on a real/fake mixed dataset.

    Returns:
        The (accuracy, F-1) tuple produced by classifier_performance.
    """
    acc_and_f1 = _fit_logistic_and_score(X_train, y_train, X_test, y_test)
    return acc_and_f1

def train_test_real(X_train, y_train, X_test, y_test): 
    """
    Train and evaluate a logistic regression on a real-only dataset.

    Returns:
        The (accuracy, F-1) tuple produced by classifier_performance.
    """
    acc_and_f1 = _fit_logistic_and_score(X_train, y_train, X_test, y_test)
    return acc_and_f1

def classifier_performance(y_pred, y_test):
    """
    Compute accuracy and F-1 score for binary (0/1) predictions, printing a short summary.

    Parameters:
        y_pred: sequence of predicted labels (each 0 or 1).
        y_test: sequence of true labels (each 0 or 1), same length as y_pred.

    Returns:
        (accuracy, f1). Precision and recall are reported as 0 when their denominator
        is zero, accuracy is 0.0 for empty input, and F-1 uses a 0.001 term in its
        denominator to avoid dividing by zero.

    Raises:
        ValueError: if the two sequences differ in length, or if any label is not 0 or 1.
                    (Previously an unexpected label printed "Error" and called exit().)
    """
    if len(y_pred) != len(y_test):
        raise ValueError("y_pred and y_test must have the same length")

    tp = 0
    tn = 0
    fp = 0
    fn = 0

    for truth, guess in zip(y_test, y_pred):
        if truth == 1 and guess == 1:
            tp += 1  # True positive
        elif truth == 1 and guess == 0:
            fn += 1  # False negative
        elif truth == 0 and guess == 0:
            tn += 1  # True negative
        elif truth == 0 and guess == 1:
            fp += 1  # False positive
        else:
            raise ValueError(f"Labels must be 0 or 1, got y_test={truth!r}, y_pred={guess!r}")

    total = tp + tn + fp + fn
    acc = (tp + tn)/total if total else 0.0

    precision = tp / (tp + fp) if tp + fp else 0
    recall = tp / (tp + fn) if tp + fn else 0

    f1 = 2*(precision * recall / (precision + recall + 0.001))

    print(f'Precision: {precision:.3f} Recall: {recall:.3f} F-1 Score: {f1:.3f}')

    return acc, f1