# Import Modules

In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.linear_model import LogisticRegression

# Import the Data

In [2]:
def interpolation(df):
    col_to_avg = list(df.columns) #Start with keeping all the columns as columns to use an average interpolation on
    for k in range(len(list(df.columns))):
        if list(df.columns)[k].startswith(('discrete', 'label')): #Remove label and discrete columns from col_to_avg
            col_to_avg.remove(list(df.columns)[k])
    
    df_with_avg = df[col_to_avg].fillna(df[col_to_avg].mean()) #Interpolate nan columns for all continuous-valued columns with average
    
    col_to_zero = list(df.columns)
    for k in range(len(list(df.columns))):
        if not list(df.columns)[k].startswith(('discrete', 'label')): #Remove all columns except label and discrete
            col_to_zero.remove(list(df.columns)[k])
    
    df_with_zero = df[col_to_zero].fillna(0) #Interpolate nan values for label and discrete columns with 0
    
    return pd.concat([df_with_avg, df_with_zero], axis = 1)


data = pd.read_csv('../aggregated_data/aggregated_data.csv')

X = data.iloc[:,1:27]
y = data[['label:SITTING']]

#Get training data
X_sit = X[y['label:SITTING'] == 1]
X_sit = interpolation(X_sit)
X_sit = X_sit.values

X_not_sit  = X[y['label:SITTING'] == 0]
X_not_sit = interpolation(X_not_sit)
X_not_sit = X_not_sit.values

# Prepare Data

In [3]:
train_sample = 10000

test_sample = 2000

#Sample from real sitting data
index_choice = np.random.choice(len(X_sit), int(train_sample/2), replace=False)
X_sit_sample = X_sit[index_choice]
y_sit_sample = np.ones(int(train_sample/2))

#Sample from real non sitting data
index_choice = np.random.choice(len(X_not_sit), int(train_sample/2), replace=False)
X_not_sit_sample = X_not_sit[index_choice]
y_not_sit_sample = np.zeros(int(train_sample/2))

# Get testing data
index_choice = np.random.choice(len(X_sit), test_sample, replace=False)
X_test = X_sit[index_choice]
y_test = np.ones(test_sample)

#Concatenate to make train set
X_train = np.concatenate((X_sit_sample, X_not_sit_sample), axis = 0)
y_train = np.concatenate((y_sit_sample, y_not_sit_sample), axis = 0)

#Shuffle in unison
shuffler = np.random.permutation(len(X_train))
X_train = X_train[shuffler]
y_train = y_train[shuffler]

In [4]:
def classifier_performance(y_pred, y_test):
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    
    for k in range(len(y_pred)):
        #True positive
        if y_test[k] == 1 and y_pred[k] == 1:
            tp += 1
        #False Negative
        elif y_test[k] == 1 and y_pred[k] == 0:
            fn += 1
        #True Negative
        elif y_test[k] == 0 and y_pred[k] == 0:
            tn += 1
        elif y_test[k] == 0 and y_pred[k] == 1:
            fp += 1
        else:
            print("Error")
            exit()
            
    acc = (tp + tn)/(tp + tn + fp + fn)

    if tp + fp == 0:
        precision = 0
    else:
        precision = tp / (tp + fp)

    if tp + fn == 0:
        recall = 0
    else:
        recall = tp / (tp + fn)
    
    f1 = 2*(precision * recall / (precision + recall + 0.001))
    
    print(f'Precision: {precision:.3f} Recall: {recall:.3f} F-1 Score: {f1:.3f}')
    
    return acc, f1

def generator_block(input_dim, output_dim):
    return nn.Sequential(
        nn.Linear(input_dim, output_dim),
        nn.Dropout(0.1),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace = True)
    )

class Generator(nn.Module):
    def __init__(self, z_dim = 10, feature_dim = 26, hidden_dim = 128):
        super(Generator, self).__init__()
        self.gen = nn.Sequential(
            generator_block(z_dim, 80),
            generator_block(80, 60),
            generator_block(60, 40),
            generator_block(40, 28),
            nn.Linear(28, feature_dim)
        )
    def forward(self, noise):
        return self.gen(noise)

# Train on Real, Evaluate on Real

In [5]:
classifier = LogisticRegression(penalty = 'l2', C = 0.8)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
#print(y_pred, y_test)
accuracy, f1 = classifier_performance(y_pred, y_test)
print(f"\nTrain on Real, Evaluate on Real\nAccuracy: {accuracy:.5f} | F1: {f1:.5f}")  

Precision: 1.000 Recall: 1.000 F-1 Score: 1.000

Train on Real, Evaluate on Real
Accuracy: 1.00000 | F1: 0.99950


# Train on Real + Fake, Evaluate on Fake 

In [6]:
#Load in generator
z_dim = 100
Gen = Generator(z_dim)
Gen.load_state_dict(torch.load('../saved_models/gan'))

latent_vectors = torch.randn(int(train_sample/2), 100)
fake_features = Gen(latent_vectors).detach().numpy()
y_label_sitting = np.ones(int(train_sample/2))
                          
X_train = np.concatenate((fake_features, X_not_sit_sample), axis = 0)
y_train = np.concatenate((y_label_sitting, y_not_sit_sample), axis = 0)

#Shuffle in unison
shuffler = np.random.permutation(len(X_train))
X_train = X_train[shuffler]
y_train = y_train[shuffler]                       

                          
classifier = LogisticRegression(penalty = 'l2', C = 0.7)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracy, f1 = classifier_performance(y_pred, y_test)

print(f"\nTrain on Real + Fake, Evaluate on Real\nAccuracy: {accuracy:.5f} | F1: {f1:.5f}")                         

Precision: 0.000 Recall: 0.000 F-1 Score: 0.000

Train on Real + Fake, Evaluate on Real
Accuracy: 0.00000 | F1: 0.00000
