In [1]:
from IPython import display
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity

import torch
from torch import nn, optim
from torch.autograd.variable import Variable
import torch.utils.data as data_utils

import matplotlib
import matplotlib.pyplot as plt

import time
from visdom import Visdom

from lib.VisdomWrapper import *
from lib.GANs import *
from lib.DataCreationWrapper import *

%load_ext autoreload
%autoreload 2

# Input

In [2]:
# Load the dataset; the path is relative to the notebook's working directory.
file_path = "input/creditcard.csv"
df = pd.read_csv(file_path)

# Features are every column except the first and the last; the last column is the label.
x = df.iloc[:, 1:-1].values
y = df.iloc[:, -1:].values
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=21, shuffle=True, stratify=y
)

# Store Num Features
n_features = x.shape[1]
n_noise_features = 10

# Rescale every feature to [-1, 1].
# The scaler is fitted on the training split ONLY and then re-used for the test
# split, so both splits share one scale and no test statistics leak into the
# preprocessing. (Test values may therefore fall slightly outside [-1, 1].)
scalar = MinMaxScaler(feature_range=(-1, 1))

x_train = scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)

# Mimic Real Data: keep only the training rows whose label equals data_to_mimic.
data_to_mimic = 0
x_train = x_train[np.ravel(y_train == data_to_mimic)]
# Boolean-mask indexing flattens y_train to 1-D; its length matches x_train.
y_train = y_train[y_train == data_to_mimic]

# To Tensor
# Tensors are left on the CPU here; the training loop moves each batch to the
# GPU when one is available.
x_train = torch.from_numpy(x_train).float()
x_test = torch.from_numpy(x_test).float()
y_train = torch.from_numpy(y_train).double()
y_test = torch.from_numpy(y_test).double()

# Creates Data Loader
ds_train = data_utils.TensorDataset(x_train, y_train)
data_loader = data_utils.DataLoader(ds_train, batch_size=100, shuffle=True)

# Training Steps (Wasserstein Loss)

In [3]:
def train_discriminator_wass(discr_nn, discr_optimizer, loss, gen_nn, real_data, noise_function, clip_value=None):
    """Run one optimisation step of the discriminator with a Wasserstein-style loss.

    The loss minimised is ``-mean(D(real)) + mean(D(fake))``.

    Args:
        discr_nn: Discriminator network; called on both the real and the fake batch.
        discr_optimizer: Optimizer that updates ``discr_nn``'s parameters.
        loss: Unused. Kept only so existing call sites keep working; the
            Wasserstein loss is computed directly from the predictions.
        gen_nn: Generator network used to synthesize the fake batch.
        real_data: Batch of real samples; its first dimension sets the fake batch size.
        noise_function: Noise source forwarded to ``synthesize_data``.
        clip_value: Optional positive float. When given, every discriminator
            parameter is clamped to ``[-clip_value, clip_value]`` after the
            optimizer step (WGAN weight clipping). ``None`` (default) disables
            clipping, which matches the previous behaviour.

    Returns:
        The scalar loss tensor computed for this step.
    """
    # Makes Fake Data
    # synthesize_data comes from the project's lib package (not shown here);
    # assumed to return a tensor produced by gen_nn -- TODO confirm.
    batch_size = real_data.size(0)
    # Detach so this step neither back-propagates through the generator nor
    # leaves stale gradients on the generator's parameters.
    fake_data = synthesize_data(gen_nn, batch_size, noise_function).detach()

    # Zero Grad
    discr_optimizer.zero_grad()

    # Prediction on Fake Data
    prediction_fake = discr_nn(fake_data)

    # Prediction on Real Data
    prediction_real = discr_nn(real_data)

    # Use a distinct local name so the (unused) ``loss`` argument is not shadowed.
    discr_loss = - torch.mean(prediction_real) + torch.mean(prediction_fake)
    discr_loss.backward()

    discr_optimizer.step()

    # Optional weight clipping to keep the critic's weights bounded.
    if clip_value is not None:
        with torch.no_grad():
            for param in discr_nn.parameters():
                param.clamp_(-clip_value, clip_value)

    return discr_loss

def train_generator_wass(gen_nn, gen_optimizer, loss, discr_nn, real_data, noise_function):
    """Run one optimisation step of the generator with a Wasserstein-style loss.

    The generator is updated to minimise ``-mean(D(G(z)))``.

    Args:
        gen_nn: Generator network that produces the fake batch.
        gen_optimizer: Optimizer that updates ``gen_nn``'s parameters.
        loss: Not read by this function; the loss is computed from the
            discriminator's predictions.
        discr_nn: Discriminator network used to score the fake batch.
        real_data: Batch of real samples; only its first dimension is used,
            to decide how many fake samples to synthesize.
        noise_function: Noise source forwarded to ``synthesize_data``.

    Returns:
        The scalar loss tensor computed for this step.
    """
    # Synthesize as many fake samples as there are real ones in this batch.
    n_samples = real_data.size(0)
    generated = synthesize_data(gen_nn, n_samples, noise_function)

    # Clear gradients left over from earlier backward passes.
    gen_optimizer.zero_grad()

    # Score the generated batch and negate the mean score.
    critic_scores = discr_nn(generated)
    gen_loss = - torch.mean(critic_scores)

    gen_loss.backward()
    gen_optimizer.step()

    return gen_loss

# Variables

In [4]:
# Variables

# Networks: the discriminator scores feature vectors, the generator maps
# noise vectors of size n_noise_features to feature vectors of size n_features.
discr_nn = DiscriminatorNetwork(n_features)
gen_nn = GeneratorNetwork(n_noise_features, n_features)

# Move both networks to the GPU when one is present.
if torch.cuda.is_available():
    discr_nn.cuda()
    gen_nn.cuda()

# One optimizer per network: plain SGD for the discriminator, Adam for the generator.
discr_optimizer = optim.SGD(params=discr_nn.parameters(), lr=0.01)
gen_optimizer = optim.Adam(params=gen_nn.parameters(), lr=0.001)

# Loss object handed to the training-step functions.
loss = torch.nn.BCELoss()

# Visdom-backed plotting helper (from the project's lib package).
vis = VisdomController()

Setting up a new session...


# Training

In [5]:
# Training
num_epochs = 25
noise_function = gaussian_noise
num_scatter_points = 80

# One Off Graphs
vis.ClearPlots()
vis.PlotRealFeatureDistributionComparison(5, 6, x_train, num_scatter_points)

# x-coordinate for the loss plots; advanced once per logged batch.
loss_axis = 0
for epoch in range(num_epochs):
    for n_batch, (batch, _) in enumerate(data_loader):
        # Tensors no longer need the deprecated Variable wrapper.
        real_batch = batch
        if torch.cuda.is_available():
            real_batch = real_batch.cuda()

        # Wall-clock start (seconds) of this batch's two training steps.
        t_start = time.time()

        discr_loss = train_discriminator_wass(discr_nn, discr_optimizer, loss, gen_nn, real_batch, noise_function)
        gen_loss = train_generator_wass(gen_nn, gen_optimizer, loss, discr_nn, real_batch, noise_function)

        # Report progress every 100 batches.
        if n_batch % 100 == 0:
            display.clear_output(True)

            # Basic Data
            print("Epoch {}, {} / {}".format(epoch, n_batch, len(data_loader)))
            print("discr_loss : ", discr_loss)
            print("gen_loss : ", gen_loss)

            vis.PlotLoss("Discr Loss", loss_axis, discr_loss.item())
            vis.PlotLoss("Gen Loss", loss_axis, gen_loss.item())
            loss_axis += 1

            # Elapsed time covers the current batch only, in seconds.
            t_end = time.time()
            print("Time Elapsed : ", t_end - t_start)

    # Refresh the fake-feature comparison plot once per epoch.
    vis.PlotFakeFeatureDistributionComparison(5, 6, gen_nn, num_scatter_points, noise_function)

# Forward slash works on every platform; the previous "models\gen_nn" literal
# contained an invalid "\g" escape and only formed a directory path on Windows.
torch.save(gen_nn.state_dict(), "models/gen_nn" + str(data_to_mimic))

Epoch 24, 2200 / 2275
discr_loss :  tensor(-0.0235, device='cuda:0', grad_fn=<AddBackward0>)
gen_loss :  tensor(-0.2906, device='cuda:0', grad_fn=<NegBackward>)
Time Elapsed :  0.04900002479553223
