In [1]:
from IPython import display
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity
import torch
from torch import nn, optim
from torch.autograd.variable import Variable
import torch.utils.data as data_utils
import matplotlib
import matplotlib.pyplot as plt

import time
from visdom import Visdom

from lib.VisdomWrapper import *
from lib.GANs import *
from lib.DataCreationWrapper import *

%load_ext autoreload
%autoreload 2

In [2]:
# Load the dataset: every column except the last is a feature, the last column is the label.
file_path = "input/creditcard.csv"
df = pd.read_csv(file_path)

x = df.iloc[:, :-1].values
y = df.iloc[:, -1:].values  # sliced with -1: so it stays 2-D, shape (n_rows, 1)

# Stratified 80/20 split with a fixed random_state so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=21, shuffle=True, stratify=y
)

# Store Num Features
n_features = x.shape[1]
n_noise_features = 25  # presumably the generator's noise-vector width (first GeneratorNetwork argument)

# Center Mean and Unit Variance
# The scaler is fitted on the training split only and then reused for the test
# split. Scaling each split with its own statistics would put train and test in
# different feature spaces and let test-set statistics drive the preprocessing.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Mimic Real Data
# Keep only the training rows whose label is 0 (presumably the non-fraud class —
# confirm against the dataset). The boolean mask on the (n, 1) label array
# flattens y_train to 1-D.
x_train = x_train[np.ravel(y_train == 0)]
y_train = y_train[y_train == 0]

# To Tensor
x_train = torch.from_numpy(x_train).float()
x_test = torch.from_numpy(x_test).float()
y_train = torch.from_numpy(y_train).double()
y_test = torch.from_numpy(y_test).double()
# The full tensors are left on the CPU; the training loop moves each batch to
# the GPU when one is available.

# Creates Data Loader
ds_train = data_utils.TensorDataset(x_train, y_train)
data_loader = data_utils.DataLoader(ds_train, batch_size=100, shuffle=True)

In [3]:
# Variables

# Reproducibility
# Seed the NumPy and PyTorch generators before any weights are initialised so
# that a Restart & Run All starts from the same random state. The value matches
# the random_state used for the train/test split.
SEED = 21
np.random.seed(SEED)
torch.manual_seed(SEED)

# Models
# The discriminator takes n_features inputs; the generator is built from
# (n_noise_features, n_features). Both classes come from lib.GANs.
discr_nn = DiscriminatorNetwork(n_features)
gen_nn = GeneratorNetwork(n_noise_features, n_features)
if torch.cuda.is_available():
    discr_nn.cuda()
    gen_nn.cuda()

# Optimizers
# Created after the optional .cuda() move so they reference the parameters
# actually used for training.
discr_optimizer = optim.SGD(discr_nn.parameters(), lr=1e-3)
gen_optimizer = optim.Adam(gen_nn.parameters(), lr=1e-3)

# Loss
loss = nn.BCELoss()

# Visualizer
vis = VisdomController()

Setting up a new session...


In [None]:
# Test
num_epochs = 100
noise_function = gaussian_noise
num_scatter_points = 80

# Loop invariants, evaluated once instead of on every batch.
use_cuda = torch.cuda.is_available()
n_batches = len(data_loader)

# One Off Graphs
if vis.IsConnected():
    vis.ClearPlots()
    vis.PlotRealFeatureDistributionComparison(5, 6, x_train, num_scatter_points)

for epoch in range(num_epochs):
    # Running sums of the per-batch losses for this epoch.
    discr_epoch_loss = 0.0
    gen_epoch_loss = 0.0
    for n_batch, (batch, _) in enumerate(data_loader):
        # Labels from the loader are ignored; only the feature batch is used.
        real_batch = batch.cuda() if use_cuda else batch

        # Timer is restarted per batch, so the value printed below covers the
        # current batch only.
        t_start = time.time()

        discr_loss = train_discriminator(discr_nn, discr_optimizer, loss, gen_nn, real_batch, noise_function)
        gen_loss = train_generator(gen_nn, gen_optimizer, loss, discr_nn, real_batch, noise_function)

        # Loss History
        # The returned losses still carry grad_fn; summing them directly would
        # keep every batch's autograd graph alive until the end of the epoch.
        # Detach first so only the values are accumulated.
        discr_epoch_loss += discr_loss.detach()
        gen_epoch_loss += gen_loss.detach()

        if n_batch % 100 == 0:
            display.clear_output(True)

            # Basic Data
            print("Epoch {}, {} / {}".format(epoch, n_batch, n_batches))
            print("discr_loss : ", discr_loss)
            print("gen_loss : ", gen_loss)

            # hellinger_dist = EvaluateHellingerDistance(gen_nn, GetDensityEstimation, real_batch, uniform_noise)
            # print("hellinger_dist : ", (hellinger_dist))

            t_end = time.time()
            print("Time Elapsed : ", t_end - t_start)

    if vis.IsConnected():
        # float() accepts both a 0-dim tensor and the initial 0.0, so an empty
        # loader no longer breaks the plotting calls; max() avoids dividing by zero.
        vis.PlotLoss("Discr Loss", epoch, float(discr_epoch_loss) / max(n_batches, 1))
        vis.PlotLoss("Gen Loss", epoch, float(gen_epoch_loss) / max(n_batches, 1))
        vis.PlotFakeFeatureDistributionComparison(5, 6, gen_nn, num_scatter_points, noise_function)

Epoch 89, 1900 / 2275
discr_loss :  tensor(1.3876, device='cuda:0', grad_fn=<AddBackward0>)
gen_loss :  tensor(0.6947, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
Time Elapsed :  0.007998943328857422
