In [1]:
import pandas as pd
from gan_tf2 import GAN
from sklearn.preprocessing import MinMaxScaler
from category_encoders import BinaryEncoder

In [2]:
# Load IPython's autoreload extension so edits to local modules
# (e.g. gan_tf2) are picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all imported modules before each cell executes.
%autoreload 2

In [69]:
# Primary GAN instance used for the long (400-epoch) training run.
# g_outputs=10 lines up with the 10 columns of train_data. The meaning of the
# remaining keywords is defined inside gan_tf2.GAN, which is not visible here;
# the names suggest hidden-layer widths, noise-vector size, per-network
# learning rates and a discriminator-noise switch -- TODO confirm in gan_tf2.
test_gan = GAN(
    num_epochs=400,
    batch_size=100,
    d_hidden_dims=[80, 60, 40],
    g_hidden_dims=[80, 60, 40, 20],
    noise_inputs=15,
    g_outputs=10,
    g_learning_rate=4e-4,
    d_learning_rate=2e-5,
    d_noise=True,
)

In [4]:
# Read the raw transaction log into a DataFrame. Later cells rely on it having
# at least the columns 'type', 'step', 'nameOrig', 'nameDest' and
# 'isFlaggedFraud'.
fraud_data = pd.read_csv(
    filepath_or_buffer='data/PS_20174392719_1491204439457_log.csv',
)

In [5]:
# Preprocessing pipeline: integer-code the transaction type, binary-encode it,
# drop the columns that are not used for training, then rescale every
# remaining column to [-1, 1].
# NOTE(review): this cell overwrites `fraud_data`, so it is not re-runnable
# without first re-executing the cell that loads the CSV.

# Step 1: map each transaction-type label to an integer code. The nested dict
# restricts the replacement to the 'type' column.
categorical_dict = {
    'type': {
        'CASH_OUT': 0,
        'TRANSFER': 1,
        'DEBIT': 2,
        'CASH_IN': 3,
        'PAYMENT': 4,
    }
}
fraud_data = fraud_data.replace(categorical_dict)

# Step 2: expand 'type' into binary-encoded columns.
benc = BinaryEncoder(cols=['type'])
fraud_data = benc.fit_transform(fraud_data)

# Step 3: discard the columns that are not fed to the GAN.
fraud_data = fraud_data.drop(
    columns=['step', 'nameOrig', 'nameDest', 'isFlaggedFraud'],
)

# Step 4: min-max scale every column to [-1, 1]; the scaler returns a bare
# array, so the column labels are re-attached afterwards.
scaler = MinMaxScaler(feature_range=(-1, 1))
fraud_data = pd.DataFrame(
    scaler.fit_transform(fraud_data),
    columns=fraud_data.columns,
)

In [6]:
# Take the first 10,000 preprocessed rows (all columns) as a NumPy array for
# training, then display its shape as a sanity check.
train_data = fraud_data.iloc[:10000].to_numpy()
train_data.shape

(10000, 10)

In [7]:
# Preview the first five training rows (every column) to eyeball the scaling.
train_data[:5, :]

array([[-1.        , -1.        , -1.        ,  1.        , -0.99978713,
        -0.9942893 , -0.99353449, -1.        , -1.        , -1.        ],
       [-1.        , -1.        , -1.        ,  1.        , -0.99995967,
        -0.99928677, -0.99921812, -1.        , -1.        , -1.        ],
       [-1.        , -1.        ,  1.        , -1.        , -0.99999608,
        -0.99999392, -1.        , -1.        , -1.        ,  1.        ],
       [-1.        , -1.        ,  1.        ,  1.        , -0.99999608,
        -0.99999392, -1.        , -0.99988101, -1.        ,  1.        ],
       [-1.        , -1.        , -1.        ,  1.        , -0.99974757,
        -0.99860522, -0.99879456, -1.        , -1.        , -1.        ]])

In [70]:
# Train test_gan on the 10-column training array. The captured log reports
# timing plus generator/discriminator loss every 5 epochs. The value echoed
# after the log is a gan_tf2.GAN object, so fit() appears to return the model
# -- TODO confirm it is the same instance.
test_gan.fit(train_data)

Time for epoch 5 is 41.8 seconds.
Total time passed: 41.8 seconds
Generator loss: 0.7598171830177307
Discriminator loss: 1.3215969800949097
Time for epoch 10 is 42.57 seconds.
Total time passed: 84.37 seconds
Generator loss: 0.7434685230255127
Discriminator loss: 1.2631886005401611
Time for epoch 15 is 39.86 seconds.
Total time passed: 124.23 seconds
Generator loss: 0.7665175795555115
Discriminator loss: 1.2383842468261719
Time for epoch 20 is 40.59 seconds.
Total time passed: 164.82 seconds
Generator loss: 0.7946287989616394
Discriminator loss: 1.2135515213012695
Time for epoch 25 is 37.59 seconds.
Total time passed: 202.41 seconds
Generator loss: 0.7421589493751526
Discriminator loss: 1.2735220193862915
Time for epoch 30 is 39.12 seconds.
Total time passed: 241.53 seconds
Generator loss: 0.7574471235275269
Discriminator loss: 1.2843245267868042
Time for epoch 35 is 39.24 seconds.
Total time passed: 280.77 seconds
Generator loss: 0.7418281435966492
Discriminator loss: 1.29584145545959

<gan_tf2.GAN at 0x143c0f2e0>

In [17]:
# Request 8 samples from the GAN.
# NOTE(review): this cell's execution count (In [17]) is older than the cells
# that build and train test_gan (In [69] / In [70]), so the stored output was
# produced by an earlier model state -- re-run after training to refresh it.
test_result = test_gan.sample(8)

In [18]:
# Display the sampled array; each row has one value per training column.
test_result

array([[ 0.910182  , -0.98520887, -0.9752883 ,  0.99457186,  0.00423375,
         0.7743641 , -0.6639361 ,  0.9473352 ,  0.04272564, -0.96237135],
       [ 0.9082209 ,  0.9995514 , -0.9886971 ,  0.9568101 , -0.00832636,
         0.84341276, -0.5647451 ,  0.9727669 ,  0.05889408, -0.97560275],
       [ 0.90948814, -0.9882861 , -0.9755381 ,  0.99522156,  0.00328785,
         0.7688832 , -0.6703456 ,  0.9479514 ,  0.04208642, -0.96014655],
       [ 0.9148122 ,  0.9999995 , -0.9871273 ,  0.9950611 ,  0.03107022,
         0.9454979 , -0.38489586,  0.97069377,  0.03623227, -0.96736115],
       [ 0.9123133 , -0.96915317,  1.        , -1.        ,  0.02792535,
         0.7814938 , -0.6088212 ,  0.94834495,  0.03779935, -0.9769307 ],
       [ 0.90706575, -0.98280764, -0.9760368 ,  0.9953471 ,  0.00317393,
         0.7703995 , -0.6630028 ,  0.9504857 ,  0.04238599, -0.95953345],
       [ 0.909603  , -0.9835307 , -0.973935  ,  0.99160475,  0.00588902,
         0.76772463, -0.6644235 ,  0.9502627 

In [11]:
# Second GAN with wider, symmetric hidden layers for a short 4-epoch
# comparison run; learning rates and other options are left at whatever
# defaults gan_tf2.GAN defines.
# The noise-size keyword is spelled `noise_inputs`, matching the test_gan
# constructor cell, which was executed most recently and is therefore known to
# be accepted by the current gan_tf2.GAN. This cell previously passed
# `n_inputs`, which dates from an earlier revision of the module and is
# presumably rejected now -- verify against gan_tf2 if both spellings are
# meant to be supported.
test_gan2 = GAN(
    num_epochs=4,
    batch_size=100,
    d_hidden_dims=[100, 80, 60, 40],
    g_hidden_dims=[100, 80, 60, 40],
    noise_inputs=15,
    g_outputs=10,
)

In [12]:
# Train the second GAN on the same training array. The captured log reports
# per-epoch time and generator/discriminator loss for each of the 4 epochs.
# NOTE(review): the log format differs from the test_gan run, which suggests
# this output was captured with an older revision of gan_tf2 -- re-run to
# refresh.
test_gan2.fit(train_data)

Time for epoch 1 is 5.01425313949585 sec
Generator loss: 1.6127874851226807
Discriminator loss: 0.6785776615142822
Time for epoch 2 is 5.09872579574585 sec
Generator loss: 3.310512065887451
Discriminator loss: 0.1629384160041809
Time for epoch 3 is 4.925832271575928 sec
Generator loss: 4.329888820648193
Discriminator loss: 0.04788346216082573
Time for epoch 4 is 4.925717115402222 sec
Generator loss: 4.480656623840332
Discriminator loss: 0.05581652745604515


<gan_tf2.GAN at 0x180e71370>