In [1]:
# Inspect the Kaggle CLI config directory to see whether an API token is already present.
# The mkdir is left commented out; uncomment it if the directory does not exist yet.
# !mkdir /root/.kaggle
!ls /root/.kaggle -lsa

total 16
4 drwxr-xr-x 2 root root 4096 Jan 11 20:03 .
8 drwx------ 1 root root 4096 Jan 11 20:04 ..
4 -rw-r--r-- 1 root root   66 Jan 11 20:21 kaggle.json


In [2]:
# !mkdir /root/.kaggle
!cp kaggle.json /root/.kaggle/
!ls /root/.kaggle -lsa
!kaggle datasets download -d mlg-ulb/creditcardfraud

total 16
4 drwxr-xr-x 2 root root 4096 Jan 11 20:03 .
8 drwx------ 1 root root 4096 Jan 11 20:04 ..
4 -rw-r--r-- 1 root root   66 Jan 11 20:24 kaggle.json
creditcardfraud.zip: Skipping, found more recently modified local copy (use --force to force download)


In [0]:
import warnings

warnings.filterwarnings("ignore")

In [4]:

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers import MaxPooling2D, concatenate
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras import losses
from keras.utils import to_categorical
import keras.backend as K

import matplotlib.pyplot as plt

import numpy as np

class AlphaGAN():
    """Encoder / generator / discriminator model for fixed-length feature rows.

    Three sub-networks are built:
      * encoder:       row -> latent code
      * generator:     latent code -> row (tanh output, so values lie in [-1, 1])
      * discriminator: scores a (latent code, row) pair with a sigmoid

    ``alphagan_generator`` is the combined model used to update the encoder and
    generator; it outputs the two discriminator scores plus the reconstruction
    ``generator(encoder(row))``.

    The code keeps the ``img`` naming of the image GANs it was adapted from;
    an "img" here is simply one feature row of shape ``img_shape``.
    """

    def __init__(self, img_shape=(29,), latent_dim=10,
                 reconstruction_loss='binary_crossentropy'):
        """Build and compile all sub-networks.

        Args:
            img_shape: shape of one input row (without the batch axis).
            latent_dim: size of the latent code.
            reconstruction_loss: Keras loss used for the reconstruction output
                of the combined model. The default keeps the original
                behaviour. NOTE(review): binary cross-entropy assumes targets
                in [0, 1]; with standardised features it can go negative, so
                'mse' may be a better fit - confirm before changing.
        """
        self.img_shape = tuple(img_shape)
        self.latent_dim = latent_dim

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # Build the encoder
        self.encoder = self.build_encoder()

        # Freeze the discriminator inside the combined model so that training
        # the combined model only updates the generator and encoder weights
        # (the usual Keras GAN idiom: the discriminator was compiled above,
        # before the flag was flipped).
        self.discriminator.trainable = False

        # Generate a row from sampled noise
        z = Input(shape=(self.latent_dim, ))
        img_ = self.generator(z)

        # Encode a real row and reconstruct it
        img = Input(shape=self.img_shape)
        z_ = self.encoder(img)
        reconstructed_img = self.generator(z_)

        # Discriminator scores for the (noise, generated) and (encoded, real) pairs
        fake = self.discriminator([z, img_])
        valid = self.discriminator([z_, img])

        # Set up and compile the combined model.
        # Trains generator and encoder to fool the discriminator and to reconstruct the input.
        self.alphagan_generator = Model([z, img], [fake, valid, reconstructed_img])
        self.alphagan_generator.compile(
            loss=['binary_crossentropy', 'binary_crossentropy', reconstruction_loss],
            optimizer=optimizer)

    def train(self,X_train, epochs, batch_size=128, sample_interval=50):
        """Alternate discriminator and generator/encoder updates.

        Each "epoch" is ONE randomly sampled batch, not a pass over the data.

        Args:
            X_train: 2-D array of rows, indexed along axis 0.
            epochs: number of batch updates to run.
            batch_size: rows sampled (with replacement) per update.
            sample_interval: call ``self.sample_interval(epoch)`` every this many updates.
        """
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Sample noise and generate rows
            z = np.random.normal(size=(batch_size, self.latent_dim))
            imgs_ = self.generator.predict(z)

            # Select a random batch of real rows and encode them
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            z_ = self.encoder.predict(imgs)

            # Train the discriminator (img -> z is valid, z -> img is fake)
            d_loss_real = self.discriminator.train_on_batch([z_, imgs], valid)
            d_loss_fake = self.discriminator.train_on_batch([z, imgs_], fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Train generator and encoder with flipped labels (z -> img is valid,
            # img -> z is fake) and the real rows as the reconstruction target.
            g_loss = self.alphagan_generator.train_on_batch([z, imgs], [valid, fake,imgs])

            # Report progress
            print ("%d [D loss: %f, acc: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss[0]))

            # Periodic hook (currently a no-op, see sample_interval)
            if epoch % sample_interval == 0:
                self.sample_interval(epoch)

    def sample_interval(self, epoch):
        """Hook called by ``train`` every ``sample_interval`` updates; intentionally a no-op."""
        pass


    def build_encoder(self):
        """Return the encoder model mapping a row of ``img_shape`` to a latent code."""
        # Name is passed to the constructor; assigning `model.name` afterwards
        # is rejected by newer Keras versions.
        model = Sequential(name="encoder")

        model.add(Dense(64,input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(64))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(self.latent_dim))
        model.summary()

        img = Input(shape=self.img_shape)
        z = model(img)

        return Model(img, z)

    def build_generator(self):
        """Return the generator model mapping a latent code to a row of ``img_shape``."""
        model = Sequential(name="generator")

        model.add(Dense(64, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(64))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # np.prod returns a NumPy integer; Dense expects a plain int for `units`.
        model.add(Dense(int(np.prod(self.img_shape)), activation='tanh'))
        model.add(Reshape(self.img_shape))

        model.summary()
        z = Input(shape=(self.latent_dim,))
        gen_img = model(z)

        return Model(z, gen_img)

    def build_discriminator(self):
        """Return the discriminator scoring a (latent code, row) pair in (0, 1)."""
        z = Input(shape=(self.latent_dim, ))
        img = Input(shape=self.img_shape)
        d_in = concatenate([z, img])

        hidden = d_in
        # Three identical Dense -> LeakyReLU -> Dropout stages
        for _ in range(3):
            hidden = Dense(128)(hidden)
            hidden = LeakyReLU(alpha=0.2)(hidden)
            hidden = Dropout(0.5)(hidden)
        validity = Dense(1, activation="sigmoid")(hidden)

        model = Model([z, img], validity, name="discriminator")
        model.summary()
        return model

    def get_losses(self,imgs):
        """Return a scalar score for a batch of rows (no weights are updated).

        The score is the averaged discriminator loss on the (encoded, real)
        and (noise, generated) pairs plus the first entry returned by the
        combined model's ``evaluate`` (its total loss under the Keras
        multi-output convention). The result is stochastic because fresh
        noise is sampled on every call.

        Args:
            imgs: 2-D array of rows, shape ``(batch, *img_shape)``.
        """
        batch_size = imgs.shape[0]
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        z = np.random.normal(size=(batch_size, self.latent_dim))
        imgs_ = self.generator.predict(z)

        z_ = self.encoder.predict(imgs)

        # Evaluate the discriminator (img -> z is valid, z -> img is fake)
        d_loss_real = self.discriminator.evaluate([z_, imgs], valid,verbose=0)
        d_loss_fake = self.discriminator.evaluate([z, imgs_], fake,verbose=0)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Evaluate the combined model with the same targets `train` uses.
        # The reconstruction target must be the real rows: comparing the
        # reconstruction with itself would not measure reconstruction error.
        g_loss = self.alphagan_generator.evaluate([z, imgs], [valid, fake, imgs],verbose=0)

        return d_loss[0] + g_loss[0]

# Instantiate the model; the constructor builds and compiles every sub-network
# and prints a summary for each of them.
model = AlphaGAN()

Using TensorFlow backend.






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "discriminator"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 10)           0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 29)           0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 39)           0           input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________

In [5]:
# Load the credit-card fraud CSV straight from the zip archive fetched by the Kaggle CLI.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
data = pd.read_csv("creditcardfraud.zip",compression='zip')
# Preview the first rows as the cell output.
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [6]:
from sklearn.preprocessing import StandardScaler

# Replace the raw 'Amount' column with a standardised 'normAmount' column and
# drop 'Time'. The guard keeps the cell re-runnable: after the first run
# 'Amount' no longer exists, and an unguarded second run would raise KeyError.
if 'Amount' in data.columns:
    amount_scaled = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))
    data = (
        data
        .assign(normAmount=amount_scaled.ravel())  # new frame; appended as the last column
        .drop(['Time', 'Amount'], axis=1)
    )
data.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class,normAmount
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0,0.244964
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,0,-0.342475
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0,1.160686
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0,0.140534
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,0,-0.073403


In [7]:
# Split the frame into a feature matrix (every column except 'Class') and a
# flat label vector. `.loc` replaces `.ix`, which was removed in pandas 1.0.
X = data.loc[:, data.columns != 'Class'].values
y = data.loc[:, data.columns == 'Class'].values.flatten()
X.shape

(284807, 29)

In [8]:
# Class balance: the distinct labels and how many rows carry each.
np.unique(y,return_counts=True)

(array([0, 1]), array([284315,    492]))

In [0]:
from sklearn.model_selection import train_test_split
# Keep only the rows labelled 0; rows labelled 1 take no part in this split.
x_in = X[y==0]

# Hold out 1% of those rows as X_test; random_state pins the shuffle.
X_train, X_test = train_test_split(x_in, test_size=0.01, random_state=42)

In [0]:
# Train on the class-0 training split. Each "epoch" in AlphaGAN.train is a single
# randomly sampled batch, so this runs 40000 updates of 32 rows each.
model.train(X_train=X_train,epochs=40000, batch_size=32, sample_interval=400)









0 [D loss: 0.712193, acc: 46.88%] [G loss: 2.528378]
1 [D loss: 0.774539, acc: 43.75%] [G loss: 2.668298]
2 [D loss: 0.772798, acc: 54.69%] [G loss: 2.402203]
3 [D loss: 0.680436, acc: 56.25%] [G loss: 2.714211]
4 [D loss: 0.591769, acc: 64.06%] [G loss: 1.711408]
5 [D loss: 0.806425, acc: 50.00%] [G loss: 2.331377]
6 [D loss: 0.786680, acc: 54.69%] [G loss: 2.246284]
7 [D loss: 0.951643, acc: 37.50%] [G loss: 2.593740]
8 [D loss: 0.772852, acc: 46.88%] [G loss: 2.288142]
9 [D loss: 0.780662, acc: 48.44%] [G loss: 2.392354]
10 [D loss: 0.703932, acc: 57.81%] [G loss: 1.702257]
11 [D loss: 0.761179, acc: 56.25%] [G loss: 2.219527]
12 [D loss: 0.707027, acc: 45.31%] [G loss: 2.500971]
13 [D loss: 0.777321, acc: 43.75%] [G loss: 2.297823]
14 [D loss: 0.667402, acc: 54.69%] [G loss: 2.299029]
15 [D loss: 0.841051, acc: 42.19%] [G loss: 2.021248]
16 [D loss: 0.804405, acc: 48.44%] [G loss: 2.399034]
17 [D loss: 0.699533, acc: 60.94%] [G loss: 2.133566]
18 [D loss: 0.686913, acc: 54.

In [0]:
# Smoke test: score the first row (as a 1-row batch) with the trained model.
model.get_losses(X[:1])

In [0]:
# Display the label vector for a quick look.
y

In [0]:
# Balanced evaluation set: all rows labelled 1 plus the same number of held-out
# rows labelled 0. The count is derived from the data instead of hard-coding 492.
x_out = X[y == 1]
x_in = X_test[:len(x_out)]
test_X = np.concatenate((x_in, x_out), axis=0)
# Labels follow the concatenation order: 0 for the held-out rows, 1 for the others.
test_y = np.concatenate((np.zeros(len(x_in)), np.ones(len(x_out))))
test_X.shape, test_y.shape

In [0]:
# Score every evaluation row on its own (as a 1-row batch) so each row gets an
# individual value, then split the scores by label.
per_sample_scores = np.array([
    np.abs(model.get_losses(sample.reshape((1, -1))))
    for sample in test_X
])

is_inlier = test_y == 0
inliers = per_sample_scores[is_inlier]
outliers = per_sample_scores[~is_inlier]

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_density(inl, outl, train=False):
    """Overlay histogram + KDE curves of the scores of inliers and outliers.

    Args:
        inl: 1-D sequence of scores for the inlier rows.
        outl: 1-D sequence of scores for the outlier rows.
        train: picks the "Train" title when True, the "Test" title otherwise.

    Draws on the current pyplot figure and displays it with ``plt.show()``;
    returns None.
    """
    # Same plot settings for both groups; only the data and legend label differ.
    for scores, label in ((inl, 'inliers'), (outl, 'outliers')):
        sns.distplot(scores, hist=True, kde=True,
                     hist_kws={'edgecolor': 'black'},
                     kde_kws={'linewidth': 4}, label=label)

    split = 'Train' if train else 'Test'
    # The plotted values are summed losses (see the x label), not L2 norms.
    plt.title('%s dist of summed losses of inliers and outliers' % split)
    plt.xlabel('Sum of Loss')
    # With a KDE drawn, distplot normalises the histogram, so the y axis is a
    # density rather than a count.
    plt.ylabel('Density')
    plt.legend()
    plt.show()
# Compare the score distributions of the two groups on the evaluation set.
plot_density(inliers, outliers, train=False)