# CNN Model 1

## The approach:

### Without time feature
### Pass 1x29 vectors into a convolutional layer, with kernel size 29, with some D number of filters
### Add extra conv and dense layer to the model to see the effect

## First run: single conv layer single dense layer:

In [2]:
from numpy.random import seed
import pandas as pd
import numpy as np
from sklearn.preprocessing import minmax_scale
# from keras_diagram import ascii
from keras.layers.convolutional import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
import keras

data = pd.read_csv("creditcard.csv")

# Normalise and reshape the Amount column, so it's values lie between -1 and 1
from sklearn.preprocessing import StandardScaler
data['norm_Amount'] = StandardScaler().fit_transform(data['Amount'].reshape(-1,1))

# Drop the old Amount column and also the Time column as we don't want to include this at this stage
data = data.drop(['Time', 'Amount'], axis=1)

# Assign variables x and y corresponding to row data and it's class value
X = data.ix[:, data.columns != 'Class']
y = data.ix[:, data.columns == 'Class']

def generate_train_test_sample(x_data, y_data): 
    ''' 1) Generate new, random train-test split
        2) Random smote oversample the train data, keeping test data unseen
        3) Use this new train-test split to fit and test model
    '''

    X_train, X_test, y_train, y_test = train_test_split(x_data,y_data,test_size = 0.3)

    from collections import Counter
    from imblearn.over_sampling import SMOTE
    sm = SMOTE()
    X_res, y_res = sm.fit_sample(X_train, y_train)
    print('Resampling the data with SMOTE. . .')
    print('Resampled training dataset shape {}'.format(Counter(y_res)))

    return X_res, y_res, X_test, y_test

########################################################################

X_res, y_res, X_test, y_test = generate_train_test_sample(X, y)

print X_res.shape, type(X_res)
print y_res.shape

X_train = X_res.reshape(X_res.shape[0], 29, 1)
Y_train = y_res.reshape(y_res.shape[0], 1)
X_test = X_test.values.reshape(X_test.values.shape[0], 29, 1)
Y_test = y_test.values.reshape(y_test.values.shape[0], 1)

Y_test = keras.utils.to_categorical(Y_test)
Y_train = keras.utils.to_categorical(Y_train)
print Y_test.shape
print Y_train.shape


seed(2017)
conv = Sequential()
conv.add(Conv1D(256, 29, input_shape=(29, 1), activation='relu'))

conv.add(Flatten())

conv.add(Dense(300, activation = 'relu'))
conv.add(Dense(2, activation = 'softmax'))

sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
conv.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])
conv.fit(X_train, Y_train, batch_size = 500, epochs = 50, verbose = 1)
score = conv.evaluate(X_test, Y_test, batch_size=500)

y_pred = conv.predict(X_test)


Using TensorFlow backend.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  y = column_or_1d(y, warn=True)


Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199012, 1: 199012})
(398024, 29) <type 'numpy.ndarray'>
(398024,)
(85443, 2)
(398024, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [3]:
y_pred.shape

(85443, 2)

In [12]:
cutt_off_tr = 0.5
y_pred[np.where(y_pred>=cutt_off_tr)] = 1
y_pred[np.where(y_pred<cutt_off_tr)]  = 0

prfs0 = precision_recall_fscore_support(Y_test, y_pred, labels=[0])

print 'Classification Report: \n'
print '[Precision, Recall, F1, Support]'
print '='*100
print '0:      {}         {}      {}      {}   '.format(prfs0[0][0], prfs0[1][0], prfs0[2][0], prfs0[3][0])
print '1:      {}         {}      {}      {}   '.format(prfs0[0][1], prfs0[1][1], prfs0[2][1], prfs0[3][1])
print '='*100

print 'F1 Score, Fraud Class = {}'.format(prfs0[2][1])

Classification Report: 

[Precision, Recall, F1, Support]
0:      0.999742023241         0.999460745812      0.99960136474      85303   
1:      0.719512195122         0.842857142857      0.776315789474      140   
F1 Score, Fraud Class = 0.776315789474


In [18]:
print conv.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 1, 256)            7680      
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 1, 256)            65792     
_________________________________________________________________
flatten_2 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 300)               77100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 180,874
Trainable params: 180,874
Non-trainable params: 0
_________________________________________________________________
None

### Evaluation of first run

We can see that this very simple model, with just a single convolution layer piped into a simple dense network, already gives comparable F1 to our top two baseline classifiers:

CNNv1.1:                 0.776316

RandomForestClassifier:  0.846437   
MLPClassifier:           0.750672 

## Second run, with added conv and dense layer 

In [17]:
########################################################################
# CNNv1.2

X_res, y_res, X_test, y_test = generate_train_test_sample(X, y)

print X_res.shape, type(X_res)
print y_res.shape

X_train = X_res.reshape(X_res.shape[0], 29, 1)
Y_train = y_res.reshape(y_res.shape[0], 1)
X_test = X_test.values.reshape(X_test.values.shape[0], 29, 1)
Y_test = y_test.values.reshape(y_test.values.shape[0], 1)

Y_test = keras.utils.to_categorical(Y_test)
Y_train = keras.utils.to_categorical(Y_train)
print Y_test.shape
print Y_train.shape


seed(2017)
conv = Sequential()
conv.add(Conv1D(256, 29, input_shape=(29, 1), activation='relu'))
conv.add(Conv1D(256, 1, activation='relu'))
conv.add(Flatten())

conv.add(Dense(300, activation = 'relu'))
conv.add(Dense(100, activation = 'relu'))
conv.add(Dense(2, activation = 'softmax'))

sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
conv.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])
conv.fit(X_train, Y_train, batch_size = 500, epochs = 50, verbose = 1)
score = conv.evaluate(X_test, Y_test, batch_size=500)

y_pred = conv.predict(X_test)

Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199022, 1: 199022})
(398044, 29) <type 'numpy.ndarray'>
(398044,)
(85443, 2)
(398044, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
y_pred[np.where(y_pred>=cutt_off_tr)] = 1
y_pred[np.where(y_pred<cutt_off_tr)]  = 0

prfs0 = precision_recall_fscore_support(Y_test, y_pred, labels=[0])

print 'Classification Report: \n'
print '[Precision, Recall, F1, Support]'
print '='*100
print '0:      {}         {}      {}      {}   '.format(prfs0[0][0], prfs0[1][0], prfs0[2][0], prfs0[3][0])
print '1:      {}         {}      {}      {}   '.format(prfs0[0][1], prfs0[1][1], prfs0[2][1], prfs0[3][1])
print '='*100

print 'F1 Score, Fraud Class = {}'.format(prfs0[2][1])

Classification Report: 

[Precision, Recall, F1, Support]
0:      0.999718584009         0.999601374087      0.999659975612      85293   
1:      0.7875         0.84      0.812903225806      150   
F1 Score, Fraud Class = 0.812903225806


### Evaluation of second run

This seems very promising, already matching our best baseline classifier, with our first CNN approach.

#### However, we should at least average runs to get confidence in results

## CNNv1 averaged

In [44]:
def average_run():
    reports = []
    for i in range(3):
        X_res, y_res, X_test, y_test = generate_train_test_sample(X, y)

        print X_res.shape, type(X_res)
        print y_res.shape

        X_train = X_res.reshape(X_res.shape[0], 29, 1)
        Y_train = y_res.reshape(y_res.shape[0], 1)
        X_test = X_test.values.reshape(X_test.values.shape[0], 29, 1)
        Y_test = y_test.values.reshape(y_test.values.shape[0], 1)

        Y_test = keras.utils.to_categorical(Y_test)
        Y_train = keras.utils.to_categorical(Y_train)
        print Y_test.shape
        print Y_train.shape


        seed(2017)
        conv = Sequential()
        conv.add(Conv1D(256, 29, input_shape=(29, 1), activation='relu'))
        conv.add(Flatten())
        conv.add(Dense(300, activation = 'relu'))
        conv.add(Dense(2, activation = 'softmax'))

        sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
        conv.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])
        conv.fit(X_train, Y_train, batch_size = 500, epochs = 50, verbose = 1, callbacks=[keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0001, patience=5, verbose=1, mode='auto')])
        score = conv.evaluate(X_test, Y_test, batch_size=500)

        y_pred = conv.predict(X_test)

        cutt_off_tr = 0.5
        y_pred[np.where(y_pred>=cutt_off_tr)] = 1
        y_pred[np.where(y_pred<cutt_off_tr)]  = 0

        prfs0 = precision_recall_fscore_support(Y_test, y_pred, labels=[0])
        reports.append(prfs0)
    
    return reports

In [45]:
reports = average_run()

Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199015, 1: 199015})
(398030, 29) <type 'numpy.ndarray'>
(398030,)
(85443, 2)
(398030, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 00016: early stopping
Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199032, 1: 199032})
(398064, 29) <type 'numpy.ndarray'>
(398064,)
(85443, 2)
(398064, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 00016: early stopping
Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199021, 1: 199021})
(398042, 29) <type 'numpy.ndarray'>
(398042,)
(85443, 2)
(398042, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Ep

In [46]:
results = [reports[0][2][1],reports[1][2][1],reports[1][2][1]]
avg_f1 = np.mean(results)
print 'F1 AVG = {}'.format(avg_f1)

F1 AVG = 0.771745237875


## CNNv1.2 averaged

In [41]:
def average_run():
    reports = []
    for i in range(3):
        X_res, y_res, X_test, y_test = generate_train_test_sample(X, y)

        print X_res.shape, type(X_res)
        print y_res.shape

        X_train = X_res.reshape(X_res.shape[0], 29, 1)
        Y_train = y_res.reshape(y_res.shape[0], 1)
        X_test = X_test.values.reshape(X_test.values.shape[0], 29, 1)
        Y_test = y_test.values.reshape(y_test.values.shape[0], 1)

        Y_test = keras.utils.to_categorical(Y_test)
        Y_train = keras.utils.to_categorical(Y_train)
        print Y_test.shape
        print Y_train.shape


        seed(2017)
        conv = Sequential()
        conv.add(Conv1D(256, 29, input_shape=(29, 1), activation='relu'))
        conv.add(Conv1D(256, 1, activation='relu'))
        conv.add(Flatten())

        conv.add(Dense(300, activation = 'relu'))
        conv.add(Dense(100, activation = 'relu'))
        conv.add(Dense(2, activation = 'softmax'))

        sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
        conv.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])
        conv.fit(X_train, Y_train, batch_size = 500, epochs = 50, verbose = 1, callbacks=[keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0001, patience=5, verbose=0, mode='auto')])
        score = conv.evaluate(X_test, Y_test, batch_size=500)

        y_pred = conv.predict(X_test)

        cutt_off_tr = 0.5
        y_pred[np.where(y_pred>=cutt_off_tr)] = 1
        y_pred[np.where(y_pred<cutt_off_tr)]  = 0

        prfs0 = precision_recall_fscore_support(Y_test, y_pred, labels=[0])
        reports.append(prfs0)
    
    return reports


In [42]:
reports = average_run()


Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199028, 1: 199028})
(398056, 29) <type 'numpy.ndarray'>
(398056,)
(85443, 2)
(398056, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199033, 1: 199033})
(398066, 29) <type 'numpy.ndarray'>
(398066,)
(85443, 2)
(398066, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Resampling the data with SMOTE. . .
Resampled training dataset shape Counter({0: 199026, 1: 199026})
(398052, 29) <type 'numpy.ndarray'>
(398052,)
(85443, 2)
(398052, 2)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50

In [43]:
results = [reports[0][2][1],reports[1][2][1],reports[1][2][1]]
avg_f1 = np.mean(results)
print 'F1 AVG = {}'.format(avg_f1)

F1 AVG = 0.810230099502


# CNNv1 : F1 AVG = 0.771745237875
# CNNv1.2: F1 AVG = 0.810230099502