In [6]:
import numpy as np
import pandas as pd
import os
import warnings

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam

from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, confusion_matrix, precision_score

In [7]:
# Read the training table from the relative ../input directory into X.
train_csv_path = "../input/frauddatawithoutaggregations/train_set_without_id_features.csv"
X = pd.read_csv(train_csv_path)

# Show (rows, columns) as the cell output.
X.shape

Select columns to remove

In [8]:
# Columns excluded from the feature matrix: every object-dtype column of X
# plus three explicitly named columns.
object_cols = [col for col, dtype in X.dtypes.items() if dtype == "object"]
remove_cols = object_cols + ['Unnamed: 0', 'TransactionID', 'TransactionDT']

Sort values by time

In [9]:
# Reorder the rows by ascending TransactionDT so that later positional
# slicing follows TransactionDT order.
X = X.sort_values(by='TransactionDT')

In [10]:
# Separate the target from the features.
# y is kept as a one-column DataFrame (column 'isFraud'), taken before any
# missing-value filling is applied to X.
y = X[['isFraud']].copy()

# DataFrame.drop returns a new frame. The result must be assigned back,
# otherwise 'isFraud' stays in X and the label leaks into the model inputs.
X = X.drop(columns='isFraud')

# Replace missing feature values with the sentinel -999.
X = X.fillna(-999)

In [11]:
# Positional 80/20 split: the first 80% of the rows form the training set and
# the remaining rows form the test set.
splitpoint = int(0.8 * len(X))

X_train, X_test = X.iloc[:splitpoint], X.iloc[splitpoint:]
y_train, y_test = y.iloc[:splitpoint], y.iloc[splitpoint:]

In [12]:
# Display the largest TransactionDT in the training rows next to the smallest
# TransactionDT in the test rows.
last_train_dt = X_train['TransactionDT'].max()
first_test_dt = X_test['TransactionDT'].min()
last_train_dt, first_test_dt

In [13]:
# Unbind the full frames; only the train/test pieces are referenced below.
del X
del y

In [14]:
# Remove the excluded columns from both feature frames.
# X_train and X_test were obtained by slicing another frame, so dropping with
# inplace=True would mutate a slice and trigger SettingWithCopyWarning.
# Building new frames and rebinding the names avoids that.
X_train = X_train.drop(columns=remove_cols)
X_test = X_test.drop(columns=remove_cols)

# Show the resulting (rows, columns) of each frame as the cell output.
X_train.shape, X_test.shape

In [15]:
# Put the test columns in the same order as the training columns.
X_test = X_test.loc[:, X_train.columns]

# Replace the four pandas objects with their underlying NumPy arrays.
X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

### Training

In [16]:
def build_model(input_dim):
    """Build and compile the fully connected binary classifier.

    The network stacks Dense layers of 128, 64, 64 and 32 ReLU units followed
    by a single sigmoid unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss, and accuracy as the reported metric.

    Args:
        input_dim: Shape of one input sample, e.g. ``(n_features,)``. A plain
            integer ``n_features`` is also accepted and is treated as
            ``(n_features,)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # The parameter name suggests a feature count, so accept an integer as
    # well as the shape tuple that existing callers pass.
    if isinstance(input_dim, (int, np.integer)):
        input_dim = (int(input_dim),)

    model = Sequential([
        Dense(128, activation='relu', input_shape=input_dim),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        # Single sigmoid unit: one value in (0, 1) per sample.
        Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [17]:
# One model input per column of the training matrix.
n_features = X_train.shape[1]
model = build_model((n_features,))

In [18]:
# Silence all Python warnings from this point on.
warnings.filterwarnings('ignore')

# Ask TensorFlow for the name of the GPU device it sees.
device_name = tf.test.gpu_device_name()

# Continue only when the first GPU is reported; otherwise abort the notebook.
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

In [19]:
# Early stopping callback
#
# The fit call holds out part of the training data via validation_split, so
# stop on the validation loss rather than the training loss: the training loss
# keeps improving while the model overfits and says nothing about
# generalization. After 3 epochs without improvement training stops and the
# weights from the best epoch are restored.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [20]:
# Train on the first GPU and keep the returned History object so the
# per-epoch losses and metrics can be inspected afterwards.
#
# use_multiprocessing is intentionally not passed: it only applies to
# generator / keras.utils.Sequence inputs, has no effect for NumPy arrays,
# and newer Keras versions no longer accept it in fit().
#
# validation_split=0.2 holds out the last 20% of the provided samples
# (taken before shuffling) as validation data.
with tf.device('/device:GPU:0'):
    history = model.fit(
        X_train,
        y_train,
        epochs=20,
        batch_size=512,
        verbose=1,
        validation_split=0.2,
        callbacks=[early_stopping],
    )

In [22]:
# Model outputs for the test rows (one sigmoid value per row).
test_y = model.predict(x=X_test)

In [47]:
# Decision threshold applied to the predicted scores.
cutoff = 0.2

# 1 where the first (only) output column exceeds the cutoff, otherwise 0.
preds = (test_y[:, 0] > cutoff).astype(int)

In [48]:
# Flatten the (n, 1) label array once for the metrics that compare hard labels.
y_true = y_test.reshape(-1)

# F1 and precision use the thresholded predictions; ROC AUC uses the raw scores.
print('F1 score:', f1_score(y_true, preds))
print('Precision score:', precision_score(y_true, preds))
print('Area Under the Receiver Operating Characteristic Curve:', roc_auc_score(y_test, test_y))

In [49]:
# Number of test rows assigned to each predicted class.
pred_counts = pd.Series(preds).value_counts()
pred_counts

In [76]:
# Number of test rows in each true class, for comparison with the predictions.
true_counts = pd.Series(y_test.ravel()).value_counts()
true_counts