In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import accuracy_score, roc_auc_score

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from tensorflow.keras.layers import Input, Add, Dense, Activation, Dropout
from tensorflow.keras.models import Model, load_model, Sequential

import time

In [None]:
start_time = time.time()

In [2]:
def write_to_submission_file(predicted_values, 
                             transaction_ids, 
                             out_file="submission.csv", 
                             target='isFraud', 
                             index_label="TransactionID"):
    
    # turn predictions into data frame and save as csv file
    predicted_df = pd.DataFrame(predicted_values,
                                index = transaction_ids,
                                columns=[target])
    
    predicted_df.to_csv(out_file, index_label=index_label)
    

def rocauc(y_true, y_pred):
    return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)

In [3]:
# Load Train data
df = pd.read_csv("train_transaction.csv")
train_size = df.shape[0]
df.shape

(590540, 394)

In [4]:
# Load Test data
df_test = pd.read_csv("test_transaction.csv")
test_size = df_test.shape[0]
df_test.shape

(506691, 393)

In [5]:
labels = df["isFraud"]

del df["isFraud"]

len(labels)

590540

In [6]:
# Merge Train and Test data: Transactions
df = pd.concat([df, df_test])

### Load Identities

In [7]:
ids = pd.read_csv("train_identity.csv")
ids.shape

(144233, 41)

In [8]:
ids_test = pd.read_csv("test_identity.csv")
ids_test.shape

(141907, 41)

In [9]:
# Merge Train and Test data: Identities
ids = pd.concat([ids, ids_test])

### Merge Identities and Transactions

In [10]:
data = pd.merge(df, ids, on="TransactionID", how="left")
data.shape

(1097231, 433)

In [11]:
del data["TransactionID"]

### Data Wrangling

In [12]:
#data.columns.tolist()

In [13]:
# TransactionDT
#data.TransactionDT.isna().sum()

data["DTSec"] = data.TransactionDT % 60
data["DTMin"] = (data.TransactionDT % 3600) // 60
data["DTHour"] = (data.TransactionDT % 86400) // 3600
data["DTDow"] = (data.TransactionDT % 604800) // 86400

del data['TransactionDT']

In [14]:
# TransactionAmt
#data.TransactionAmt.isna().sum()
# No changes at the beginning

In [15]:
# ProductCD
#data.ProductCD.isna().sum()
#data.ProductCD.value_counts()

data = pd.get_dummies(data, columns=['ProductCD'], prefix='ProductCD')

In [16]:
# card1
#data.card1.isna().sum()
# No changes at the beginning

In [17]:
# card2
#data.card2.isna().sum()
#data.card2.min()

data["card2_nan"] = data.card2.isna().astype(int)
data.card2.fillna(0.0, inplace=True)

In [18]:
# card3
#data.card3.isna().sum()
#data.card3.min()

data["card3_nan"] = data.card3.isna().astype(int)
data.card3.fillna(0.0, inplace=True)

In [19]:
# card4
#data.card4.isna().sum()

data = pd.get_dummies(data, columns=['card4'], prefix='card4', dummy_na=True)

In [20]:
# card5
#data.card5.isna().sum()
#data.card5.min()

data["card5_nan"] = data.card5.isna().astype(int)
data.card5.fillna(0.0, inplace=True)

In [21]:
# card6
#data.card6.isna().sum()

data = pd.get_dummies(data, columns=['card6'], prefix='card6', dummy_na=True)

In [22]:
# addr1
#data.addr1.isna().sum()

data["addr1_nan"] = data.addr1.isna().astype(int)
data.addr1.fillna(0.0, inplace=True)

In [23]:
# addr2
#data.addr2.isna().sum()
#data.addr2.value_counts()
#data.addr2.min()

data["addr2_nan"] = data.addr2.isna().astype(int)
data.addr2.fillna(0.0, inplace=True)

In [24]:
# dist1
#data.dist1

data["dist1_nan"] = data.dist1.isna().astype(int)
data.dist1.fillna(-1.0, inplace=True)

In [25]:
# dist2
#data.dist2.min()

data["dist2_nan"] = data.dist2.isna().astype(int)
data.dist2.fillna(-1.0, inplace=True)

In [26]:
# P_emaildomain
#data.P_emaildomain

data = pd.get_dummies(data, columns=['P_emaildomain'], prefix='P_emaildomain', dummy_na=True)

In [27]:
# R_emaildomain
#data.R_emaildomain

data = pd.get_dummies(data, columns=['R_emaildomain'], prefix='R_emaildomain', dummy_na=True)

In [28]:
# C1
#data.C1.isna().sum()
#data.C1.value_counts()

data["C1_nan"] = data.C1.isna().astype(int)
data.C1.fillna(-1.0, inplace=True)

In [29]:
# C2
#data.C2.isna().sum()
#data.C2.value_counts()

data["C2_nan"] = data.C2.isna().astype(int)
data.C2.fillna(-1.0, inplace=True)

In [30]:
# C3
#data.C3.isna().sum()
#data.C3.value_counts()

data["C3_nan"] = data.C3.isna().astype(int)
data.C3.fillna(-1.0, inplace=True)

In [31]:
# C4
#data.C4.isna().sum()
#data.C4.value_counts()

data["C4_nan"] = data.C4.isna().astype(int)
data.C4.fillna(-1.0, inplace=True)

In [32]:
# C5
#data.C5.isna().sum()
#data.C5.value_counts()

data["C5_nan"] = data.C5.isna().astype(int)
data.C5.fillna(-1.0, inplace=True)

In [33]:
# C6
#data.C6
#data.C6.isna().sum()
#data.C6.value_counts()

data["C6_nan"] = data.C6.isna().astype(int)
data.C6.fillna(-1.0, inplace=True)

In [34]:
# C7
#data.C7
#data.C7.isna().sum()
#data.C7.value_counts()

data["C7_nan"] = data.C7.isna().astype(int)
data.C7.fillna(-1.0, inplace=True)

In [35]:
# C8
#data.C8
#data.C8.isna().sum()
#data.C8.value_counts()

data["C8_nan"] = data.C8.isna().astype(int)
data.C8.fillna(-1.0, inplace=True)

In [36]:
# C9
#data.C9
#data.C9.isna().sum()
#data.C9.value_counts()

data["C9_nan"] = data.C9.isna().astype(int)
data.C9.fillna(-1.0, inplace=True)

In [37]:
# C10
#data.C10
#data.C10.isna().sum()
#data.C10.value_counts()

data["C10_nan"] = data.C10.isna().astype(int)
data.C10.fillna(-1.0, inplace=True)

In [38]:
# C11
#data.C11
#data.C11.isna().sum()
#data.C11.value_counts()

data["C11_nan"] = data.C11.isna().astype(int)
data.C11.fillna(-1.0, inplace=True)

In [39]:
# C12
#data.C12
#data.C12.isna().sum()
#data.C12.value_counts()

data["C12_nan"] = data.C12.isna().astype(int)
data.C12.fillna(-1.0, inplace=True)

In [40]:
# C13
#data.C13
#data.C13.isna().sum()
#data.C13.value_counts()

data["C13_nan"] = data.C13.isna().astype(int)
data.C13.fillna(-1.0, inplace=True)

In [41]:
# C14
#data.C14
#data.C14.isna().sum()
#data.C14.value_counts()

data["C14_nan"] = data.C14.isna().astype(int)
data.C14.fillna(-1.0, inplace=True)

In [42]:
# D1
#data.D1
#data.D1.isna().sum()

data["D1_nan"] = data.D1.isna().astype(int)
data.D1.fillna(-1.0, inplace=True)

In [43]:
# D2
#data.D2
#data.D2.min()

data["D2_nan"] = data.D2.isna().astype(int)
data.D2.fillna(-1.0, inplace=True)

In [44]:
# D3
#data.D3

data["D3_nan"] = data.D3.isna().astype(int)
data.D3.fillna(-1.0, inplace=True)

In [45]:
# D4
#data.D4

data["D4_nan"] = data.D4.isna().astype(int)
data.D4.fillna(-1.0, inplace=True)

In [46]:
# D5
#data.D5

data["D5_nan"] = data.D5.isna().astype(int)
data.D5.fillna(-1.0, inplace=True)

In [47]:
# D6
#data.D6
#data.D6.value_counts()

data["D6_nan"] = data.D6.isna().astype(int)
data.D6.fillna(-1.0, inplace=True)

In [48]:
# D7
#data.D7
#data.D7.value_counts()

data["D7_nan"] = data.D7.isna().astype(int)
data.D7.fillna(-1.0, inplace=True)

In [49]:
# D8
#data.D8
#data.D8.value_counts()

data["D8_nan"] = data.D8.isna().astype(int)
data.D8.fillna(-1.0, inplace=True)

In [50]:
# D9
#data.D9
#data.D9.value_counts()

data["D9_nan"] = data.D9.isna().astype(int)
data.D9.fillna(-1.0, inplace=True)

In [51]:
# D10
#data.D10

data["D10_nan"] = data.D10.isna().astype(int)
data.D10.fillna(-1.0, inplace=True)

In [52]:
# D11
#data.D11

data["D11_nan"] = data.D11.isna().astype(int)
data.D11.fillna(-1.0, inplace=True)

In [53]:
# D12
#data.D12
#data.D12.value_counts()

data["D12_nan"] = data.D12.isna().astype(int)
data.D12.fillna(-1.0, inplace=True)

In [54]:
# D13
#data.D13
#data.D13.value_counts()

data["D13_nan"] = data.D13.isna().astype(int)
data.D13.fillna(-1.0, inplace=True)

In [55]:
# D14
#data.D14
#data.D14.value_counts()

data["D14_nan"] = data.D14.isna().astype(int)
data.D14.fillna(-1.0, inplace=True)

In [56]:
# D15
#data.D15

data["D15_nan"] = data.D15.isna().astype(int)
data.D15.fillna(-1.0, inplace=True)

In [57]:
# M1
#data.M1
#data.M1.value_counts()

data = pd.get_dummies(data, columns=['M1'], prefix='M1', dummy_na=True)

In [58]:
# M2
#data.M2
#data.M2.value_counts()

data = pd.get_dummies(data, columns=['M2'], prefix='M2', dummy_na=True)

In [59]:
# M3
#data.M3

data = pd.get_dummies(data, columns=['M3'], prefix='M3', dummy_na=True)

In [60]:
# M4
#data.M4
#data.M4.value_counts()

data = pd.get_dummies(data, columns=['M4'], prefix='M4', dummy_na=True)

In [61]:
# M5
#data.M5

data = pd.get_dummies(data, columns=['M5'], prefix='M5', dummy_na=True)

In [62]:
# M6
#data.M6

data = pd.get_dummies(data, columns=['M6'], prefix='M6', dummy_na=True)

In [63]:
# M7
#data.M7

data = pd.get_dummies(data, columns=['M7'], prefix='M7', dummy_na=True)

In [64]:
# M8
#data.M8

data = pd.get_dummies(data, columns=['M8'], prefix='M8', dummy_na=True)

In [65]:
# M9
#data.M9

data = pd.get_dummies(data, columns=['M9'], prefix='M9', dummy_na=True)

In [66]:
#
# Process all V-columns at once
#

MAX_AMOUNT_OF_UNIQUE = 16

for i in range(1, 340):
    col = "V" + str(i)
    
    if (len(data[col].value_counts()) > MAX_AMOUNT_OF_UNIQUE):
        data[col + "_nan"] = data[col].isna().astype(int)
        data[col].fillna(-1.0, inplace=True)
    else:
        data = pd.get_dummies(data, columns=[col], prefix=col, dummy_na=True)

### Data Wrangling for Identities

In [67]:
# id_01
#data.id_01.isna().sum()
#data.id_01.value_counts()

data["id_01" + "_nan"] = data["id_01"].isna().astype(int)
data["id_01"].fillna(0.0, inplace=True)

In [68]:
# id_02
#data.id_02.isna().sum()
#data.id_02.max()

data["id_02" + "_nan"] = data["id_02"].isna().astype(int)
data["id_02"].fillna(0.0, inplace=True)

In [69]:
# id_03
#data.id_03.isna().sum()
#data.id_03.value_counts()

data = pd.get_dummies(data, columns=['id_03'], prefix='id_03', dummy_na=True)

In [70]:
# id_04
#data.id_04.isna().sum()
#data.id_04.value_counts()

data = pd.get_dummies(data, columns=['id_04'], prefix='id_04', dummy_na=True)

In [71]:
# id_05
#data.id_05.isna().sum()
#data.id_05.value_counts()

data["id_05" + "_nan"] = data["id_05"].isna().astype(int)
data["id_05"].fillna(100.0, inplace=True)

In [72]:
# id_06
#data.id_06.isna().sum()
#data.id_06.value_counts()

data["id_06" + "_nan"] = data["id_06"].isna().astype(int)
data["id_06"].fillna(100.0, inplace=True)

In [73]:
# id_07
#data.id_07.isna().sum()
#data.id_07.value_counts()
#data.id_07.min()

data["id_07" + "_nan"] = data["id_07"].isna().astype(int)
data["id_07"].fillna(100.0, inplace=True)

In [74]:
# id_08
#data.id_08.isna().sum()
#data.id_08.value_counts()
#data.id_08.max()

data["id_08" + "_nan"] = data["id_08"].isna().astype(int)
data["id_08"].fillna(100.0, inplace=True)

In [75]:
# id_09
#data.id_09.isna().sum()
#data.id_09.value_counts()

data["id_09" + "_nan"] = data["id_09"].isna().astype(int)
data["id_09"].fillna(100.0, inplace=True)

In [76]:
# id_10
#data.id_10.isna().sum()
#data.id_10.value_counts()

data["id_10" + "_nan"] = data["id_10"].isna().astype(int)
data["id_10"].fillna(100.0, inplace=True)

In [77]:
# id_11
#data.id_11.isna().sum()
#data.id_11.value_counts()
#data.id_11.min()

data["id_11" + "_nan"] = data["id_11"].isna().astype(int)
data["id_11"].fillna(0.0, inplace=True)

In [78]:
# id_12
#data.id_12.isna().sum()
#data.id_12.value_counts()

data = pd.get_dummies(data, columns=['id_12'], prefix='id_12', dummy_na=True)

In [79]:
# id_13
#data.id_13.isna().sum()
#data.id_13.value_counts()

data["id_13" + "_nan"] = data["id_13"].isna().astype(int)
data["id_13"].fillna(100.0, inplace=True)

In [80]:
# id_14
#data.id_14.isna().sum()
#data.id_14.value_counts()

data = pd.get_dummies(data, columns=['id_14'], prefix='id_14', dummy_na=True)

In [81]:
# id_15
#data.id_15.isna().sum()
#data.id_15.value_counts()

data = pd.get_dummies(data, columns=['id_15'], prefix='id_15', dummy_na=True)

In [82]:
# id_16
#data.id_16.isna().sum()
#data.id_16.value_counts()

data = pd.get_dummies(data, columns=['id_16'], prefix='id_16', dummy_na=True)

In [83]:
# id_17
#data.id_17.isna().sum()
#data.id_17.value_counts()
#data.id_17.min()

data["id_17" + "_nan"] = data["id_17"].isna().astype(int)
data["id_17"].fillna(0.0, inplace=True)

In [84]:
# id_18
#data.id_18.isna().sum()
#data.id_18.value_counts()

data = pd.get_dummies(data, columns=['id_18'], prefix='id_18', dummy_na=True)

In [85]:
# id_19
#data.id_19.isna().sum()
#data.id_19.value_counts()
#data.id_19.min()

data["id_19" + "_nan"] = data["id_19"].isna().astype(int)
data["id_19"].fillna(0.0, inplace=True)

In [86]:
# id_20
#data.id_20.isna().sum()
#data.id_20.value_counts()
#data.id_20.min()

data["id_20" + "_nan"] = data["id_20"].isna().astype(int)
data["id_20"].fillna(0.0, inplace=True)

In [87]:
# id_21
#data.id_21.isna().sum()
#data.id_21.value_counts()
#data.id_21.min()

data["id_21" + "_nan"] = data["id_21"].isna().astype(int)
data["id_21"].fillna(0.0, inplace=True)

In [88]:
# id_22
#data.id_22.isna().sum()
#data.id_22.value_counts()

data = pd.get_dummies(data, columns=['id_22'], prefix='id_22', dummy_na=True)

In [89]:
# id_23
#data.id_23.isna().sum()
#data.id_23.value_counts()

data = pd.get_dummies(data, columns=['id_23'], prefix='id_23', dummy_na=True)

In [90]:
# id_24
#data.id_24.isna().sum()
#data.id_24.value_counts()

data = pd.get_dummies(data, columns=['id_24'], prefix='id_24', dummy_na=True)

In [91]:
# id_25
#data.id_25.isna().sum()
#data.id_25.value_counts()
#data.id_25.min()

data["id_25" + "_nan"] = data["id_25"].isna().astype(int)
data["id_25"].fillna(0.0, inplace=True)

In [92]:
# id_26
#data.id_26.isna().sum()
#data.id_26.value_counts()
#data.id_26.min()

data["id_26" + "_nan"] = data["id_26"].isna().astype(int)
data["id_26"].fillna(0.0, inplace=True)

In [93]:
# id_27
#data.id_27.isna().sum()
#data.id_27.value_counts()

data = pd.get_dummies(data, columns=['id_27'], prefix='id_27', dummy_na=True)

In [94]:
# id_28
#data.id_28.isna().sum()
#data.id_28.value_counts()

data = pd.get_dummies(data, columns=['id_28'], prefix='id_28', dummy_na=True)

In [95]:
# id_29
#data.id_29.isna().sum()
#data.id_29.value_counts()

data = pd.get_dummies(data, columns=['id_29'], prefix='id_29', dummy_na=True)

In [96]:
# id_30
#data.id_30.isna().sum()
#data.id_30.value_counts()

data["id_30"].fillna("Missing", inplace=True)
data["OS_Type"] = data.id_30.str.split(" ").map(lambda x: x[0])

data = pd.get_dummies(data, columns=['id_30'], prefix='id_30', dummy_na=True)
data = pd.get_dummies(data, columns=['OS_Type'], prefix='OS_Type', dummy_na=True)

In [97]:
# id_31
#data.id_31.isna().sum()
#list(data.id_31.value_counts().index)

data["id_31"].fillna("Missing", inplace=True)

browser_type = []

for i in range(data.shape[0]):
    browser = data.loc[i, "id_31"].lower()
    
    if ("chrome" in browser):
        browser_type.append("chrome")
    elif("safari" in browser):
        browser_type.append("safari")
    elif("ie" in browser):
        browser_type.append("ie")    
    elif("safari" in browser):
        browser_type.append("safari")
    elif("firefox" in browser):
        browser_type.append("firefox")      
    elif("samsung" in browser):
        browser_type.append("samsung")   
    elif("edge" in browser):
        browser_type.append("edge")   
    elif("webview" in browser):
        browser_type.append("webview") 
    elif("opera" in browser):
        browser_type.append("opera")         
    elif("search" in browser):
        browser_type.append("search")     
    elif("microsoft" in browser):
        browser_type.append("microsoft") 
    else:
        browser_type.append(browser)
        
data['Browser_Type'] = browser_type

del data['id_31']

data = pd.get_dummies(data, columns=['Browser_Type'], prefix='Browser_Type', dummy_na=True)

In [98]:
# id_32
#data.id_32.isna().sum()
#data.id_32.value_counts()

data = pd.get_dummies(data, columns=['id_32'], prefix='id_32', dummy_na=True)

In [99]:
# id_33
#data.id_33.isna().sum()
#data.id_33.value_counts()

data = pd.get_dummies(data, columns=['id_33'], prefix='id_33', dummy_na=True)

In [100]:
# id_34
#data.id_34.isna().sum()
#data.id_34.value_counts()

data = pd.get_dummies(data, columns=['id_34'], prefix='id_34', dummy_na=True)

In [101]:
# id_35
#data.id_35.isna().sum()
#data.id_35.value_counts()

data = pd.get_dummies(data, columns=['id_35'], prefix='id_35', dummy_na=True)

In [102]:
# id_36
#data.id_36.isna().sum()
#data.id_36.value_counts()

data = pd.get_dummies(data, columns=['id_36'], prefix='id_36', dummy_na=True)

In [103]:
# id_37
#data.id_37.isna().sum()
#data.id_37.value_counts()

data = pd.get_dummies(data, columns=['id_37'], prefix='id_37', dummy_na=True)

In [104]:
# id_38
#data.id_38.isna().sum()
#data.id_38.value_counts()

data = pd.get_dummies(data, columns=['id_38'], prefix='id_38', dummy_na=True)

In [105]:
# DeviceType
#data.DeviceType.isna().sum()
#data.DeviceType.value_counts()

data = pd.get_dummies(data, columns=['DeviceType'], prefix='DeviceType', dummy_na=True)

In [106]:
# DeviceInfo
#data.DeviceInfo.isna().sum()
#data.DeviceInfo.value_counts()

# TEMP SOLUTION
del data["DeviceInfo"]

In [107]:
# What is the final shape?
print(data.shape)

# Any NaN left?
print(data.isna().sum().sum())

(1097231, 2637)
0


In [None]:
end_time = time.time()

print(end_time - start_time)

### Scale Data

In [108]:
%%time

columns = data.columns

scaler = StandardScaler(copy=False)

data = scaler.fit_transform(data)

data = pd.DataFrame(data, columns=columns)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Wall time: 1min 57s


### Split to Train and Test Sets

In [109]:
#X_train, X_test, y_train, y_test = train_test_split(data, labels, stratify=labels, test_size=0.3, random_state=8)

X_train = data.iloc[:train_size, :]
X_test = data.iloc[train_size:, :]
y_train = labels

### Neural Network

In [110]:
classifier = Sequential()

classifier.add(Dense(512, activation='relu', kernel_initializer='normal', input_dim=X_train.shape[1]))
classifier.add(Dropout(0.1))
classifier.add(Dense(512, activation='relu', kernel_initializer='normal'))
classifier.add(Dropout(0.1))
classifier.add(Dense(32, activation='relu', kernel_initializer='normal'))
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='normal'))

classifier.compile(optimizer ='adam', loss='binary_crossentropy', metrics =['accuracy', rocauc])

classifier.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               1350656   
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                16416     
_________________________________________________________________
dense_3 (Dense)      

In [113]:
%%time

classifier.fit(X_train, y_train, batch_size=512, epochs=32)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Wall time: 58min 33s


<tensorflow.python.keras.callbacks.History at 0xed21969e80>

In [114]:
predictions = classifier.predict(X_test)

write_to_submission_file(np.round(predictions, 1), df_test.TransactionID, out_file="submission.02.csv")

In [None]:
X_train_1 = X_train[y_train.astype(bool).values]
X_train_1['Label'] = 1

In [None]:
X_train_0 = X_train[(1 - y_train).astype(bool).values]
X_train_0['Label'] = 0

In [None]:
X_train_0.shape

In [None]:
%%time

for _ in range(50):
    sample_X_zeros = X_train_0.sample(14464)
    temp_X_train = pd.concat([sample_X_zeros, X_train_1])
    temp_X_train = temp_X_train.sample(frac=1).reset_index(drop=True)
    
    temp_y_train = temp_X_train['Label']
    del temp_X_train['Label']
    
    classifier.fit(temp_X_train, temp_y_train, batch_size=512, epochs=5)

# NN Performance check
print("Accuracy on Test set:")
predictions = classifier.predict(X_test)
print(roc_auc_score(y_test, np.around(predictions)))

In [None]:
predictions = classifier.predict(X_test)

print(accuracy_score(y_test, np.around(predictions)))

In [None]:


print(accuracy_score(y_test, np.around(predictions)))

In [None]:
count = 0

for x in predictions:
    if (x > 0.5):
        count = count + 1
        
print(count)

In [None]:
temp_X_train