In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import accuracy_score, roc_auc_score

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from tensorflow.keras.layers import Input, Add, Dense, Activation, Dropout
from tensorflow.keras.models import Model, load_model, Sequential

In [73]:
def write_to_submission_file(predicted_values, 
                             transaction_ids, 
                             out_file="submission.csv", 
                             target='isFraud', 
                             index_label="TransactionID"):
    
    # turn predictions into data frame and save as csv file
    predicted_df = pd.DataFrame(predicted_values,
                                index = transaction_ids,
                                columns=[target])
    
    predicted_df.to_csv(out_file, index_label=index_label)

In [2]:
# Load Train data
df = pd.read_csv("train_transaction.csv")
train_size = df.shape[0]
df.shape

(590540, 394)

In [3]:
# Load Test data
df_test = pd.read_csv("test_transaction.csv")
test_size = df_test.shape[0]
df_test.shape

(506691, 393)

In [4]:
df.head(3)

Unnamed: 0,TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,...,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
0,2987000,0,86400,68.5,W,13926,,150.0,discover,142.0,...,,,,,,,,,,
1,2987001,0,86401,29.0,W,2755,404.0,150.0,mastercard,102.0,...,,,,,,,,,,
2,2987002,0,86469,59.0,W,4663,490.0,150.0,visa,166.0,...,,,,,,,,,,


In [5]:
labels = df["isFraud"]

del df["isFraud"]

In [6]:
len(labels)

590540

In [7]:
# Merge Train and Test data
df = pd.concat([df, df_test])

### Data Wrangling for Transactions

In [8]:
#df.columns.tolist()

In [9]:
# TransactionDT
#df.TransactionDT.isna().sum()

df["DTSec"] = df.TransactionDT % 60
df["DTMin"] = (df.TransactionDT % 3600) // 60
df["DTHour"] = (df.TransactionDT % 86400) // 3600
df["DTDow"] = (df.TransactionDT % 604800) // 86400

del df['TransactionDT']

In [10]:
# TransactionAmt
#df.TransactionAmt.isna().sum()
# No changes at the beginning

In [11]:
# ProductCD
#df.ProductCD.isna().sum()
#df.ProductCD.value_counts()

df = pd.get_dummies(df, columns=['ProductCD'], prefix='ProductCD')

In [12]:
# card1
#df.card1.isna().sum()
# No changes at the beginning

In [13]:
# card2
#df.card2.isna().sum()
#df.card2.min()

df["card2_nan"] = df.card2.isna().astype(int)
df.card2.fillna(0.0, inplace=True)

In [14]:
# card3
#df.card3.isna().sum()
#df.card3.min()

df["card3_nan"] = df.card3.isna().astype(int)
df.card3.fillna(0.0, inplace=True)

In [15]:
# card4
#df.card4.isna().sum()

df = pd.get_dummies(df, columns=['card4'], prefix='card4', dummy_na=True)

In [16]:
# card5
#df.card5.isna().sum()
#df.card5.min()

df["card5_nan"] = df.card5.isna().astype(int)
df.card5.fillna(0.0, inplace=True)

In [17]:
# card6
#df.card6.isna().sum()

df = pd.get_dummies(df, columns=['card6'], prefix='card6', dummy_na=True)

In [18]:
# addr1
#df.addr1.isna().sum()

df["addr1_nan"] = df.addr1.isna().astype(int)
df.addr1.fillna(0.0, inplace=True)

In [19]:
# addr2
#df.addr2.isna().sum()
#df.addr2.value_counts()
#df.addr2.min()

df["addr2_nan"] = df.addr2.isna().astype(int)
df.addr2.fillna(0.0, inplace=True)

In [20]:
# dist1
#df.dist1

df["dist1_nan"] = df.dist1.isna().astype(int)
df.dist1.fillna(-1.0, inplace=True)

In [21]:
# dist2
#df.dist2.min()

df["dist2_nan"] = df.dist2.isna().astype(int)
df.dist2.fillna(-1.0, inplace=True)

In [22]:
# P_emaildomain
#df.P_emaildomain

df = pd.get_dummies(df, columns=['P_emaildomain'], prefix='P_emaildomain', dummy_na=True)

In [23]:
# R_emaildomain
#df.R_emaildomain

df = pd.get_dummies(df, columns=['R_emaildomain'], prefix='R_emaildomain', dummy_na=True)

In [24]:
# C1
#df.C1.isna().sum()
#df.C1.value_counts()

df["C1_nan"] = df.C1.isna().astype(int)
df.C1.fillna(-1.0, inplace=True)

In [25]:
# C2
#df.C2.isna().sum()
#df.C2.value_counts()

df["C2_nan"] = df.C2.isna().astype(int)
df.C2.fillna(-1.0, inplace=True)

In [26]:
# C3
#df.C3.isna().sum()
#df.C3.value_counts()

df["C3_nan"] = df.C3.isna().astype(int)
df.C3.fillna(-1.0, inplace=True)

In [27]:
# C4
#df.C4.isna().sum()
#df.C4.value_counts()

df["C4_nan"] = df.C4.isna().astype(int)
df.C4.fillna(-1.0, inplace=True)

In [28]:
# C5
#df.C5.isna().sum()
#df.C5.value_counts()

df["C5_nan"] = df.C5.isna().astype(int)
df.C5.fillna(-1.0, inplace=True)

In [29]:
# C6
#df.C6
#df.C6.isna().sum()
#df.C6.value_counts()

df["C6_nan"] = df.C6.isna().astype(int)
df.C6.fillna(-1.0, inplace=True)

In [30]:
# C7
#df.C7
#df.C7.isna().sum()
#df.C7.value_counts()

df["C7_nan"] = df.C7.isna().astype(int)
df.C7.fillna(-1.0, inplace=True)

In [31]:
# C8
#df.C8
#df.C8.isna().sum()
#df.C8.value_counts()

df["C8_nan"] = df.C8.isna().astype(int)
df.C8.fillna(-1.0, inplace=True)

In [32]:
# C9
#df.C9
#df.C9.isna().sum()
#df.C9.value_counts()

df["C9_nan"] = df.C9.isna().astype(int)
df.C9.fillna(-1.0, inplace=True)

In [33]:
# C10
#df.C10
#df.C10.isna().sum()
#df.C10.value_counts()

df["C10_nan"] = df.C10.isna().astype(int)
df.C10.fillna(-1.0, inplace=True)

In [34]:
# C11
#df.C11
#df.C11.isna().sum()
#df.C11.value_counts()

df["C11_nan"] = df.C11.isna().astype(int)
df.C11.fillna(-1.0, inplace=True)

In [35]:
# C12
#df.C12
#df.C12.isna().sum()
#df.C12.value_counts()

df["C12_nan"] = df.C12.isna().astype(int)
df.C12.fillna(-1.0, inplace=True)

In [36]:
# C13
#df.C13
#df.C13.isna().sum()
#df.C13.value_counts()

df["C13_nan"] = df.C13.isna().astype(int)
df.C13.fillna(-1.0, inplace=True)

In [37]:
# C14
#df.C14
#df.C14.isna().sum()
#df.C14.value_counts()

df["C14_nan"] = df.C14.isna().astype(int)
df.C14.fillna(-1.0, inplace=True)

In [38]:
# D1
#df.D1
#df.D1.isna().sum()

df["D1_nan"] = df.D1.isna().astype(int)
df.D1.fillna(-1.0, inplace=True)

In [39]:
# D2
#df.D2
#df.D2.min()

df["D2_nan"] = df.D2.isna().astype(int)
df.D2.fillna(-1.0, inplace=True)

In [40]:
# D3
#df.D3

df["D3_nan"] = df.D3.isna().astype(int)
df.D3.fillna(-1.0, inplace=True)

In [41]:
# D4
#df.D4

df["D4_nan"] = df.D4.isna().astype(int)
df.D4.fillna(-1.0, inplace=True)

In [42]:
# D5
#df.D5

df["D5_nan"] = df.D5.isna().astype(int)
df.D5.fillna(-1.0, inplace=True)

In [43]:
# D6
#df.D6
#df.D6.value_counts()

df["D6_nan"] = df.D6.isna().astype(int)
df.D6.fillna(-1.0, inplace=True)

In [44]:
# D7
#df.D7
#df.D7.value_counts()

df["D7_nan"] = df.D7.isna().astype(int)
df.D7.fillna(-1.0, inplace=True)

In [45]:
# D8
#df.D8
#df.D8.value_counts()

df["D8_nan"] = df.D8.isna().astype(int)
df.D8.fillna(-1.0, inplace=True)

In [46]:
# D9
#df.D9
#df.D9.value_counts()

df["D9_nan"] = df.D9.isna().astype(int)
df.D9.fillna(-1.0, inplace=True)

In [47]:
# D10
#df.D10

df["D10_nan"] = df.D10.isna().astype(int)
df.D10.fillna(-1.0, inplace=True)

In [48]:
# D11
#df.D11

df["D11_nan"] = df.D11.isna().astype(int)
df.D11.fillna(-1.0, inplace=True)

In [49]:
# D12
#df.D12
#df.D12.value_counts()

df["D12_nan"] = df.D12.isna().astype(int)
df.D12.fillna(-1.0, inplace=True)

In [50]:
# D13
#df.D13
#df.D13.value_counts()

df["D13_nan"] = df.D13.isna().astype(int)
df.D13.fillna(-1.0, inplace=True)

In [51]:
# D14
#df.D14
#df.D14.value_counts()

df["D14_nan"] = df.D14.isna().astype(int)
df.D14.fillna(-1.0, inplace=True)

In [52]:
# D15
#df.D15

df["D15_nan"] = df.D15.isna().astype(int)
df.D15.fillna(-1.0, inplace=True)

In [53]:
# M1
#df.M1
#df.M1.value_counts()

df = pd.get_dummies(df, columns=['M1'], prefix='M1', dummy_na=True)

In [54]:
# M2
#df.M2
#df.M2.value_counts()

df = pd.get_dummies(df, columns=['M2'], prefix='M2', dummy_na=True)

In [55]:
# M3
#df.M3

df = pd.get_dummies(df, columns=['M3'], prefix='M3', dummy_na=True)

In [56]:
# M4
#df.M4
#df.M4.value_counts()

df = pd.get_dummies(df, columns=['M4'], prefix='M4', dummy_na=True)

In [57]:
# M5
#df.M5

df = pd.get_dummies(df, columns=['M5'], prefix='M5', dummy_na=True)

In [58]:
# M6
#df.M6

df = pd.get_dummies(df, columns=['M6'], prefix='M6', dummy_na=True)

In [59]:
# M7
#df.M7

df = pd.get_dummies(df, columns=['M7'], prefix='M7', dummy_na=True)

In [60]:
# M8
#df.M8

df = pd.get_dummies(df, columns=['M8'], prefix='M8', dummy_na=True)

In [61]:
# M9
#df.M9

df = pd.get_dummies(df, columns=['M9'], prefix='M9', dummy_na=True)

In [62]:
#
# Process all V-columns at once
#

MAX_AMOUNT_OF_UNIQUE = 16

for i in range(1, 340):
    col = "V" + str(i)
    
    if (len(df[col].value_counts()) > MAX_AMOUNT_OF_UNIQUE):
        df[col + "_nan"] = df[col].isna().astype(int)
        df[col].fillna(-1.0, inplace=True)
    else:
        df = pd.get_dummies(df, columns=[col], prefix=col, dummy_na=True)

In [63]:
# What is the final shape?
print(df.shape)

# Any NaN left?
print(df.isna().sum().sum())

(1097231, 1807)
0


### Scale Transactions

In [64]:
%%time

#
# TEMPORARY DELETE THE TransactionID. It will be needed to join the Identities info
#
del df['TransactionID']

columns = df.columns

scaler = StandardScaler(copy=False)
#scaler = MinMaxScaler(copy=False)

df = scaler.fit_transform(df)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Wall time: 1min 26s


In [65]:
df = pd.DataFrame(df, columns=columns)
df.head(3)

Unnamed: 0,TransactionAmt,card1,card2,card3,card5,addr1,addr2,dist1,dist2,C1,...,V330_nan,V331_nan,V332_nan,V333_nan,V334_nan,V335_nan,V336_nan,V337_nan,V338_nan,V339_nan
0,-0.274058,0.817417,-2.186194,-0.176293,-1.260537,0.435966,0.375378,-0.101776,-0.10397,-0.100534,...,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335
1,-0.437119,-1.465279,0.285881,-0.176293,-2.159569,0.510346,0.375378,-0.189345,-0.10397,-0.100534,...,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335
2,-0.313275,-1.075396,0.812114,-0.176293,-0.721117,0.547536,0.375378,1.071651,-0.10397,-0.100534,...,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335,0.411335


### Load Identities

In [None]:
ids = pd.read_csv("train_identity.csv")
ids.shape

In [None]:
ids.head()

### Data Wrangling for Identities

In [None]:
# id_01
#ids.id_01.isna().sum()
#ids.id_01.value_counts()
# No changes

In [None]:
# id_02
#ids.id_02.isna().sum()
#ids.id_02.max()

ids["id_02" + "_nan"] = ids["id_02"].isna().astype(int)
ids["id_02"].fillna(0.0, inplace=True)

In [None]:
# id_03
#ids.id_03.isna().sum()
#ids.id_03.value_counts()

ids = pd.get_dummies(ids, columns=['id_03'], prefix='id_03', dummy_na=True)

In [None]:
# id_04
#ids.id_04.isna().sum()
#ids.id_04.value_counts()

ids = pd.get_dummies(ids, columns=['id_04'], prefix='id_04', dummy_na=True)

In [None]:
# id_05
#ids.id_05.isna().sum()
#ids.id_05.value_counts()

ids["id_05" + "_nan"] = ids["id_05"].isna().astype(int)
ids["id_05"].fillna(100.0, inplace=True)

In [None]:
# id_06
#ids.id_06.isna().sum()
#ids.id_06.value_counts()

ids["id_06" + "_nan"] = ids["id_06"].isna().astype(int)
ids["id_06"].fillna(100.0, inplace=True)

In [None]:
# id_07
#ids.id_07.isna().sum()
#ids.id_07.value_counts()
#ids.id_07.min()

ids["id_07" + "_nan"] = ids["id_07"].isna().astype(int)
ids["id_07"].fillna(100.0, inplace=True)

In [None]:
# id_08
#ids.id_08.isna().sum()
#ids.id_08.value_counts()
#ids.id_08.max()

ids["id_08" + "_nan"] = ids["id_08"].isna().astype(int)
ids["id_08"].fillna(100.0, inplace=True)

In [None]:
# id_09
#ids.id_09.isna().sum()
#ids.id_09.value_counts()

ids["id_09" + "_nan"] = ids["id_09"].isna().astype(int)
ids["id_09"].fillna(100.0, inplace=True)

In [None]:
# id_10
#ids.id_10.isna().sum()
#ids.id_10.value_counts()

ids["id_10" + "_nan"] = ids["id_10"].isna().astype(int)
ids["id_10"].fillna(100.0, inplace=True)

In [None]:
# id_11
#ids.id_11.isna().sum()
#ids.id_11.value_counts()
#ids.id_11.min()

ids["id_11" + "_nan"] = ids["id_11"].isna().astype(int)
ids["id_11"].fillna(0.0, inplace=True)

In [None]:
# id_12
#ids.id_12.isna().sum()
#ids.id_12.value_counts()

ids = pd.get_dummies(ids, columns=['id_12'], prefix='id_12', dummy_na=True)

In [None]:
# id_13
#ids.id_13.isna().sum()
#ids.id_13.value_counts()

ids["id_13" + "_nan"] = ids["id_13"].isna().astype(int)
ids["id_13"].fillna(100.0, inplace=True)

In [None]:
# id_14
#ids.id_14.isna().sum()
#ids.id_14.value_counts()

ids = pd.get_dummies(ids, columns=['id_14'], prefix='id_14', dummy_na=True)

In [None]:
# id_15
#ids.id_15.isna().sum()
#ids.id_15.value_counts()

ids = pd.get_dummies(ids, columns=['id_15'], prefix='id_15', dummy_na=True)

In [None]:
# id_16
#ids.id_16.isna().sum()
#ids.id_16.value_counts()

ids = pd.get_dummies(ids, columns=['id_16'], prefix='id_16', dummy_na=True)

In [None]:
# id_17
#ids.id_17.isna().sum()
#ids.id_17.value_counts()
#ids.id_17.min()

ids["id_17" + "_nan"] = ids["id_17"].isna().astype(int)
ids["id_17"].fillna(0.0, inplace=True)

In [None]:
# id_18
#ids.id_18.isna().sum()
#ids.id_18.value_counts()

ids = pd.get_dummies(ids, columns=['id_18'], prefix='id_18', dummy_na=True)

In [None]:
# id_19
#ids.id_19.isna().sum()
#ids.id_19.value_counts()
#ids.id_19.min()

ids["id_19" + "_nan"] = ids["id_19"].isna().astype(int)
ids["id_19"].fillna(0.0, inplace=True)

In [None]:
# id_20
#ids.id_20.isna().sum()
#ids.id_20.value_counts()
#ids.id_20.min()

ids["id_20" + "_nan"] = ids["id_20"].isna().astype(int)
ids["id_20"].fillna(0.0, inplace=True)

In [None]:
# id_21
#ids.id_21.isna().sum()
#ids.id_21.value_counts()
#ids.id_21.min()

ids["id_21" + "_nan"] = ids["id_21"].isna().astype(int)
ids["id_21"].fillna(0.0, inplace=True)

In [None]:
# id_22
#ids.id_22.isna().sum()
#ids.id_22.value_counts()

ids = pd.get_dummies(ids, columns=['id_22'], prefix='id_22', dummy_na=True)

In [None]:
# id_23
#ids.id_23.isna().sum()
#ids.id_23.value_counts()

ids = pd.get_dummies(ids, columns=['id_23'], prefix='id_23', dummy_na=True)

In [None]:
# id_24
#ids.id_24.isna().sum()
#ids.id_24.value_counts()

ids = pd.get_dummies(ids, columns=['id_24'], prefix='id_24', dummy_na=True)

In [None]:
# id_25
#ids.id_25.isna().sum()
#ids.id_25.value_counts()
#ids.id_25.min()

ids["id_25" + "_nan"] = ids["id_25"].isna().astype(int)
ids["id_25"].fillna(0.0, inplace=True)

In [None]:
# id_26
#ids.id_26.isna().sum()
#ids.id_26.value_counts()
#ids.id_26.min()

ids["id_26" + "_nan"] = ids["id_26"].isna().astype(int)
ids["id_26"].fillna(0.0, inplace=True)

In [None]:
# id_27
#ids.id_27.isna().sum()
#ids.id_27.value_counts()

ids = pd.get_dummies(ids, columns=['id_27'], prefix='id_27', dummy_na=True)

In [None]:
# id_28
#ids.id_28.isna().sum()
#ids.id_28.value_counts()

ids = pd.get_dummies(ids, columns=['id_28'], prefix='id_28', dummy_na=True)

In [None]:
# id_29
#ids.id_29.isna().sum()
#ids.id_29.value_counts()

ids = pd.get_dummies(ids, columns=['id_29'], prefix='id_29', dummy_na=True)

In [None]:
# id_30
#ids.id_30.isna().sum()
#ids.id_30.value_counts()

ids["id_30"].fillna("Missing", inplace=True)
ids["OS_Type"] = ids.id_30.str.split(" ").map(lambda x: x[0])

ids = pd.get_dummies(ids, columns=['id_30'], prefix='id_30', dummy_na=True)
ids = pd.get_dummies(ids, columns=['OS_Type'], prefix='OS_Type', dummy_na=True)

In [None]:
# id_31
#ids.id_31.isna().sum()
#list(ids.id_31.value_counts().index)

ids["id_31"].fillna("Missing", inplace=True)

browser_type = []

for i in range(ids.shape[0]):
    browser = ids.loc[i, "id_31"].lower()
    
    if ("chrome" in browser):
        browser_type.append("chrome")
    elif("safari" in browser):
        browser_type.append("safari")
    elif("ie" in browser):
        browser_type.append("ie")    
    elif("safari" in browser):
        browser_type.append("safari")
    elif("firefox" in browser):
        browser_type.append("firefox")      
    elif("samsung" in browser):
        browser_type.append("samsung")   
    elif("edge" in browser):
        browser_type.append("edge")   
    elif("webview" in browser):
        browser_type.append("webview") 
    elif("opera" in browser):
        browser_type.append("opera")         
    elif("search" in browser):
        browser_type.append("search")     
    elif("microsoft" in browser):
        browser_type.append("microsoft") 
    else:
        browser_type.append(browser)
        
ids = pd.get_dummies(ids, columns=['id_31'], prefix='id_31', dummy_na=True)
ids['Browser_Type'] = browser_type

In [None]:
# id_32
#ids.id_32.isna().sum()
#ids.id_32.value_counts()

ids = pd.get_dummies(ids, columns=['id_32'], prefix='id_32', dummy_na=True)

In [None]:
# id_33
#ids.id_33.isna().sum()
#ids.id_33.value_counts()

ids = pd.get_dummies(ids, columns=['id_33'], prefix='id_33', dummy_na=True)

In [None]:
# id_34
#ids.id_34.isna().sum()
#ids.id_34.value_counts()

ids = pd.get_dummies(ids, columns=['id_34'], prefix='id_34', dummy_na=True)

In [None]:
# id_35
#ids.id_35.isna().sum()
#ids.id_35.value_counts()

ids = pd.get_dummies(ids, columns=['id_35'], prefix='id_35', dummy_na=True)

In [None]:
# id_36
#ids.id_36.isna().sum()
#ids.id_36.value_counts()

ids = pd.get_dummies(ids, columns=['id_36'], prefix='id_36', dummy_na=True)

In [None]:
# id_37
#ids.id_37.isna().sum()
#ids.id_37.value_counts()

ids = pd.get_dummies(ids, columns=['id_37'], prefix='id_37', dummy_na=True)

In [None]:
# id_38
#ids.id_38.isna().sum()
#ids.id_38.value_counts()

ids = pd.get_dummies(ids, columns=['id_38'], prefix='id_38', dummy_na=True)

### Merge Identities and Transactions

In [None]:
data = pd.merge(df, ids, on="TransactionID", how="left")
data.shape

In [None]:
data.head()

In [None]:
del data["TransactionID"]

In [None]:
ids.id_01.isna().sum()

In [None]:
data.id_01.isna().sum()

#### not clear yet what to do after merge.
#### only 145K rows will be full
#### let's first look at the performance on the Transactions set without Identities

### Split to Train and Test Sets

In [66]:
#X_train, X_test, y_train, y_test = train_test_split(df, labels, stratify=labels, test_size=0.3, random_state=8)

X_train = df.iloc[:train_size, :]
X_test = df.iloc[train_size:, :]
y_train = labels

### Gradient Boosting Classifier

In [None]:
%%time
#
# Gradient Boosting Machine
#
GBR = GradientBoostingRegressor(random_state=8)

parameters_grid = {
    "n_estimators": [350, 400, 450],
    "min_samples_split": [2, 3, 4],
    "min_samples_leaf": [1, 2, 3, 4]
}

gcv = GridSearchCV(GBR, parameters_grid, scoring='neg_mean_squared_error')
gcv.fit(features_train, labels_train)
GBR = gcv.best_estimator_
print(GBR)
print("GBR Score:", gcv.best_score_)

### Neural Network

In [70]:
def rocauc(y_true, y_pred):
    return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)


classifier = Sequential()

classifier.add(Dense(512, activation='relu', kernel_initializer='normal', input_dim=df.shape[1]))
classifier.add(Dropout(0.1))
classifier.add(Dense(128, activation='relu', kernel_initializer='normal'))
classifier.add(Dropout(0.1))
classifier.add(Dense(16, activation='relu', kernel_initializer='normal'))
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='normal'))

classifier.compile(optimizer ='adam', loss='binary_crossentropy', metrics =['accuracy', rocauc])

classifier.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               925184    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               65664     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                2064      
_________________________________________________________________
dense_3 (Dense)      

In [72]:
%%time

classifier.fit(X_train, y_train, batch_size=512, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Wall time: 13min 21s


<tensorflow.python.keras.callbacks.History at 0x1b1610c160>

In [80]:
predictions = classifier.predict(X_test)

write_to_submission_file(np.round(predictions, 1), df_test.TransactionID, out_file="submission.01.no_id.csv")

In [None]:
X_train_1 = X_train[y_train.astype(bool).values]
X_train_1['Label'] = 1

In [None]:
X_train_0 = X_train[(1 - y_train).astype(bool).values]
X_train_0['Label'] = 0

In [None]:
X_train_0.shape

In [None]:
%%time

for _ in range(50):
    sample_X_zeros = X_train_0.sample(14464)
    temp_X_train = pd.concat([sample_X_zeros, X_train_1])
    temp_X_train = temp_X_train.sample(frac=1).reset_index(drop=True)
    
    temp_y_train = temp_X_train['Label']
    del temp_X_train['Label']
    
    classifier.fit(temp_X_train, temp_y_train, batch_size=512, epochs=5)

# NN Performance check
print("Accuracy on Test set:")
predictions = classifier.predict(X_test)
print(roc_auc_score(y_test, np.around(predictions)))

In [None]:
predictions = classifier.predict(X_test)

print(accuracy_score(y_test, np.around(predictions)))

In [None]:


print(accuracy_score(y_test, np.around(predictions)))

In [None]:
count = 0

for x in predictions:
    if (x > 0.5):
        count = count + 1
        
print(count)

In [None]:
temp_X_train