In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

# Set TensorFlow's logging threshold to INFO so that INFO-level messages are emitted.
tf.logging.set_verbosity(tf.logging.INFO)

  from ._conv import register_converters as _register_converters


In [3]:
# Load the three engineered datasets in a single pass, in this order:
#   df_train - training set after train/test split
#   df_valid - testing set after train/test split
#   df_infer - inference set provided in Kaggle
df_train, df_valid, df_infer = map(
    pd.read_csv,
    ('../input/train.csv', '../input/valid.csv', '../input/infer.csv')
)

In [19]:
# Inspect which distinct values this count feature takes in the training set and how often;
# the number of distinct values is what an identity categorical column needs as num_buckets.
df_train['AMT_REQ_CREDIT_BUREAU_HOUR'].value_counts()

0    205102
1       885
2        36
3         5
Name: AMT_REQ_CREDIT_BUREAU_HOUR, dtype: int64

In [None]:
from tensorflow import feature_column

# initializing the LABEL and INPUT_COLUMNS variables
LABEL = 'TARGET'

# ---------------------------------------------------------------------------
# External-source scores
# ---------------------------------------------------------------------------
# Raw numeric versions; will be used in the model.
# (The builders live in the `feature_column` module imported above, so every
# call has to be qualified with it.)
ext_src_1 = feature_column.numeric_column('EXT_SOURCE_1')
ext_src_2 = feature_column.numeric_column('EXT_SOURCE_2')
ext_src_3 = feature_column.numeric_column('EXT_SOURCE_3')

# Evenly spaced boundaries on [0, 1] used to bucketize the three scores.
# NOTE(review): assumes the EXT_SOURCE_* values lie in [0, 1] - confirm.
num_buckets = 50
score_buckets = np.linspace(0, 1, num_buckets)

# bucketized_column only accepts a sorted list/tuple of boundaries, not a
# numpy array, so hand it a plain Python list.
score_boundaries = score_buckets.tolist()

bucket_ext_src_1 = feature_column.bucketized_column(ext_src_1, score_boundaries)
bucket_ext_src_2 = feature_column.bucketized_column(ext_src_2, score_boundaries)
bucket_ext_src_3 = feature_column.bucketized_column(ext_src_3, score_boundaries)

# Cross of the three bucketized scores, hashed into num_buckets**3 slots and
# then embedded so the DNN receives a dense input; will be used in the model.
crossed_ext_source = feature_column.crossed_column(
    [bucket_ext_src_1, bucket_ext_src_2, bucket_ext_src_3],
    num_buckets**3
)
embedding_ext_source = feature_column.embedding_column(crossed_ext_source, 15)

# ---------------------------------------------------------------------------
# Integer-coded categorical features
# ---------------------------------------------------------------------------
# (feature key, num_buckets) pairs. An identity column expects integer values
# in [0, num_buckets).
# NOTE(review): presumably the engineered CSVs already encode every one of
# these columns as such integers - verify against the preprocessing step.
IDENTITY_FEATURES = [
    ('CODE_GENDER', 2),
    ('NAME_INCOME_TYPE', 8),
    ('NAME_FAMILY_STATUS', 5),
    ('NAME_CONTRACT_TYPE', 2),
    ('FLAG_OWN_CAR', 2),
    ('FLAG_OWN_REALTY', 2),
    ('CNT_CHILDREN', 5),
    ('CNT_FAM_MEMBERS', 7),
    ('AMT_REQ_CREDIT_BUREAU_HOUR', 4),
    ('AMT_REQ_CREDIT_BUREAU_DAY', 3),
    ('AMT_REQ_CREDIT_BUREAU_WEEK', 3),
    ('AMT_REQ_CREDIT_BUREAU_MON', 11),
    ('AMT_REQ_CREDIT_BUREAU_QRT', 7),
    ('AMT_REQ_CREDIT_BUREAU_YEAR', 7),
    ('OBS_30_CNT_SOCIAL_CIRCLE', 18),
    ('OBS_60_CNT_SOCIAL_CIRCLE', 18),
    ('DEF_30_CNT_SOCIAL_CIRCLE', 6),
    ('DEF_60_CNT_SOCIAL_CIRCLE', 5),
    ('AMT_INCOME_TOTAL', 10),
    ('AMT_CREDIT', 10),
    ('AMT_ANNUITY', 10),
    ('AMT_GOODS_PRICE', 10),
    ('DAYS_BIRTH', 10),
    ('DAYS_ID_PUBLISH', 10),
    ('DAYS_LAST_PHONE_CHANGE', 8),
    ('DAYS_REGISTRATION', 10),
    ('OWN_CAR_AGE', 10),
    ('REGION_POPULATION_RELATIVE', 10)
]

CATEGORICAL_COLUMNS = [
    feature_column.categorical_column_with_identity(key=key, num_buckets=buckets)
    for key, buckets in IDENTITY_FEATURES
]

# ---------------------------------------------------------------------------
# Columns handed to the estimator
# ---------------------------------------------------------------------------
# A DNN only accepts dense columns: numeric columns and the embedding go in
# as they are, while each categorical column is one-hot encoded through
# indicator_column. indicator_column needs the categorical column object
# itself, not the feature name.
INPUT_COLUMNS = [ext_src_1, ext_src_2, ext_src_3, embedding_ext_source]
INPUT_COLUMNS.extend(
    feature_column.indicator_column(column) for column in CATEGORICAL_COLUMNS
)

In [None]:
# defining a training input function which feeds the training pandas dataframe
def input_fn_train(df, num_epochs, batch_size=400):
    '''
    inputs:
    df - training set after train/test split; must contain the LABEL column
    num_epochs - number of passes to make over df
    batch_size - number of rows per minibatch (defaults to 400)

    output:
    the input function built by tf.estimator.inputs.pandas_input_fn,
    which yields shuffled minibatches of x,y
    '''
    return tf.estimator.inputs.pandas_input_fn(
        # the label is supplied through y, so keep it out of the feature frame
        x = df.drop(LABEL, axis=1),
        y = df[LABEL],
        batch_size = batch_size,
        num_epochs = num_epochs,
        shuffle = True,
        queue_capacity = 1000,
        num_threads = 1
    )

In [None]:
# defining an evaluation input function which feeds the validation pandas dataframe
def input_fn_eval(df, num_epochs, batch_size=128):
    '''
    inputs:
    df - testing set after train/test split; must contain the LABEL column
    num_epochs - number of passes to make over df
    batch_size - number of rows per minibatch (defaults to 128)

    output:
    the input function built by tf.estimator.inputs.pandas_input_fn,
    which yields minibatches of x,y in dataframe order
    '''
    return tf.estimator.inputs.pandas_input_fn(
        # the label is supplied through y, so keep it out of the feature frame
        x = df.drop(LABEL, axis=1),
        y = df[LABEL],
        batch_size = batch_size,
        num_epochs = num_epochs,
        # evaluation only aggregates over the rows, so shuffling adds nothing
        shuffle = False,
        queue_capacity = 1000,
        num_threads = 1
    )

In [None]:
# defining a prediction input function which feeds the inference pandas dataframe
def input_fn_infer(df, num_epochs, batch_size=128):
    '''
    inputs:
    df - inference dataset provided in Kaggle
    num_epochs - number of passes to make over df
    batch_size - number of rows per minibatch (defaults to 128)

    output:
    the input function built by tf.estimator.inputs.pandas_input_fn,
    which yields minibatches of x,None in dataframe order
    '''
    return tf.estimator.inputs.pandas_input_fn(
        x = df,
        y = None,
        batch_size = batch_size,
        num_epochs = num_epochs,
        # predictions have to come back in the same order as the rows of df,
        # otherwise they cannot be matched to the rows they belong to
        shuffle = False,
        queue_capacity = 1000,
        num_threads = 1
    )

In [None]:
# Directory that receives checkpoints and summaries.
# NOTE(review): hard-coded absolute Windows path - consider making this configurable.
OUTDIR = 'C:\\Log'

# NOTE(review): this writer is not used anywhere else in this cell; the
# estimator is given model_dir=OUTDIR separately - confirm it is still needed.
file_writer = tf.summary.FileWriter(OUTDIR)

# Binary classifier: four hidden layers of 6 ReLU units each, dropout 0.2,
# trained with Adam.
# NOTE(review): learning_rate=0.00001 is very small - confirm it is intentional.
estimator = tf.estimator.DNNClassifier(
    activation_fn=tf.nn.relu,
    hidden_units=[6, 6, 6, 6],
    feature_columns=INPUT_COLUMNS,
    model_dir=OUTDIR,
    n_classes=2,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.00001),
    dropout=0.2,
    loss_reduction='weighted_sum')

# Training reads 5 epochs of the training set; max_steps is an upper bound
# on the number of training steps.
train_spec = tf.estimator.TrainSpec(
    input_fn=input_fn_train(df_train, 5),
    max_steps=1000000)

# steps=None evaluates until the input function is exhausted, so a single
# epoch covers every validation row exactly once; asking for more epochs only
# repeats the same rows and multiplies the cost of every evaluation.
eval_spec = tf.estimator.EvalSpec(
    input_fn=input_fn_eval(df_valid, 1),
    steps=None,
    start_delay_secs=1,  # start evaluating after N seconds
    throttle_secs=10)  # evaluate every N seconds

tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)