In [3]:
import tensorflow as tf
import pandas as pd

In [None]:
# Load the engineered datasets, in this order:
#   df_train - training set after the train/test split
#   df_valid - testing set after the train/test split
#   df_infer - inference set provided in Kaggle
df_train, df_valid, df_infer = map(
    pd.read_csv,
    ('../input/train.csv', '../input/valid.csv', '../input/infer.csv'),
)

In [None]:
# Names of the raw input columns, the label column, and one default value per
# entry of FEATURES (same order). DEFAULTS is not referenced by the visible cells.
FEATURES = ['DAYS_EMPLOYED', 'CODE_GENDER', 'CNT_CHILDREN', 'CNT_FAM_MEMBERS'] # etc...
LABEL = 'TARGET'
DEFAULTS = [[-365],['M'],[2],[4]]

# Feature columns handed to the DNN estimator.
# A DNN only accepts dense columns, so every categorical column is one-hot
# encoded through indicator_column before being passed to the model.
# Identity columns require integer values in [0, num_buckets).
# NOTE(review): confirm CNT_CHILDREN / CNT_FAM_MEMBERS are stored as integers
# within those bounds in the engineered CSVs.
INPUT_COLUMNS = [
    tf.feature_column.numeric_column('DAYS_EMPLOYED'),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key='CODE_GENDER', vocabulary_list=('M','F'))),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_identity(
            key='CNT_CHILDREN', num_buckets=15)),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_identity(
            key='CNT_FAM_MEMBERS', num_buckets=20)),
]

In [13]:
# defining a training input function which feeds the training pandas dataframe
def input_fn_train(df, num_epochs):
    '''
    Build the input function used for training.

    inputs:
    df - training set after train/test split; must contain the FEATURES
         columns and the LABEL column
    num_epochs - number of passes over df (None repeats indefinitely)

    output:
    an input function that yields shuffled minibatches of (features, label)
    '''
    return tf.estimator.inputs.pandas_input_fn(
        # Only the model inputs go into the feature dict, so the label column
        # (and any other column of df) is never exposed as a feature.
        x = df[FEATURES],
        y = df[LABEL],
        batch_size = 128,
        num_epochs = num_epochs,
        shuffle = True,
        queue_capacity = 1000,
        num_threads = 1
    )

In [14]:
# defining an evaluation input function which feeds the validation pandas dataframe
def input_fn_eval(df, num_epochs):
    '''
    Build the input function used for evaluation.

    inputs:
    df - testing set after train/test split; must contain the FEATURES
         columns and the LABEL column
    num_epochs - number of passes over df (use 1 for a single full evaluation)

    output:
    an input function that yields minibatches of (features, label) in the
    row order of df
    '''
    return tf.estimator.inputs.pandas_input_fn(
        # Only the model inputs go into the feature dict; the label is passed
        # separately through y.
        x = df[FEATURES],
        y = df[LABEL],
        batch_size = 128,
        num_epochs = num_epochs,
        # Evaluation metrics do not depend on row order, so shuffling only
        # adds non-determinism between runs.
        shuffle = False,
        queue_capacity = 1000,
        num_threads = 1
    )

In [None]:
# defining a prediction input function which feeds the inference pandas dataframe
def input_fn_infer(df, num_epochs):
    '''
    Build the input function used for prediction.

    inputs:
    df - inference dataset provided in Kaggle; must contain the FEATURES columns
    num_epochs - number of passes over df (use 1 so that exactly one
                 prediction is produced per row)

    output:
    an input function that yields minibatches of features (no labels) in the
    row order of df
    '''
    return tf.estimator.inputs.pandas_input_fn(
        x = df[FEATURES],
        y = None,
        batch_size = 128,
        num_epochs = num_epochs,
        # Must stay unshuffled: predictions are matched back to the rows of df
        # by position, so any reordering would pair them with the wrong rows.
        shuffle = False,
        queue_capacity = 1000,
        num_threads = 1
    )

In [17]:
def train_and_evaluate(output_dir, num_train_steps):
    '''
    Build a DNN binary classifier and run interleaved training and evaluation.

    inputs:
    output_dir - directory where the estimator stores its checkpoints and
                 event files
    num_train_steps - maximum number of training steps (TrainSpec max_steps)

    output:
    None; the trained model is persisted under output_dir

    Reads the notebook-level df_train and df_valid dataframes and the
    INPUT_COLUMNS feature columns.
    '''
    estimator = tf.estimator.DNNClassifier(
        activation_fn=tf.nn.relu,
        hidden_units=[100,50,25,10],
        feature_columns=INPUT_COLUMNS,
        # Honour the caller-supplied directory rather than a global constant.
        model_dir=output_dir,
        n_classes=2,
        optimizer=tf.train.ProximalAdagradOptimizer(
            learning_rate=0.1,
            l1_regularization_strength=0.001),
        dropout=0.2,
        loss_reduction='weighted_sum')

    train_spec = tf.estimator.TrainSpec(
        # num_epochs=None repeats the training data indefinitely, so training
        # is bounded by max_steps only.
        input_fn = input_fn_train(df_train, num_epochs=None),
        max_steps = num_train_steps)

    eval_spec = tf.estimator.EvalSpec(
        # One epoch combined with steps=None evaluates the validation set
        # exactly once per evaluation round.
        input_fn = input_fn_eval(df_valid, num_epochs=1),
        steps = None,
        start_delay_secs = 1, # start evaluating after N seconds
        throttle_secs = 10)  # evaluate every N seconds

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [None]:
# Output directory passed to the training routine for checkpoints.
OUTDIR = 'DNN_CLASSIFIER'

# Start TensorBoard on that directory for monitoring.
# NOTE(review): `TensorBoard` is not imported in the visible cells (only
# tensorflow and pandas are). Presumably this is Datalab's
# google.datalab.ml.TensorBoard - confirm and import it in the first cell,
# otherwise this line raises NameError on a fresh kernel.
TensorBoard().start(OUTDIR)

In [None]:
# Run training and evaluation, stopping after at most 2000 training steps.
train_and_evaluate(OUTDIR, num_train_steps = 2000)

In [None]:
# Show the TensorBoard instances known to the helper.
# NOTE(review): presumably lists the running instances with their pids - confirm
# against the TensorBoard helper's documentation.
TensorBoard().list()

In [None]:
# Shut TensorBoard down once monitoring is finished.
# NOTE(review): if this is google.datalab.ml.TensorBoard, stop() appears to expect
# the pid of the instance to stop (as reported by list()) - confirm, since
# calling it without arguments may raise TypeError.
TensorBoard().stop()