In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

  from ._conv import register_converters as _register_converters


In [2]:
def load_datasets():
    # reading the datasets
    train = pd.read_csv('../input/train.csv.gz', compression='infer')
    infer = pd.read_csv('../input/test.csv.gz', compression='infer')
    
    train_features = train.drop(['Id','SalePrice'], axis=1)
    train_labels = train['SalePrice']
    infer_features = infer.drop('Id', axis=1)
    
    from sklearn.model_selection import train_test_split
    X_train, X_eval, y_train, y_eval = train_test_split(train_features, train_labels, test_size=0.15, random_state=42)
    
    def get_bins(df):
        bins = {}
        numeric_columns = [
            'LotFrontage', 'LotArea', 'TotalBsmtSF', 'BsmtFinSF2', 'BsmtUnfSF', 'WoodDeckSF',
            'BsmtFinSF1', 'MasVnrArea', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea',
            'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal']
        
        for column in numeric_columns:
            tempdf, retbins = pd.cut(df[column], 10, labels=None, retbins=True, include_lowest=True)
            bins[column] = retbins

        return bins 
    
    bins = get_bins(X_train)
    
    labels, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
    
    return X_train, X_eval, y_train, y_eval, infer_features, bins

X_train, X_eval, y_train, y_eval, infer_features, bins = load_datasets()

In [3]:
from tensorflow.feature_column import *

all_columns = X_train.columns
numeric_columns = [
        'LotFrontage', 'LotArea', 'TotalBsmtSF', 'BsmtFinSF2', 'BsmtUnfSF', 'WoodDeckSF',
        'BsmtFinSF1', 'MasVnrArea', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea',
        'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal']
categorical_columns = set(all_columns) - set(numeric_columns)

BUCKETIZED_FEATURES = [bucketized_column(numeric_column(column), boundaries=bins[column].tolist()) for column in 
                      numeric_columns]

CATEGORICAL_FEATURES = [categorical_column_with_identity(key=column, num_buckets=150) for column in categorical_columns]

FEATURE_COLUMNS = []
for feature in BUCKETIZED_FEATURES:
    FEATURE_COLUMNS.append(feature)
    
for feature in CATEGORICAL_FEATURES:
    FEATURE_COLUMNS.append(indicator_column(feature))
    
for column in categorical_columns:
    X_train[column], junk = pd.factorize(X_train[column])
    X_eval[column], junk = pd.factorize(X_eval[column])
    infer_features[column], junk = pd.factorize(infer_features[column])

In [4]:
def train_input_fn(features, labels):
    return tf.estimator.inputs.pandas_input_fn(
                x = features,
                y = labels,
                num_epochs = 1,
                shuffle = True,
                batch_size=50)

def eval_input_fn(features, labels):
    return tf.estimator.inputs.pandas_input_fn(
                x = features,
                y = labels,
                num_epochs = 1,
                shuffle = True)

def pred_input_fn(features):
    return tf.estimator.inputs.pandas_input_fn(
                x = features,
                num_epochs = 1,
                shuffle = False)

In [5]:
def model_fn(features, labels, mode, params):
    # mapping the feature columns as input layer
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    
    # building the NN based on the list passed in 'hidden_units'
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        net = tf.layers.dropout(net, rate=0.1, training=True)
        
    # compute the result
    output_layer = tf.layers.dense(net, units=1)
    
    # Reshape the output layer to a 1-dim Tensor to return predictions
    value = tf.squeeze(output_layer, 1)
    
    # where to terminate if mode == predict
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=value)
    
    # computing the loss function

    loss = tf.sqrt(tf.losses.mean_squared_error(tf.log1p(labels), tf.log1p(value)))
    
    # where to terminate if mode == train
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    
    # where to terminate if mode == eval
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss)

In [6]:
OUTDIR = './DNN_CUSTOM_REGRESSOR'

file_writer = tf.summary.FileWriter(OUTDIR)

estimator = tf.estimator.Estimator(
    model_fn=model_fn, 
    model_dir=OUTDIR, 
    params={
        'feature_columns':FEATURE_COLUMNS,
        'hidden_units':[64,64,64,64,64,64]
    })

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './DNN_CUSTOM_REGRESSOR', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001F0AE1167F0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [7]:
def train_and_evaluate(estimator, epochs=50):
    for i in range(epochs):
        estimator.train(input_fn=train_input_fn(X_train,y_train.astype('float32')))
        estimator.evaluate(input_fn=eval_input_fn(X_eval,y_eval.astype('float32')))
        
train_and_evaluate(estimator)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:loss = 12.012163, step = 1
INFO:tensorflow:Saving checkpoints for 25 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:Loss for final step: 8.290607.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:32:56
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-25
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:32:58
INFO:tensorflow:Saving dict for global step 25: global_step = 25, loss = 8.01183
INFO:tensorflow:Saving 'checkpoint_path

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 175 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:loss = 0.39997995, step = 176
INFO:tensorflow:Saving checkpoints for 200 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:Loss for final step: 0.28558227.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:34:49
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:34:51
INFO:tensorflow:Saving dict for global step 200: global_step = 200, loss = 0.40634355
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 200: ./DNN_CUSTOM_REGRESSOR\model.ckpt-200
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn

INFO:tensorflow:Saving checkpoints for 350 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:loss = 0.22952683, step = 351
INFO:tensorflow:Saving checkpoints for 375 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:Loss for final step: 0.17353313.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:36:43
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-375
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:36:45
INFO:tensorflow:Saving dict for global step 375: global_step = 375, loss = 0.33599606
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 375: ./DNN_CUSTOM_REGRESSOR\model.ckpt-375
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.


INFO:tensorflow:loss = 0.21052389, step = 526
INFO:tensorflow:Saving checkpoints for 550 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:Loss for final step: 0.24497645.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:38:37
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-550
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:38:38
INFO:tensorflow:Saving dict for global step 550: global_step = 550, loss = 0.31608105
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 550: ./DNN_CUSTOM_REGRESSOR\model.ckpt-550
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-550
INF

INFO:tensorflow:Loss for final step: 0.13284148.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:40:27
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-725
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:40:29
INFO:tensorflow:Saving dict for global step 725: global_step = 725, loss = 0.3106423
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 725: ./DNN_CUSTOM_REGRESSOR\model.ckpt-725
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-725
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 725 into ./

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-12-13:42:16
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-900
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:42:18
INFO:tensorflow:Saving dict for global step 900: global_step = 900, loss = 0.30723
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 900: ./DNN_CUSTOM_REGRESSOR\model.ckpt-900
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-900
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 900 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:loss = 0.15759237, step = 901
INFO:t

INFO:tensorflow:Starting evaluation at 2018-08-12-13:44:06
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-1075
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:44:08
INFO:tensorflow:Saving dict for global step 1075: global_step = 1075, loss = 0.29339403
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1075: ./DNN_CUSTOM_REGRESSOR\model.ckpt-1075
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-1075
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1075 into ./DNN_CUSTOM_REGRESSOR\model.ckpt.
INFO:tensorflow:loss = 0.17443192, step = 1076
INFO:tensorflow:Saving checkpoints

INFO:tensorflow:Starting evaluation at 2018-08-12-13:45:58
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_CUSTOM_REGRESSOR\model.ckpt-1250
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-12-13:46:00
INFO:tensorflow:Saving dict for global step 1250: global_step = 1250, loss = 0.2834935
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1250: ./DNN_CUSTOM_REGRESSOR\model.ckpt-1250


In [None]:
generator = estimator.predict(input_fn=pred_input_fn(infer_features))
predictions = [next(generator) for i in range(len(infer_features))]

In [None]:
ids = pd.read_csv('../input/test.csv.gz', compression='infer')['Id']

submission = pd.DataFrame()
submission['SalePrice'] = predictions
submission['Id'] = ids
submission.set_index('Id', inplace=True)

submission.to_csv('../output/07.dnn_custom_regressor[128x5]_50_epochs_dropout.csv')