In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

# Raise TensorFlow's log verbosity to INFO so INFO-level messages show up in the cell output.
tf.logging.set_verbosity(tf.logging.INFO)
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
def load_datasets(train_path='../input/train.csv.gz', test_path='../input/test.csv.gz',
                  test_size=0.33, random_state=42, n_bins=10):
    """Load the train/test CSVs, split the training data and compute bin edges.

    Args:
        train_path: CSV file holding the labelled rows; must contain the
            'Id' and 'SalePrice' columns.
        test_path: CSV file holding the rows to predict on; must contain 'Id'.
        test_size: Fraction of the labelled rows held out for evaluation.
        random_state: Seed forwarded to ``train_test_split``.
        n_bins: Number of equal-width bins computed for each numeric column.

    Returns:
        Tuple ``(X_train, X_eval, y_train, y_eval, infer_features, bins)``.
        ``bins`` maps each numeric column name to the array of bin edges
        returned by ``pd.cut`` on the training split only.
    """
    # Kept as a function-local import, as in the original cell.
    from sklearn.model_selection import train_test_split

    # Columns that get equal-width bin edges; everything else is left as is.
    numeric_columns = [
        'LotFrontage', 'LotArea', 'TotalBsmtSF', 'BsmtFinSF2', 'BsmtUnfSF', 'WoodDeckSF',
        'BsmtFinSF1', 'MasVnrArea', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea',
        'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal']

    def get_bins(df):
        """Return ``{column: bin_edges}`` for every numeric column of ``df``."""
        bins = {}
        for column in numeric_columns:
            # Only the edges are needed; the binned series itself is discarded.
            _, edges = pd.cut(df[column], n_bins, labels=None, retbins=True, include_lowest=True)
            bins[column] = edges
        return bins

    # Reading the datasets.
    train = pd.read_csv(train_path, compression='infer')
    infer = pd.read_csv(test_path, compression='infer')

    train_features = train.drop(['Id', 'SalePrice'], axis=1)
    train_labels = train['SalePrice']
    infer_features = infer.drop('Id', axis=1)

    X_train, X_eval, y_train, y_eval = train_test_split(
        train_features, train_labels, test_size=test_size, random_state=random_state)

    # A later cell assigns encoded columns into these frames; hand back
    # independent copies so those assignments never target a slice of `train`.
    X_train = X_train.copy()
    X_eval = X_eval.copy()

    # Bin edges come from the training split only, so the evaluation rows
    # do not influence the bucket boundaries.
    bins = get_bins(X_train)

    return X_train, X_eval, y_train, y_eval, infer_features, bins

In [None]:
# Materialise the train/eval splits, the inference features and the
# per-column bin edges that the following cells rely on.
(
    X_train, X_eval,
    y_train, y_eval,
    infer_features,
    bins,
) = load_datasets()

In [None]:
from tensorflow.feature_column import *

all_columns = X_train.columns
numeric_columns = [
        'LotFrontage', 'LotArea', 'TotalBsmtSF', 'BsmtFinSF2', 'BsmtUnfSF', 'WoodDeckSF',
        'BsmtFinSF1', 'MasVnrArea', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea',
        'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal']
# Every column that is not listed as numeric is treated as categorical.
categorical_columns = set(all_columns) - set(numeric_columns)

# Numeric columns are bucketized with the bin edges computed on the training split.
BUCKETIZED_FEATURES = [
    tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column(column),
        boundaries=bins[column].tolist())
    for column in numeric_columns]

# Integer-encode the categorical columns and size each identity column from
# its own vocabulary.
#
# The vocabulary is learned from X_train only and then reused for X_eval and
# infer_features, so a given category maps to the same integer id in all three
# frames. Factorizing each frame independently would assign ids by order of
# first appearance in that frame, so they would not line up between frames.
#
# Missing values, and values never seen in X_train, are encoded as -1.
# NOTE(review): -1 is expected to be dropped as "missing" by the identity
# column for integer inputs - confirm against the TensorFlow version in use.
#
# This loop overwrites the columns of the three frames in place; re-run the
# loading cell before re-running this one.
CATEGORICAL_FEATURES = []
for column in sorted(categorical_columns):  # sorted: a set has no stable order
    train_codes, vocabulary = pd.factorize(X_train[column])
    X_train[column] = train_codes
    X_eval[column] = pd.Categorical(
        X_eval[column], categories=vocabulary).codes.astype('int64')
    infer_features[column] = pd.Categorical(
        infer_features[column], categories=vocabulary).codes.astype('int64')

    # num_buckets must cover every id produced above; a fixed value (20 in
    # the original) is too small for any column with more distinct values.
    # An all-missing column still needs at least one bucket.
    CATEGORICAL_FEATURES.append(
        tf.feature_column.categorical_column_with_identity(
            key=column, num_buckets=max(len(vocabulary), 1)))

# Bucketized columns are dense already; categorical columns are wrapped in
# indicator columns before being handed to the estimator.
# Changing the bucket counts changes the model's input width, so checkpoints
# written with the previous feature definition will no longer match.
FEATURE_COLUMNS = list(BUCKETIZED_FEATURES) + [
    tf.feature_column.indicator_column(feature) for feature in CATEGORICAL_FEATURES]

In [None]:
def train_input_fn(features, labels):
    """Build the training input function from pandas features and labels.

    The data is shuffled and served in batches of 50 for 2000 epochs.
    """
    make_input_fn = tf.estimator.inputs.pandas_input_fn
    return make_input_fn(x=features, y=labels, batch_size=50, num_epochs=2000, shuffle=True)

def eval_input_fn(features, labels):
    """Build the evaluation input function: one shuffled pass over the data."""
    options = dict(x=features, y=labels, num_epochs=1, shuffle=True)
    return tf.estimator.inputs.pandas_input_fn(**options)

def pred_input_fn(features):
    """Build the prediction input function: one unshuffled pass, no labels."""
    pandas_input_fn = tf.estimator.inputs.pandas_input_fn
    input_fn = pandas_input_fn(x=features, shuffle=False, num_epochs=1)
    return input_fn

In [None]:
# Directory used both for the summary writer and as the estimator's model_dir.
OUTDIR = './DNN_REGRESSOR'

# Summary writer opened on the output directory (not referenced again in this cell).
file_writer = tf.summary.FileWriter(OUTDIR)

# Five hidden layers of 32 units each, with dropout, trained with
# L1-regularised proximal Adagrad.
estimator = tf.estimator.DNNRegressor(
    hidden_units=[32] * 5,
    feature_columns=FEATURE_COLUMNS,
    dropout=0.25,
    model_dir=OUTDIR,
    optimizer=tf.train.ProximalAdagradOptimizer(
        l1_regularization_strength=0.01,
        learning_rate=0.001,
    ),
)

# No max_steps is passed to TrainSpec; the epoch count lives in train_input_fn.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn(X_train, y_train))
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn(X_eval, y_eval))

tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)