# Lab - Rental Price 
Reference: Dataset was extracted from one of the popular property listing site (updated to Sep 2018)

Exercise: 
1. Try train on all states dataset, then predict the rental based on selected features
2. Try train on only Selangor dataset, then predict the rental based on selected features



In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import shutil

## Read dataset


In [2]:
df_orig = pd.read_csv("https://github.com/chailing/trainerdata/raw/master/RentIndexRaw.csv", low_memory=False)
COLUMNS = list(df_orig)
print (COLUMNS)

['ID', 'BuiltUpSize', 'City', 'CreatedDateTime', 'CreatedUser', 'DataSourceID', 'Furnishing', 'Latitude', 'Longitude', 'NoOfBathroom', 'NoOfBedroom', 'NoOfParking', 'Postcode', 'PostedDate', 'PropertyAddress', 'PropertyName', 'PropertyType', 'RentalPerMth', 'SourceUrl', 'State', 'UpdatedDateTime', 'UpdatedUser']


## Preparing dataset 


In [3]:
print('Original dataset: {}'.format(df_orig.shape))

df_orig['State'] = df_orig['State'].str.upper()
df_orig['City'] = df_orig['City'].str.upper()
df_orig['PropertyType'] = df_orig['PropertyType'].str.upper()

# Selangor
df_sel = df_orig[(df_orig.State == 'SELANGOR')]
print('Selangor dataset: {}'.format(df_sel.shape))

# KL
df_kul = df_orig[(df_orig.State == 'KUALA-LUMPUR')]
print('KL dataset: {}'.format(df_kul.shape))

# Use Selangor data
# remove rows with zero value
df = df_sel[(df_sel[['BuiltUpSize','RentalPerMth', 'NoOfBedroom']] != 0).all(axis=1)]
                            #'NoOfBathroom', 'NoOfParking']] != 0).all(axis=1)]


print('Wanted dataset: {}'.format(df.shape))

Original dataset: (54405, 22)
Selangor dataset: (30292, 22)
KL dataset: (13648, 22)
Wanted dataset: (20626, 22)


## Split into dataset of: Train, Validation, Test (70%-20%-10%)


In [4]:
df_train0, df_valid0, df_test0 = np.split(df.sample(frac=1), [int(.7*len(df)), int(.9*len(df))])

# Retrieve only Featured columns
df_train = df_train0[['State','BuiltUpSize','RentalPerMth','NoOfBedroom','Furnishing','PropertyType']]
df_valid = df_valid0[['State','BuiltUpSize','RentalPerMth','NoOfBedroom','Furnishing','PropertyType']]
df_test = df_test0[['State','BuiltUpSize','RentalPerMth','NoOfBedroom','Furnishing','PropertyType']]

print('Train: {}'.format(df_train.shape))
print('Valid: {}'.format(df_valid.shape))
print('Test: {}'.format(df_test.shape))

Train: (14438, 6)
Valid: (4125, 6)
Test: (2063, 6)


## Categorize string value columns


In [5]:
df_train['PropertyType'].unique()

array(['CONDOMINIUM', 'SERVICED RESIDENCE', 'APARTMENT',
       '2-STY TERRACE/LINK HOUSE', 'SEMI-DETACHED HOUSE', 'TOWNHOUSE',
       'FLAT', 'BUNGALOW', '2.5-STY TERRACE/LINK HOUSE',
       '1-STY TERRACE/LINK HOUSE', '3-STY TERRACE/LINK HOUSE',
       '4-STY TERRACE/LINK HOUSE', '1.5-STY TERRACE/LINK HOUSE',
       '3.5-STY TERRACE/LINK HOUSE', 'CLUSTER HOUSE'], dtype=object)

In [6]:
df_train['State'].unique()

array(['SELANGOR'], dtype=object)

## Determine metrics for validation - RMSE

In [7]:
def print_rmse(model, df):
  metrics = model.evaluate(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    batch_size = 128,
    shuffle = False,
    #queue_capacity = 100
      
  ))
  print('RMSE on dataset = {}'.format(np.sqrt(metrics['average_loss'])))
    
#print_rmse(model, df_valid)

## Model: DNNRegressor 


In [8]:
# DNNRegressor

LABEL = 'RentalPerMth'
OUTDIR = 'rentalDNNRegression_trained'
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

# Categorize string features
categorical_column1 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="State", vocabulary_list=['SELANGOR', 'JOHOR', 'KUALA-LUMPUR', 'PUTRAJAYA'], 
    default_value=0)

categorical_column2 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="Furnishing", vocabulary_list=['Unfurnished', 'Fully Furnished', 'Partly Furnished', 'Unknown'], 
    default_value=0)

categorical_column3 = tf.feature_column.categorical_column_with_vocabulary_list(
    key="PropertyType", vocabulary_list=['CONDOMINIUM', 'SERVICED RESIDENCE', 'APARTMENT',
       '2-STY TERRACE/LINK HOUSE', '3-STY TERRACE/LINK HOUSE',
       '1-STY TERRACE/LINK HOUSE', 'TOWNHOUSE', 'SEMI-DETACHED HOUSE',
       'BUNGALOW', '1.5-STY TERRACE/LINK HOUSE',
       '3.5-STY TERRACE/LINK HOUSE', 'CLUSTER HOUSE', 'FLAT',
       '2.5-STY TERRACE/LINK HOUSE', '4-STY TERRACE/LINK HOUSE',
       'RESIDENTIAL LAND'], 
    default_value=0)


model = tf.estimator.DNNRegressor(    
    hidden_units = [32, 512, 128],
    feature_columns = [tf.feature_column.numeric_column('BuiltUpSize'),
                       tf.feature_column.numeric_column('NoOfBedroom'),
                       #tf.feature_column.indicator_column(categorical_column1),
                       tf.feature_column.indicator_column(categorical_column2),
                       tf.feature_column.indicator_column(categorical_column3)
                      ], 
    activation_fn = tf.nn.relu,
  optimizer=tf.train.AdamOptimizer(
        learning_rate=0.001,
        #l1_regularization_strength=0.01
    ),

    model_dir = OUTDIR
)
 

model.train(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df_train,
    y = df_train[LABEL],
    batch_size = 512,
    num_epochs = 5,
    shuffle = True,
  )
  );

print_rmse(model, df_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'rentalDNNRegression_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000012146358>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initiali

Instructions for updating:
Use `tf.cast` instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into rentalDNNRegression_trained\model.ckpt.
INFO:tensorflow:loss = 2313351700.0, step = 1
INFO:tensorflow:global_step/sec: 88.1834
INFO:tensorflow:loss = 1018126460.0, step = 101 (1.131 sec)
INFO:tensorflow:Saving checkpoints for 141 into rentalDNNRegression_trained\model.ckpt.
INFO:tensorflow:Loss for final step: 1018701700.0.
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-12-13T16:31:56Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from rentalDNNRegression_trained\model.ckpt-141
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-12-13-16:31:57
INFO:tensorflow:Saving dict for global step 141: average_loss = 466784930.0, global_step = 141, label/mean = 2758.84, loss = 58348114000.0, prediction/mean = 2096.0527
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 141: rentalDNNRegression_trained\model.ckpt-141
RMSE on dataset = 21605.205078125


## Prediction 

In [9]:
predictions = model.predict(input_fn = tf.estimator.inputs.pandas_input_fn(
#    x = pd.DataFrame({
#                      'BuiltUpSize':[560,1200,1600],
#                      'NoOfBedroom':[2,3,3],
#                      'PropertyType':['CONDOMINIUM', 'SERVICED RESIDENCE', 'APARTMENT'],
#                      'Furnishing':['Unfurnished', 'Fully Furnished', 'Partly Furnished']}),
    x = df_test,
    y = None,
    batch_size = 128,
    shuffle = False
  ))

for items in predictions:
  print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from rentalDNNRegression_trained\model.ckpt-141
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'predictions': array([1274.0582], dtype=float32)}
{'predictions': array([1257.6813], dtype=float32)}
{'predictions': array([2225.0554], dtype=float32)}
{'predictions': array([2950.614], dtype=float32)}
{'predictions': array([1017.2314], dtype=float32)}
{'predictions': array([1732.3352], dtype=float32)}
{'predictions': array([1799.6702], dtype=float32)}
{'predictions': array([1692.4247], dtype=float32)}
{'predictions': array([1630.7362], dtype=float32)}
{'predictions': array([1440.9702], dtype=float32)}
{'predictions': array([4055.0076], dtype=float32)}
{'predictions': array([1568.4421], dtype=float32)}
{'predictions': array([1858.9897], dtype=float32)}
{'predictions': array([1110.3618], dtype=float32)}
{'predict

{'predictions': array([1806.8724], dtype=float32)}
{'predictions': array([1216.1128], dtype=float32)}
{'predictions': array([2795.602], dtype=float32)}
{'predictions': array([1755.549], dtype=float32)}
{'predictions': array([1689.1804], dtype=float32)}
{'predictions': array([3722.807], dtype=float32)}
{'predictions': array([1911.2153], dtype=float32)}
{'predictions': array([1354.1031], dtype=float32)}
{'predictions': array([1760.4432], dtype=float32)}
{'predictions': array([1329.4248], dtype=float32)}
{'predictions': array([2896.8843], dtype=float32)}
{'predictions': array([1491.1595], dtype=float32)}
{'predictions': array([1567.8867], dtype=float32)}
{'predictions': array([1626.5696], dtype=float32)}
{'predictions': array([1625.7837], dtype=float32)}
{'predictions': array([1955.5333], dtype=float32)}
{'predictions': array([2040.3506], dtype=float32)}
{'predictions': array([1202.4365], dtype=float32)}
{'predictions': array([1846.5681], dtype=float32)}
{'predictions': array([1015.7068],

{'predictions': array([1936.7412], dtype=float32)}
{'predictions': array([1685.8892], dtype=float32)}
{'predictions': array([1764.6816], dtype=float32)}
{'predictions': array([2283.6663], dtype=float32)}
{'predictions': array([1961.7217], dtype=float32)}
{'predictions': array([2454.9255], dtype=float32)}
{'predictions': array([4402.05], dtype=float32)}
{'predictions': array([1567.8867], dtype=float32)}
{'predictions': array([1782.5414], dtype=float32)}
{'predictions': array([2102.414], dtype=float32)}
{'predictions': array([970.8337], dtype=float32)}
{'predictions': array([1662.8103], dtype=float32)}
{'predictions': array([1967.9849], dtype=float32)}
{'predictions': array([1531.6725], dtype=float32)}
{'predictions': array([1807.2811], dtype=float32)}
{'predictions': array([1441.8308], dtype=float32)}
{'predictions': array([1929.3135], dtype=float32)}
{'predictions': array([2141.2073], dtype=float32)}
{'predictions': array([1321.742], dtype=float32)}
{'predictions': array([1697.3468], d

{'predictions': array([1108.2817], dtype=float32)}
{'predictions': array([2325.552], dtype=float32)}
{'predictions': array([2164.347], dtype=float32)}
{'predictions': array([2602.2449], dtype=float32)}
{'predictions': array([2094.3093], dtype=float32)}
{'predictions': array([1123.2522], dtype=float32)}
{'predictions': array([1567.8867], dtype=float32)}
{'predictions': array([4643.5547], dtype=float32)}
{'predictions': array([1033.019], dtype=float32)}
{'predictions': array([2767.5107], dtype=float32)}
{'predictions': array([1772.9084], dtype=float32)}
{'predictions': array([1431.1907], dtype=float32)}
{'predictions': array([1531.6725], dtype=float32)}
{'predictions': array([1076.0372], dtype=float32)}
{'predictions': array([1157.063], dtype=float32)}
{'predictions': array([1529.5925], dtype=float32)}
{'predictions': array([1983.5026], dtype=float32)}
{'predictions': array([2532.2683], dtype=float32)}
{'predictions': array([5865.721], dtype=float32)}
{'predictions': array([4956.677], dt

{'predictions': array([2214.0066], dtype=float32)}
{'predictions': array([3453.9966], dtype=float32)}
{'predictions': array([1419.7329], dtype=float32)}
{'predictions': array([1753.6327], dtype=float32)}
{'predictions': array([1929.3135], dtype=float32)}
{'predictions': array([2583.8298], dtype=float32)}
{'predictions': array([1579.0824], dtype=float32)}
{'predictions': array([2125.0593], dtype=float32)}
{'predictions': array([1810.4021], dtype=float32)}
{'predictions': array([3136.2786], dtype=float32)}
{'predictions': array([1567.8867], dtype=float32)}
{'predictions': array([1858.9897], dtype=float32)}
{'predictions': array([2099.5168], dtype=float32)}
{'predictions': array([2017.7053], dtype=float32)}
{'predictions': array([2029.1929], dtype=float32)}
{'predictions': array([2398.5984], dtype=float32)}
{'predictions': array([4425.9893], dtype=float32)}
{'predictions': array([3687.8184], dtype=float32)}
{'predictions': array([1548.1858], dtype=float32)}
{'predictions': array([1711.353

{'predictions': array([1927.2336], dtype=float32)}
{'predictions': array([1837.5558], dtype=float32)}
{'predictions': array([2141.6328], dtype=float32)}
{'predictions': array([5528.6396], dtype=float32)}
{'predictions': array([2299.0623], dtype=float32)}
{'predictions': array([1789.6768], dtype=float32)}
{'predictions': array([2337.3865], dtype=float32)}
{'predictions': array([1384.0059], dtype=float32)}
{'predictions': array([3476.0466], dtype=float32)}
{'predictions': array([1782.4326], dtype=float32)}
{'predictions': array([2336.284], dtype=float32)}
{'predictions': array([1691.8826], dtype=float32)}
{'predictions': array([1323.9004], dtype=float32)}
{'predictions': array([1880.379], dtype=float32)}
{'predictions': array([2582.3054], dtype=float32)}
{'predictions': array([1936.7412], dtype=float32)}
{'predictions': array([1763.5789], dtype=float32)}
{'predictions': array([2454.687], dtype=float32)}
{'predictions': array([2215.9229], dtype=float32)}
{'predictions': array([1110.3618],

{'predictions': array([1606.3131], dtype=float32)}
{'predictions': array([927.5402], dtype=float32)}
{'predictions': array([1571.3995], dtype=float32)}
{'predictions': array([1033.019], dtype=float32)}
{'predictions': array([2780.4011], dtype=float32)}
{'predictions': array([1663.4744], dtype=float32)}
{'predictions': array([1764.6816], dtype=float32)}
{'predictions': array([3366.7825], dtype=float32)}
{'predictions': array([1461.6956], dtype=float32)}
{'predictions': array([2767.071], dtype=float32)}
{'predictions': array([1293.4556], dtype=float32)}
{'predictions': array([2775.3455], dtype=float32)}
{'predictions': array([1827.2926], dtype=float32)}
{'predictions': array([1978.2953], dtype=float32)}
{'predictions': array([1491.1595], dtype=float32)}
{'predictions': array([4940.6997], dtype=float32)}
{'predictions': array([3484.6382], dtype=float32)}
{'predictions': array([1725.8467], dtype=float32)}
{'predictions': array([2390.4731], dtype=float32)}
{'predictions': array([4792.509], 

{'predictions': array([3430.9175], dtype=float32)}
{'predictions': array([1771.3838], dtype=float32)}
{'predictions': array([834.13763], dtype=float32)}
{'predictions': array([1867.0197], dtype=float32)}
{'predictions': array([2073.389], dtype=float32)}
{'predictions': array([1925.6305], dtype=float32)}
{'predictions': array([1597.8444], dtype=float32)}
{'predictions': array([3484.6382], dtype=float32)}
{'predictions': array([2206.3235], dtype=float32)}
{'predictions': array([1478.6608], dtype=float32)}
{'predictions': array([2582.9219], dtype=float32)}
{'predictions': array([2029.8569], dtype=float32)}
{'predictions': array([1814.7937], dtype=float32)}
{'predictions': array([1262.6924], dtype=float32)}
{'predictions': array([2123.7732], dtype=float32)}
{'predictions': array([1295.1753], dtype=float32)}
{'predictions': array([1325.425], dtype=float32)}
{'predictions': array([1909.0571], dtype=float32)}
{'predictions': array([3135.3704], dtype=float32)}
{'predictions': array([1713.8585]

{'predictions': array([1771.0066], dtype=float32)}
{'predictions': array([1076.0372], dtype=float32)}
{'predictions': array([1818.3302], dtype=float32)}
{'predictions': array([2081.1018], dtype=float32)}
{'predictions': array([1218.2712], dtype=float32)}
{'predictions': array([2492.8113], dtype=float32)}
{'predictions': array([2253.4167], dtype=float32)}
{'predictions': array([2123.7732], dtype=float32)}
{'predictions': array([2398.5984], dtype=float32)}
{'predictions': array([1017.2314], dtype=float32)}
{'predictions': array([2572.781], dtype=float32)}
{'predictions': array([1773.5723], dtype=float32)}
{'predictions': array([2325.552], dtype=float32)}
{'predictions': array([2767.9797], dtype=float32)}
{'predictions': array([2030.3403], dtype=float32)}
