# Lab - CarPark Vacancy
Reference: Dataset was extracted from one of the KlangValley shopping mall (updated to Nov 2018)

Exercise: 
1. Predict the carpark vacant status in Zone A.




In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import shutil

## Reading dataset

In [3]:
df_sep = pd.read_csv("./data/carpark/2018-09/vacantBays_2018-9.csv")
print('Sep dataset: {}'.format(df_sep.shape))

df_oct = pd.read_csv("./data/carpark/2018-10/vacantBays_2018-10.csv")
#df_oct.dtypes
print('Oct dataset: {}'.format(df_oct.shape))

df_nov = pd.read_csv("./data/carpark/2018-11/vacantBays_2018-11.csv")
print('Nov dataset: {}'.format(df_nov.shape))

df = df_sep.append(df_oct.append(df_nov))
print('Sep - Oct dataset: {}'.format(df.shape))

Sep dataset: (1051, 9)
Oct dataset: (2969, 9)
Nov dataset: (2871, 9)
Sep - Oct dataset: (6891, 9)


## Cleanup & prepare data metadata

In [4]:
#Cleanup numbers

cleanup_nums = {"A": {"FULL": 0},
                "B": {"FULL": 0},
                "C": {"FULL": 0},
                "G": {"FULL": 0},
                "H": {"FULL": 0},
                "P": {"FULL": 0},
                "U": {"FULL": 0},
               }

df.replace(cleanup_nums, inplace=True)
df.dropna(subset=['A','B','C','G','H','P','U'])
print(df.shape)

(6891, 9)


In [5]:
# Convert to datetime

df['A'] = df['A'].astype(np.float64)
df['B'] = df['B'].astype(np.float64)
df['C'] = df['C'].astype(np.float64)
df['G'] = df['G'].astype(np.float64)
df['H'] = df['H'].astype(np.float64)
df['P'] = df['P'].astype(np.float64)
df['U'] = df['U'].astype(np.float64)
df['DateTime'] =  pd.to_datetime(df['DateTime'])
df.dtypes

DateTime      datetime64[ns]
IsoWeekday             int64
A                    float64
B                    float64
C                    float64
G                    float64
H                    float64
P                    float64
U                    float64
dtype: object

In [6]:
# Extract Hour and Minute
df['Hour'] = df.DateTime.apply(lambda x: x.hour)
df['Minute'] = df.DateTime.apply(lambda x: x.minute)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,1,32
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,1,39
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,1,39
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,1,39
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,1,40


## Classify the carpark vacant status 


In [7]:
df.describe()

Unnamed: 0,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute
count,6891.0,6857.0,6856.0,6857.0,6853.0,6851.0,6855.0,6855.0,6891.0,6891.0
mean,4.010013,697.385883,1440.016044,723.30261,772.057931,779.246679,209.576222,494.911451,11.4844,22.52184
std,1.974669,604.831289,1074.659781,667.102289,569.929676,588.127668,106.128075,287.413354,6.928633,16.762178
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,63.0,292.0,54.0,183.0,166.0,121.0,233.0,5.0,15.0
50%,4.0,739.0,1592.5,726.0,826.0,823.0,254.0,553.0,11.0,30.0
75%,6.0,1057.0,2575.0,1071.0,1372.0,1399.0,305.0,779.0,17.0,39.5
max,7.0,1786.0,2646.0,1921.0,1495.0,1519.0,317.0,831.0,23.0,46.0


In [8]:
# Define status for Carpark Zone A
def A_state(x):
    if x > 1057:
        return 4 # you are early!
    elif x > 739:
        return 3 # plenty of vacancy
    elif x > 63:
        return 2 # limited bay
    elif x > 0:
        return 1 # good luck
    else:
        return 0
    
df['A_State'] = df['A'].apply(A_state)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute,A_State
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,1,32,3
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,1,39,3
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,1,39,3
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,1,39,3
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,1,40,3


## Split into dataset of: Train, Validation, Test (70%-20%-10%)

In [9]:
df_train0, df_valid0, df_test0 = np.split(df.sample(frac=1), [int(.7*len(df)), int(.9*len(df))])

# select only Featured columns
df_train = df_train0[['A_State','Hour','Minute','IsoWeekday']]
df_valid = df_valid0[['A_State','Hour','Minute','IsoWeekday']]
df_test = df_test0[['A_State','Hour','Minute','IsoWeekday']]

print('Train set: {}'.format(df_train.shape))
print('Validate set: {}'.format(df_valid.shape))
print('Test set: {}'.format(df_test.shape))



Train set: (4823, 4)
Validate set: (1378, 4)
Test set: (690, 4)


## Feature & Label 

In [10]:
FEATURES_NUM = ['Hour','Minute','IsoWeekday']
LABEL = 'A_State'

## Determine metrics for validation - Accuracy

In [11]:
def print_accuracy(model, df):
  metrics = model.evaluate(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    batch_size = 128,
    shuffle = False
  ))
  print('Accuracy on dataset = {}'.format(metrics['accuracy']))
    
#print_accuracy(model, df_valid)

## Model: DNN Classifier

In [13]:
# DNNClassifier
OUTDIR = 'model/lab3_class_carpark'
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

model = tf.estimator.DNNClassifier(
    hidden_units = [512, 256, 128], 
    feature_columns = [tf.feature_column.numeric_column('Hour'),
                       tf.feature_column.numeric_column('Minute'),
                       tf.feature_column.numeric_column('IsoWeekday')
                      ], 
    activation_fn = tf.nn.relu,
    n_classes=5,
    optimizer=tf.train.AdamOptimizer(
      learning_rate=0.01,
      #l1_regularization_strength=0.001
    ),
    #loss_reduction='weighted_sum',
    model_dir = OUTDIR,
#    config=tf.estimator.RunConfig(save_summary_steps=2,
#                                  save_checkpoints_steps=2)
)

model.train(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df_train,
    y = df_train[LABEL],
    batch_size = 128,
    num_epochs = 50,
    shuffle = True
  ));

print_accuracy(model, df_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model/lab3_class_carpark', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x129097f60>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Gra

## Prediction 

In [14]:
predictions = model.predict(input_fn = tf.estimator.inputs.pandas_input_fn(
#    x = pd.DataFrame({
#                      'Hour':[9,10,12],
#                      'Minute':[0,0,0],
#                      'IsoWeekday':[4,4,5]}),
    x = df_test,
    y = None,
    batch_size = 64,
    shuffle = False
  ))

for items in predictions:
  print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/lab3_class_carpark/model.ckpt-1884
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([-1.2516205, -2.161142 ,  2.0486822, -0.8526667,  1.0988829],
      dtype=float32), 'probabilities': array([0.0246885 , 0.00994247, 0.6695732 , 0.03679241, 0.2590034 ],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ -3.267331 , -10.292105 ,  -2.482467 ,   1.0915042,   3.195289 ],
      dtype=float32), 'probabilities': array([1.3848564e-03, 1.2319252e-06, 3.0357558e-03, 1.0824874e-01,
       8.8732940e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([  0.48752907, -11.403676  ,  -1.1975534 ,   2.896935  ,
         5.3631115 ], dtype=float32), 'probabilities': array([6.9752904e-03, 4.7783

       2.5021802e-03], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-5.4722667 , -7.237625  ,  0.20896405,  0.14845264,  2.5334358 ],
      dtype=float32), 'probabilities': array([2.8022376e-04, 4.7953439e-05, 8.2192533e-02, 7.7366441e-02,
       8.4011281e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-4.1280947 , -4.917024  ,  0.31164032,  0.40154535,  1.3158112 ],
      dtype=float32), 'probabilities': array([0.00243738, 0.00110738, 0.2065743 , 0.22600682, 0.5638741 ],
      dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-3.350757  , -9.219654  ,  0.0813123 ,  2.7674367 ,  0.22775805],
      dtype=float32), 'probabilities': array([1.9164155e-03, 5.4157658e-06, 5.9295055e-02, 8.7013656e-01,
       6.8646602e-02], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-0.36066362,

       1.56478211e-01], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 5.619867 ,  5.898937 ,  1.0929143, -6.6394053, -1.443691 ],
      dtype=float32), 'probabilities': array([4.2852715e-01, 5.6647021e-01, 4.6339142e-03, 2.0316277e-06,
       3.6670262e-04], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-2.8050177, -5.1624866,  1.3002973,  2.3404682,  1.0404239],
      dtype=float32), 'probabilities': array([3.5689981e-03, 3.3783968e-04, 2.1650200e-01, 6.1263585e-01,
       1.6695534e-01], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([-2.1643014, -2.662145 ,  2.926461 , -0.8371478, -1.5043019],
      dtype=float32), 'probabilities': array([0.00588836, 0.00357918, 0.9569388 , 0.02220085, 0.01139274],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 4.03375  ,  5.1777673,  3

       3.1441547e-02], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-1.502237  , -8.476299  , -0.29628628,  2.9502933 ,  0.59290755],
      dtype=float32), 'probabilities': array([1.0171764e-02, 9.5191772e-06, 3.3973005e-02, 8.7318367e-01,
       8.2662024e-02], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 1.8720708,  3.4684281,  2.5167854, -4.7768464, -3.3925197],
      dtype=float32), 'probabilities': array([1.2743831e-01, 6.2891102e-01, 2.4282646e-01, 1.6508665e-04,
       6.5904873e-04], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 3.7689455,  4.1486263,  2.8357549, -6.098409 , -5.326722 ],
      dtype=float32), 'probabilities': array([3.5022849e-01, 5.1197034e-01, 1.3774377e-01, 1.8155726e-05,
       3.9278351e-05], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 4.247

       8.2493347e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-2.161856  ,  0.46863508,  2.4363153 , -0.9095691 , -2.9329457 ],
      dtype=float32), 'probabilities': array([0.00846425, 0.1174887 , 0.8405214 , 0.02961077, 0.00391479],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-3.0180767, -2.4213452,  1.9107502,  0.9685877,  2.0665023],
      dtype=float32), 'probabilities': array([0.00280568, 0.0050956 , 0.38779297, 0.1511555 , 0.45315027],
      dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ 4.922859  ,  4.526724  ,  0.8179875 , -5.6527576 , -0.70851815],
      dtype=float32), 'probabilities': array([5.9066153e-01, 3.9746538e-01, 9.7412691e-03, 1.5080184e-05,
       2.1167162e-03], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-2.2099135 , -3.3196008 ,  0.5999

{'logits': array([-5.4722667 , -7.237625  ,  0.20896405,  0.14845264,  2.5334358 ],
      dtype=float32), 'probabilities': array([2.8022376e-04, 4.7953439e-05, 8.2192533e-02, 7.7366441e-02,
       8.4011281e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ 3.6813984,  4.4639425,  2.9613981, -5.964963 , -5.4370127],
      dtype=float32), 'probabilities': array([2.7218619e-01, 5.9527916e-01, 1.3248721e-01, 1.7599672e-05,
       2.9839426e-05], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-0.12122652,  0.22154017,  2.3352036 , -0.82752603, -2.1292028 ],
      dtype=float32), 'probabilities': array([0.06802874, 0.09584162, 0.7934261 , 0.03356993, 0.00913354],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([-1.9771712, -2.6053731,  2.8101976,  2.2112482, -1.4756185],
      dtype=float32), 'probabilities': array([0.00528853, 0.

{'logits': array([ 3.3189766,  4.6521444,  2.3828192, -6.1891203, -3.963685 ],
      dtype=float32), 'probabilities': array([1.9282934e-01, 7.3140937e-01, 7.5614505e-02, 1.4317232e-05,
       1.3254194e-04], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 2.619414 ,  3.2014203,  2.5702612, -5.880436 , -4.6103725],
      dtype=float32), 'probabilities': array([2.6719472e-01, 4.7817844e-01, 2.5437891e-01, 5.4373857e-05,
       1.9362949e-04], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 0.46108273, -9.807383  , -0.03999203,  3.4462674 ,  3.759515  ],
      dtype=float32), 'probabilities': array([2.0632919e-02, 7.1618126e-07, 1.2501054e-02, 4.0832871e-01,
       5.5853659e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-11.082168  ,  -5.0786066 ,   2.2954242 ,   0.53799176,
         4.005224  ], dtype=float32), 'probabilities':

       2.9486869e-02], dtype=float32), 'class_ids': array([3]), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 3.1608012 ,  4.1945634 ,  0.9965245 , -5.195709  , -0.25056207],
      dtype=float32), 'probabilities': array([2.5254515e-01, 7.1006197e-01, 2.9000510e-02, 5.9313410e-05,
       8.3330264e-03], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([ 4.1934295 ,  2.9148746 ,  0.34765607, -4.711631  ,  0.16518617],
      dtype=float32), 'probabilities': array([7.58869052e-01, 2.11299017e-01, 1.62169300e-02, 1.02978964e-04,
       1.35121075e-02], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([  6.0583863, -18.57157  , -11.198004 ,  -6.533806 ,  15.656945 ],
      dtype=float32), 'probabilities': array([6.7821842e-05, 1.3636881e-15, 2.1727711e-12, 2.3048900e-10,
       9.9993217e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits'

{'logits': array([-2.9402845, -1.7217233,  2.865963 ,  1.4968688, -1.6131699],
      dtype=float32), 'probabilities': array([0.00235263, 0.00795734, 0.781943  , 0.19887725, 0.00886977],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 3.9485917,  4.727165 ,  3.1556377, -6.5243297, -5.8052483],
      dtype=float32), 'probabilities': array([2.7540961e-01, 5.9994197e-01, 1.2462453e-01, 7.7919631e-06,
       1.5993377e-05], dtype=float32), 'class_ids': array([1]), 'classes': array([b'1'], dtype=object)}
{'logits': array([-1.8859828 , -7.890307  ,  0.29480433,  2.6721733 ,  3.056554  ],
      dtype=float32), 'probabilities': array([4.0751724e-03, 1.0057754e-05, 3.6078606e-02, 3.8880152e-01,
       5.7103461e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([-18.358063 , -14.559309 ,   4.713544 ,   1.6144474,  13.855154 ],
      dtype=float32), 'probabilities': array([1.0231240e-14,

{'logits': array([  2.9020836, -15.36886  ,  -8.1586895,  -4.2643247,  11.778659 ],
      dtype=float32), 'probabilities': array([1.3960197e-04, 1.6215188e-12, 2.1941149e-09, 1.0778538e-07,
       9.9986029e-01], dtype=float32), 'class_ids': array([4]), 'classes': array([b'4'], dtype=object)}
{'logits': array([ 4.863526  ,  4.05403   , -0.049312  , -6.3327208 ,  0.48171604],
      dtype=float32), 'probabilities': array([6.8261695e-01, 3.0382076e-01, 5.0183232e-03, 9.3693343e-06,
       8.5345628e-03], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-3.00068  , -1.4252074,  2.6905868,  1.3135889, -1.7159212],
      dtype=float32), 'probabilities': array([0.0026283 , 0.01270263, 0.77868223, 0.19648874, 0.00949813],
      dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ -1.3956602, -10.703215 ,  -4.7473664,  -1.0112844,   5.59923  ],
      dtype=float32), 'probabilities': array([9.1445487

In [15]:
df_test.head()

Unnamed: 0,A_State,Hour,Minute,IsoWeekday
1476,2,10,15,2
2500,4,2,45,6
1624,4,23,30,3
1638,3,3,0,4
2095,2,20,45,4
