# Lab - CarPark Vacancy
Reference: Dataset was extracted from one of the KlangValley shopping mall (updated to Nov 2018)

Exercise: 
1. Predict the carpark vacant status in Zone A.




In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import shutil

  from ._conv import register_converters as _register_converters


## Reading dataset

In [2]:
df_sep = pd.read_csv("./data/carpark/2018-09/vacantBays_2018-9.csv")
print('Sep dataset: {}'.format(df_sep.shape))

df_oct = pd.read_csv("./data/carpark/2018-10/vacantBays_2018-10.csv")
#df_oct.dtypes
print('Oct dataset: {}'.format(df_oct.shape))

df_nov = pd.read_csv("./data/carpark/2018-11/vacantBays_2018-11.csv")
print('Nov dataset: {}'.format(df_nov.shape))

df = df_sep.append(df_oct.append(df_nov))
print('Sep - Oct dataset: {}'.format(df.shape))

Sep dataset: (1051, 9)
Oct dataset: (2969, 9)
Nov dataset: (2871, 9)
Sep - Oct dataset: (6891, 9)


## Cleanup & prepare data metadata

In [3]:
#Cleanup numbers

cleanup_nums = {"A": {"FULL": 0},
                "B": {"FULL": 0},
                "C": {"FULL": 0},
                "G": {"FULL": 0},
                "H": {"FULL": 0},
                "P": {"FULL": 0},
                "U": {"FULL": 0},
               }

df.replace(cleanup_nums, inplace=True)
df.dropna(subset=['A','B','C','G','H','P','U'])
print(df.shape)

(6891, 9)


In [4]:
# Convert to datetime

df['A'] = df['A'].astype(np.float64)
df['B'] = df['B'].astype(np.float64)
df['C'] = df['C'].astype(np.float64)
df['G'] = df['G'].astype(np.float64)
df['H'] = df['H'].astype(np.float64)
df['P'] = df['P'].astype(np.float64)
df['U'] = df['U'].astype(np.float64)
df['DateTime'] =  pd.to_datetime(df['DateTime'])
df.dtypes

DateTime      datetime64[ns]
IsoWeekday             int64
A                    float64
B                    float64
C                    float64
G                    float64
H                    float64
P                    float64
U                    float64
dtype: object

In [5]:
# Extract Hour and Minute
df['Hour'] = df.DateTime.apply(lambda x: x.hour)
df['Minute'] = df.DateTime.apply(lambda x: x.minute)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,1,32
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,1,39
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,1,39
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,1,39
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,1,40


## Classify the carpark vacant status 


In [6]:
df.describe()

Unnamed: 0,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute
count,6891.0,6857.0,6856.0,6857.0,6853.0,6851.0,6855.0,6855.0,6891.0,6891.0
mean,4.010013,697.385883,1440.016044,723.30261,772.057931,779.246679,209.576222,494.911451,11.4844,22.52184
std,1.974669,604.831289,1074.659781,667.102289,569.929676,588.127668,106.128075,287.413354,6.928633,16.762178
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,63.0,292.0,54.0,183.0,166.0,121.0,233.0,5.0,15.0
50%,4.0,739.0,1592.5,726.0,826.0,823.0,254.0,553.0,11.0,30.0
75%,6.0,1057.0,2575.0,1071.0,1372.0,1399.0,305.0,779.0,17.0,39.5
max,7.0,1786.0,2646.0,1921.0,1495.0,1519.0,317.0,831.0,23.0,46.0


In [8]:
# Define status for Carpark Zone A
def A_state(x):
    if x > 1057:
        return 4 # you are early!
    elif x > 739:
        return 3 # plenty of vacancy
    elif x > 63:
        return 2 # limited bay
    elif x > 0:
        return 1 # good luck
    else:
        return 0
    
df['A_State'] = df['A'].apply(A_state)
df.head()

Unnamed: 0,DateTime,IsoWeekday,A,B,C,G,H,P,U,Hour,Minute,A_State
0,2018-09-20 01:32:30.850890,4,770.0,2592.0,875.0,1410.0,1455.0,310.0,811.0,1,32,3
1,2018-09-20 01:39:01.932373,4,773.0,2590.0,876.0,1415.0,1459.0,310.0,810.0,1,39,3
2,2018-09-20 01:39:40.024931,4,773.0,2591.0,876.0,1414.0,1459.0,310.0,810.0,1,39,3
3,2018-09-20 01:39:52.258908,4,773.0,2591.0,876.0,1415.0,1459.0,310.0,810.0,1,39,3
4,2018-09-20 01:40:49.225890,4,773.0,2590.0,876.0,1412.0,1458.0,310.0,810.0,1,40,3


## Split into dataset of: Train, Validation, Test (70%-20%-10%)

In [9]:
df_train0, df_valid0, df_test0 = np.split(df.sample(frac=1), [int(.7*len(df)), int(.9*len(df))])

# select only Featured columns
df_train = df_train0[['A_State','Hour','Minute','IsoWeekday']]
df_valid = df_valid0[['A_State','Hour','Minute','IsoWeekday']]
df_test = df_test0[['A_State','Hour','Minute','IsoWeekday']]

print('Train set: {}'.format(df_train.shape))
print('Validate set: {}'.format(df_valid.shape))
print('Test set: {}'.format(df_test.shape))



Train set: (4823, 4)
Validate set: (1378, 4)
Test set: (690, 4)


## Feature & Label 

In [12]:
FEATURES_NUM = ['Hour','Minute','IsoWeekday']
LABEL = 'A_State'

## Determine metrics for validation - Accuracy

In [13]:
def print_accuracy(model, df):
  metrics = model.evaluate(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    batch_size = 128,
    shuffle = False
  ))
  print('Accuracy on dataset = {}'.format(metrics['accuracy']))
    
#print_accuracy(model, df_valid)

## Model: DNN Classifier

In [31]:
# DNNClassifier
OUTDIR = 'model/lab3_class_carpark'
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

model = tf.estimator.DNNClassifier(
    hidden_units = [512, 312, 128], 
    feature_columns = [tf.feature_column.numeric_column('Hour'),
                       tf.feature_column.numeric_column('Minute'),
                       tf.feature_column.numeric_column('IsoWeekday')
                      ], 
    activation_fn = tf.nn.sigmoid,
    n_classes=5,
    optimizer=tf.train.AdamOptimizer(
      learning_rate=0.01,
      #l1_regularization_strength=0.001
    ),
    #loss_reduction='weighted_sum',
    model_dir = OUTDIR,
#    config=tf.estimator.RunConfig(save_summary_steps=2,
#                                  save_checkpoints_steps=2)
)

model.train(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df_train,
    y = df_train[LABEL],
    batch_size = 128,
    num_epochs = 50,
    shuffle = True
  ));

print_accuracy(model, df_valid)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model/lab3_class_carpark', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001D6796BD8D0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorf

## Prediction 

In [18]:
predictions = model.predict(input_fn = tf.estimator.inputs.pandas_input_fn(
#    x = pd.DataFrame({
#                      'Hour':[9,10,12],
#                      'Minute':[0,0,0],
#                      'IsoWeekday':[4,4,5]}),
    x = df_test,
    y = None,
    batch_size = 64,
    shuffle = False
  ))

for items in predictions:
  print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/lab3_class_carpark\model.ckpt-1884
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([ 0.26504046,  2.8733363 ,  1.4843765 , -3.7997832 , -2.557054  ],
      dtype=float32), 'probabilities': array([0.0554401 , 0.7526491 , 0.18766141, 0.00095169, 0.00329765],
      dtype=float32), 'class_ids': array([1], dtype=int64), 'classes': array([b'1'], dtype=object)}
{'logits': array([-4.3693743, -9.77875  , -0.3319178,  5.4110975,  1.8310905],
      dtype=float32), 'probabilities': array([5.4837608e-05, 2.4536709e-07, 3.1083047e-03, 9.6980280e-01,
       2.7033739e-02], dtype=float32), 'class_ids': array([3], dtype=int64), 'classes': array([b'3'], dtype=object)}
{'logits': array([ 3.55969   ,  2.7824867 , -0.48370007, -5.7998214 , -2.1324072 ],
      dtype=float32), 'probabilities': array([6

       7.0450321e-02], dtype=float32), 'class_ids': array([3], dtype=int64), 'classes': array([b'3'], dtype=object)}
{'logits': array([-4.9454737 , -6.836342  ,  0.8744346 ,  4.3889694 ,  0.38586307],
      dtype=float32), 'probabilities': array([8.4273488e-05, 1.2720300e-05, 2.8395202e-02, 9.5408726e-01,
       1.7420523e-02], dtype=float32), 'class_ids': array([3], dtype=int64), 'classes': array([b'3'], dtype=object)}
{'logits': array([-0.8218971,  1.1933823,  1.4958076, -1.955532 , -1.364913 ],
      dtype=float32), 'probabilities': array([0.05112992, 0.3836187 , 0.51908857, 0.01645673, 0.02970614],
      dtype=float32), 'class_ids': array([2], dtype=int64), 'classes': array([b'2'], dtype=object)}
{'logits': array([-2.7526882 , -0.64849585,  2.0273001 , -0.53087974, -0.16023159],
      dtype=float32), 'probabilities': array([0.00662734, 0.05434738, 0.7893363 , 0.06113061, 0.08855827],
      dtype=float32), 'class_ids': array([2], dtype=int64), 'classes': array([b'2'], dtype=object)}

In [25]:
items

{'class_ids': array([4], dtype=int64),
 'classes': array([b'4'], dtype=object),
 'logits': array([-1.2666327 , -8.36466   ,  0.16994056,  2.745716  ,  5.44993   ],
       dtype=float32),
 'probabilities': array([1.1280862e-03, 9.3262702e-07, 4.7450196e-03, 6.2356699e-02,
        9.3176931e-01], dtype=float32)}

In [19]:
df_test.head()

Unnamed: 0,A_State,Hour,Minute,IsoWeekday
453,1,17,30,5
1460,3,6,15,2
2446,0,13,15,5
1323,2,20,0,7
475,4,23,0,5
