In [1]:
import pandas as pd
import pygeohash as gh
import matplotlib.pyplot as plt
import numpy as np
import scipy

In [2]:
def denoise(series, pctile):
    """
    Denoise a time series by suppressing its weakest frequency components.

    The series is projected to the frequency domain with a discrete Fourier
    transform, every component whose magnitude is below the ``pctile``-th
    percentile of all component magnitudes is set to zero, and the result is
    transformed back to the time domain.

    Parameters
    ----------
    series : array-like
        One-dimensional series to be denoised.
    pctile : float
        Percentile (0-100) of the spectrum magnitudes used as the threshold.
        Components with a magnitude strictly below it are zeroed.

    Returns
    -------
    ndarray
        Magnitude of the inverse transform of the thresholded spectrum, with
        the same length as ``series``. Because the magnitude is taken, the
        output is never negative.
    """
    values = np.asarray(series)
    if values.size == 0:
        # Neither the FFT nor the percentile is defined for an empty input.
        return np.zeros(0)

    # `scipy.fft` is a module (not a function) in current SciPy releases, so
    # calling it fails; NumPy's FFT functions are used instead.
    spectrum = np.fft.fft(values)
    magnitude = np.abs(spectrum)
    spectrum[magnitude < np.percentile(magnitude, q=pctile)] = 0

    return np.abs(np.fft.ifft(spectrum))

In [3]:
# Load the training and hold-out sets, tagging every row with its origin.
df_train = pd.read_csv('Traffic Management/training.csv').assign(data_type='training')
df_test = pd.read_csv('Traffic Management/dummytest.csv').assign(data_type='holdout')

# Stack both sets and turn the 'H:M' timestamp strings into time objects.
df_traintest = pd.concat([df_train, df_test])
parsed_timestamps = pd.to_datetime(df_traintest['timestamp'], format='%H:%M')
df_traintest['timestamp'] = parsed_timestamps.dt.time
df_traintest.head()

Unnamed: 0,geohash6,day,timestamp,demand,data_type
0,qp03wc,18,20:00:00,0.020072,training
1,qp03pn,10,14:30:00,0.024721,training
2,qp09sw,9,06:15:00,0.102821,training
3,qp0991,32,05:00:00,0.088755,training
4,qp090q,15,04:00:00,0.074468,training


In [4]:
# CREATE DUMMY DATETIME VALUES
# Map each day number to an arbitrary calendar date (starting 2019-01-01) so
# pandas date ranges can be used to enumerate time slots.
# One more date than the number of distinct days is generated, so a range that
# ends at the last date's midnight still spans one date per distinct day.
# NOTE(review): this assumes day numbers are contiguous and start at 1 — confirm.
n_days = len(df_traintest['day'].unique())

# `pd.datetime` was removed in pandas 2.0; `pd.Timestamp` builds the same date.
dates = pd.DataFrame()
dates['dummy_date'] = pd.date_range(start=pd.Timestamp(2019, 1, 1),
                                    periods=n_days + 1)
dates['day'] = np.arange(1, dates.shape[0] + 1)

In [5]:
# Enumerate every 15-minute slot between the first and the last dummy date and
# give each slot a running integer index (T_n).
slots = pd.date_range(start=dates['dummy_date'].min(),
                      end=dates['dummy_date'].max(),
                      freq='15min')
timenum = pd.DataFrame({'dummy_datetime': slots})
timenum = timenum.assign(
    dummy_date=pd.to_datetime(timenum['dummy_datetime'].dt.date),
    timestamp=timenum['dummy_datetime'].dt.time,
    T_n=np.arange(len(timenum)),
)

# Look up the day number of every slot, then discard the helper date columns.
timenum = (timenum
           .merge(dates, on='dummy_date', how='left')
           .drop(columns=['dummy_datetime', 'dummy_date']))
timenum.head()

Unnamed: 0,timestamp,T_n,day
0,00:00:00,0,1
1,00:15:00,1,1
2,00:30:00,2,1
3,00:45:00,3,1
4,01:00:00,4,1


In [7]:
# Percentile threshold passed to denoise(): frequency components whose
# magnitude falls below this percentile are zeroed.
pctile = 45
# Number of lagged demand_fft columns (demand_fft-1 ... demand_fft-<window>)
# built for every row.
window = 92

In [9]:
# ATTACH THE RUNNING TIME INDEX (T_n) TO EVERY OBSERVATION
df_traintest2 = df_traintest.merge(timenum, on=['day', 'timestamp'], how='left')

# Group by a scalar key rather than a one-element list: recent pandas versions
# warn about one-element list groupers and are moving to tuple keys for them,
# which would break gh.decode() and the geohash6 column below.
g = df_traintest2.groupby('geohash6')

all_data = []

for loc, test in g:

    # Restrict the slot table to this location's time span and attach its
    # observations.
    # NOTE(review): the inner join keeps only slots that have an observation,
    # so the shifts below step over observed rows, not fixed 15-minute
    # intervals — confirm this matches how the model was trained.
    dummy = timenum[timenum.T_n <= test.T_n.max()]
    dummy = dummy.merge(test[['data_type', 'T_n', 'demand']], on='T_n', how='inner').fillna(0)

    # Decode the geohash once per location instead of once per coordinate.
    decoded = gh.decode(loc)
    dummy['geohash6'] = loc
    dummy['lat'] = decoded[0]
    dummy['long'] = decoded[1]

    # NOTE(review): the FFT is computed over the whole series, hold-out rows
    # included, so lagged features may carry information from later rows.
    dummy['demand_fft'] = denoise(dummy.demand.values, pctile)

    # Targets: 'demand' is step 1, 'demand+k' is the demand k-1 rows ahead.
    for step in range(2, 6):
        dummy[f'demand+{step}'] = dummy['demand'].shift(-(step - 1))

    # Features: denoised demand of the previous `window` rows. All lag columns
    # are built in one go to avoid fragmenting the frame with repeated inserts.
    lags = pd.DataFrame(
        {f'demand_fft-{bwd}': dummy['demand_fft'].shift(bwd)
         for bwd in range(1, window + 1)},
        index=dummy.index,
    )
    dummy = pd.concat([dummy, lags], axis=1)

    # Drop rows lacking a full lag/lead window, then keep the hold-out rows.
    dummy = dummy.dropna()
    dummy = dummy[dummy['data_type'] == 'holdout']

    all_data.append(dummy)

test_data = pd.concat(all_data)
test_data.head()

Unnamed: 0,timestamp,T_n,day,data_type,demand,geohash6,lat,long,demand_fft,demand+2,...,demand_fft-83,demand_fft-84,demand_fft-85,demand_fft-86,demand_fft-87,demand_fft-88,demand_fft-89,demand_fft-90,demand_fft-91,demand_fft-92
577,02:45:00,5867,62,holdout,0.020592,qp02yc,-5.48,90.7,0.027832,0.010292,...,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374,0.00964,0.01028
578,03:00:00,5868,62,holdout,0.010292,qp02yc,-5.48,90.7,0.000855,0.006676,...,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374,0.00964
579,04:00:00,5872,62,holdout,0.006676,qp02yc,-5.48,90.7,0.013886,0.003822,...,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374
580,04:30:00,5874,62,holdout,0.003822,qp02yc,-5.48,90.7,0.004838,0.011131,...,0.000338,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682
581,06:45:00,5883,62,holdout,0.011131,qp02yc,-5.48,90.7,0.015758,0.013487,...,0.02806,0.000338,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402


In [26]:
# Keep the identifying columns together with the observed demand values so the
# model's predictions can be placed next to them.
actual_columns = ['geohash6', 'day', 'timestamp', 'T_n',
                  'demand', 'demand+2', 'demand+3', 'demand+4', 'demand+5']
compare_actual = test_data[actual_columns].copy()
compare_actual.head()

Unnamed: 0,geohash6,day,timestamp,T_n,demand,demand+2,demand+3,demand+4,demand+5
577,qp02yc,62,02:45:00,5867,0.020592,0.010292,0.006676,0.003822,0.011131
578,qp02yc,62,03:00:00,5868,0.010292,0.006676,0.003822,0.011131,0.013487
579,qp02yc,62,04:00:00,5872,0.006676,0.003822,0.011131,0.013487,0.003709
580,qp02yc,62,04:30:00,5874,0.003822,0.011131,0.013487,0.003709,0.011041
581,qp02yc,62,06:45:00,5883,0.011131,0.013487,0.003709,0.011041,0.040743


In [13]:
# Model input: remove identifiers, the current raw/denoised demand and the
# target columns, which leaves lat/long and the lagged demand_fft columns.
non_feature_columns = ['timestamp', 'T_n', 'day', 'data_type',
                       'demand', 'geohash6', 'demand_fft',
                       'demand+2', 'demand+3', 'demand+4', 'demand+5']
to_predict = test_data.drop(columns=non_feature_columns)
to_predict.head()

Unnamed: 0,lat,long,demand_fft-1,demand_fft-2,demand_fft-3,demand_fft-4,demand_fft-5,demand_fft-6,demand_fft-7,demand_fft-8,...,demand_fft-83,demand_fft-84,demand_fft-85,demand_fft-86,demand_fft-87,demand_fft-88,demand_fft-89,demand_fft-90,demand_fft-91,demand_fft-92
577,-5.48,90.7,0.010659,0.020209,0.034297,0.068202,0.004567,0.02538,0.001686,0.04026,...,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374,0.00964,0.01028
578,-5.48,90.7,0.027832,0.010659,0.020209,0.034297,0.068202,0.004567,0.02538,0.001686,...,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374,0.00964
579,-5.48,90.7,0.000855,0.027832,0.010659,0.020209,0.034297,0.068202,0.004567,0.02538,...,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682,0.044374
580,-5.48,90.7,0.013886,0.000855,0.027832,0.010659,0.020209,0.034297,0.068202,0.004567,...,0.000338,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402,0.018682
581,-5.48,90.7,0.004838,0.013886,0.000855,0.027832,0.010659,0.020209,0.034297,0.068202,...,0.02806,0.000338,0.01848,0.039929,0.003353,0.030117,0.016688,0.029827,0.009256,0.034402


In [14]:
from keras.models import load_model

In [23]:
# Load the saved network and score every hold-out feature row.
model = load_model('model1.hdf5')
feature_matrix = to_predict.values
preds = model.predict(feature_matrix)

In [27]:
# Place each output column of the model next to the actual value it predicts.
prediction_columns = ['pred_demand', 'pred_demand+2', 'pred_demand+3',
                      'pred_demand+4', 'pred_demand+5']
for position, column in enumerate(prediction_columns):
    compare_actual[column] = preds[:, position]

compare_actual

Unnamed: 0,geohash6,day,timestamp,T_n,demand,demand+2,demand+3,demand+4,demand+5,pred_demand,pred_demand+2,pred_demand+3,pred_demand+4,pred_demand+5
577,qp02yc,62,02:45:00,5867,0.020592,0.010292,0.006676,0.003822,0.011131,0.023836,0.023909,0.025075,0.025549,0.025957
578,qp02yc,62,03:00:00,5868,0.010292,0.006676,0.003822,0.011131,0.013487,0.025623,0.025715,0.026089,0.026962,0.027151
579,qp02yc,62,04:00:00,5872,0.006676,0.003822,0.011131,0.013487,0.003709,0.020764,0.022543,0.023680,0.024522,0.025512
580,qp02yc,62,04:30:00,5874,0.003822,0.011131,0.013487,0.003709,0.011041,0.020393,0.021332,0.021505,0.022597,0.022828
581,qp02yc,62,06:45:00,5883,0.011131,0.013487,0.003709,0.011041,0.040743,0.017685,0.019102,0.018623,0.019930,0.020291
582,qp02yc,62,11:15:00,5901,0.013487,0.003709,0.011041,0.040743,0.026320,0.018003,0.019934,0.018781,0.019866,0.020679
583,qp02yc,62,12:00:00,5904,0.003709,0.011041,0.040743,0.026320,0.006989,0.016041,0.017773,0.017913,0.018744,0.019391
584,qp02yc,63,04:30:00,5970,0.011041,0.040743,0.026320,0.006989,0.031419,0.014043,0.015789,0.015368,0.016346,0.017170
585,qp02yc,63,05:00:00,5972,0.040743,0.026320,0.006989,0.031419,0.012687,0.017039,0.018474,0.018184,0.019420,0.019756
586,qp02yc,63,08:45:00,5987,0.026320,0.006989,0.031419,0.012687,0.027531,0.023936,0.023509,0.023333,0.024846,0.024926
