In [2]:
import datetime
import pathlib

import pandas as pd


from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, MSTL
#obtain hierarchical reconciliation methods and evaluation
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace, ERM, Normality
from hierarchicalforecast.utils import aggregate

  from tqdm.autonotebook import tqdm


In [4]:
# Locate the 2018 311 data relative to the notebook's working directory.
root_dir = pathlib.Path("../data")
dir_311 = root_dir / "311" / "2018"

# Read the `agency=NYPD` directory of the processed dataset.
df = pd.read_parquet(dir_311 / "processed" / "agency=NYPD")

# Keep rows whose `created_H` falls on or after 2018-01-01.
# The bound is a pd.Timestamp rather than a datetime.date: ordering
# comparisons between a datetime64 column and a plain `date` raise
# "Invalid comparison between dtype=datetime64[ns] and date" in current
# pandas, whereas a Timestamp compares cleanly.
# (Assumes `created_H` is datetime-like, as the displayed values suggest.)
df = df[df["created_H"] >= pd.Timestamp(2018, 1, 1)]
df

Unnamed: 0,borough,complaint_type,created_H,closed_H,created_date,closed_date,descriptor,status,resolution_description,resolution_action_updated_date,...,created_ag,created_co,created_bo_ag,created_bo_co,open,open_bo,open_ag,open_co,open_bo_ag,open_bo_co
350,BRONX,Blocked Driveway,2018-01-01 00:00:00,2018-01-01 01:00:00,2018-01-01 00:58:30,2018-01-01 01:52:26,Partial Access,Closed,The Police Department responded to the complai...,2018-01-01 01:52:26,...,153,20,16,3,100322,22069,457,93,40,11
351,BRONX,Blocked Driveway,2018-01-01 00:00:00,2018-01-01 07:00:00,2018-01-01 00:39:48,2018-01-01 07:21:11,No Access,Closed,The Police Department responded and upon arriv...,2018-01-01 07:21:11,...,153,20,16,3,100322,22069,457,93,40,11
352,BRONX,Blocked Driveway,2018-01-01 00:00:00,2018-01-01 15:00:00,2018-01-01 00:22:59,2018-01-01 15:58:22,No Access,Closed,The Police Department responded to the complai...,2018-01-01 15:58:22,...,153,20,16,3,100322,22069,457,93,40,11
353,BRONX,Illegal Parking,2018-01-01 00:00:00,2018-01-01 06:00:00,2018-01-01 00:18:13,2018-01-01 06:54:38,Blocked Hydrant,Closed,The Police Department issued a summons in resp...,2018-01-01 06:54:38,...,153,10,16,1,100322,22069,457,65,40,1
354,BRONX,Noise - Residential,2018-01-01 00:00:00,2018-01-01 01:00:00,2018-01-01 00:38:54,2018-01-01 01:48:19,Loud Music/Party,Closed,The Police Department responded to the complai...,2018-01-01 01:48:19,...,153,101,16,12,100322,22069,457,207,40,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
752257,QUEENS,Non-Emergency Police Matter,2018-12-31 23:00:00,2019-01-01 05:00:00,2018-12-31 23:21:48,2019-01-01 05:57:02,Other (complaint details),Closed,The Police Department responded to the complai...,2019-01-01 05:57:02,...,147,1,37,1,94966,21241,953,12,141,6
752258,STATEN ISLAND,Illegal Parking,2018-12-31 23:00:00,2019-01-01 04:00:00,2018-12-31 23:32:04,2019-01-01 04:00:00,Blocked Hydrant,Closed,The Police Department responded to the complai...,2019-01-01 04:00:00,...,147,22,4,1,94966,5077,953,232,28,8
752259,STATEN ISLAND,Noise - Residential,2018-12-31 23:00:00,2018-12-31 23:00:00,2018-12-31 23:08:31,2018-12-31 23:18:44,Loud Music/Party,Closed,The Police Department responded to the complai...,2018-12-31 23:18:44,...,147,80,4,3,94966,5077,953,326,28,9
752260,STATEN ISLAND,Noise - Residential,2018-12-31 23:00:00,2019-01-01 04:00:00,2018-12-31 23:39:46,2019-01-01 04:09:14,Banging/Pounding,Closed,The Police Department responded to the complai...,2019-01-01 04:09:14,...,147,80,4,3,94966,5077,953,326,28,9


In [None]:
# Candidate aggregation specs kept for reference (currently unused):
# hierarchies = [
#     ['borough'],
#     [ 'borough','incident_zip'],
#     [ 'borough','incident_zip', 'agency'],
#     [ 'borough','incident_zip', 'agency', 'complaint_type'],
# ]

# Exclude rows whose borough is the literal placeholder 'Unspecified'.
hdf = df[df['borough'] != 'Unspecified']

# Aggregate the filtered frame at a single agency/borough level.
# NOTE(review): `aggregate` needs every column named in `spec` (and its
# time/target columns) to be present in `hdf`. The frame was read straight
# from the `agency=NYPD` directory and its displayed columns are truncated,
# so confirm that `agency` actually exists as a column.
bdf, H_df, tags = aggregate(
    hdf,
    spec=[['agency', 'borough']],
)

# Reconciler set-up. `hrec` is not used further in this cell.
hrec = HierarchicalReconciliation(
    reconcilers=[
        # MinTrace('ols', nonnegative=True,num_threads=-1),
        ERM('closed'),
        # BottomUp()
    ]
)

# Restrict the aggregated series to the window strictly between
# 2018-10-01 00:00 and 2018-12-01 00:00.
# The bounds are pd.Timestamp rather than datetime.date so the comparison
# against the datetime `ds` column does not raise a TypeError in current
# pandas.
window_start = pd.Timestamp(year=2018, month=10, day=1)
window_end = pd.Timestamp(year=2018, month=12, day=1)
Y_df = bdf[(bdf['ds'] > window_start) & (bdf['ds'] < window_end)]

# MSTL decomposition with daily (24 h) and weekly (24 * 7 h) seasonal
# periods; the trend component is forecast with AutoARIMA.
models = [
    MSTL(
        season_length=[24, 24 * 7],  # seasonalities of the time series
        trend_forecaster=AutoARIMA(trace=True),  # model used to forecast trend
    )
]

# Hourly-frequency forecaster over the windowed data, using all cores.
sf = StatsForecast(
    models=models,
    freq='H',
    df=Y_df,
    n_jobs=-1,
)
sf.fit()

# One-step-ahead forecast with a 90% prediction interval.
# NOTE(review): `forecast` is given `df=bdf` (the full aggregated frame),
# not the `Y_df` window fitted above and plotted below. Per the StatsForecast
# docs `forecast` fits on the frame it receives, so the `sf.fit()` result is
# presumably not reused here -- confirm which frame is intended.
fcts = sf.forecast(h=1, df=bdf, level=[90])

# Plot the windowed history with the forecasts, filling missing forecast
# values with 0.
# NOTE(review): the `fcts.index.isin(bdf.index)` filter only makes sense if
# both frames are indexed by series id -- verify against the installed
# library versions.
StatsForecast.plot(df=Y_df, forecasts_df=fcts[fcts.index.isin(bdf.index)].fillna(0))