In [53]:
import datetime
from itertools import chain, combinations
from time import perf_counter

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pyarrow as pa
import numpy as np
from utils import load_saved_data, get_connection, get_data
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, MSTL
#obtain hierarchical reconciliation methods and evaluation
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.evaluation import HierarchicalEvaluation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut, MinTrace, ERM, Normality
from hierarchicalforecast.utils import aggregate

In [54]:
# Load the 2018 extract from a local feather file. The alternative loaders
# imported from utils are kept below for reference but are disabled.
# df = load_saved_data()
# conn = get_connection()
# df = get_data(conn, 2018, 2018, 2_000_000, 0)
df = pd.read_feather("../data/2018.feather")

In [55]:
# Drop records whose closing timestamp is later than today (midnight). Only
# rows for which the comparison is True are removed.
current_date = pd.Timestamp.now().normalize()
df = df.drop(df[df["closed_date"]>current_date].index)
# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: the in-place form is chained assignment, which emits a
# warning on pandas 2.x and does not modify the frame under Copy-on-Write.
df["borough"] = df["borough"].fillna("Unspecified")

In [56]:
# Show the dtype of every column.
df.dtypes

created_date                      timestamp[ns][pyarrow]
closed_date                       timestamp[ns][pyarrow]
agency                                   string[pyarrow]
complaint_type                           string[pyarrow]
status                                   string[pyarrow]
resolution_action_updated_date    timestamp[ns][pyarrow]
borough                                  string[pyarrow]
descriptor                               string[pyarrow]
resolution_description                   string[pyarrow]
incident_zip                             string[pyarrow]
city                                     string[pyarrow]
latitude                                 double[pyarrow]
longitude                                double[pyarrow]
due_date                          timestamp[ns][pyarrow]
bbl                                      string[pyarrow]
dtype: object

In [57]:
# Summary statistics: default describe() first, then the string columns.
display(
    df.describe(),
    df.describe(include='string'),
)


Unnamed: 0,created_date,closed_date,resolution_action_updated_date,latitude,longitude,due_date
count,1999999,1965899,1983911,1854230.0,1854230.0,913147
mean,2018-08-22 09:45:46.341392896,2018-09-09 20:00:57.793048576,2018-09-09 12:56:04.531892736,40.72587,-73.92462,2018-09-02 06:03:13.878392064
min,2018-04-13 18:06:41,2007-12-04 00:00:00,2013-05-17 00:00:00,40.49895,-74.25495,2018-04-13 19:07:03
25%,2018-06-19 00:18:28.500000,2018-06-26 11:17:00,2018-06-26 16:39:58.500000,40.6643,-73.97041,2018-06-24 05:16:25.500000
50%,2018-08-22 15:04:00,2018-09-02 08:50:00,2018-09-03 08:47:10,40.71663,-73.92741,2018-08-27 10:05:02
75%,2018-10-24 22:29:08.500000,2018-11-06 15:55:07,2018-11-07 10:52:21.500000,40.79664,-73.87562,2018-10-30 09:39:17.500000
max,2019-01-31 14:56:00,2024-02-12 11:40:56,2024-02-12 11:40:59,40.91294,-73.70038,2022-07-21 02:22:26
std,,,,0.08583189,0.08448542,


Unnamed: 0,agency,complaint_type,status,borough,descriptor,resolution_description,incident_zip,city,bbl
count,1999999,1999999,1999999,1999999,1983581,1856568,1914078,1914606,1561122
unique,29,247,9,6,1174,807,588,414,362441
top,NYPD,Noise - Residential,Closed,BROOKLYN,Loud Music/Party,The Police Department responded to the complai...,11226,BROOKLYN,4015110001
freq,576240,161779,1953447,635678,185466,208711,32007,620153,1562


In [58]:
# Keep only records closed strictly after they were created. The explicit
# copy makes the filtered frame independent, so adding a column below does
# not raise SettingWithCopyWarning.
df = df.loc[df["closed_date"]>df["created_date"]].copy()

# The elapsed duration is cast to float and divided by the number of
# nanoseconds in one hour (3600 s * 1e9 ns), giving fractional hours.
NANOSECONDS_PER_HOUR = 3.6e12
elapsed = pd.to_datetime(df["closed_date"]) - pd.to_datetime(df["created_date"])
df["hours_to_complete"] = elapsed.astype("float64") / NANOSECONDS_PER_HOUR


In [59]:
# df[(df["status"]!="Closed")].groupby("agency")["closed_date"].count()
# df[(df["borough"]=="Unspecified") &  (df["latitude"].isnull()) & (df["incident_zip"].isnull())].count()
# unclosed = df[(df["status"]!="Closed")|(df["closed_date"] == pd.NaT)]["agency"].value_counts()
# unclosed.plot.bar(title="Number of Unclosed Requests by Agency", xlabel="Agency", ylabel="Unclosed Request Count")
# unclosed = df[df["status"]!="Closed"]["agency"].value_counts()
# unclosed.plot.bar(title="Number of Unclosed Requests by Agency", xlabel="Agency", ylabel="Unclosed Request Count")

In [60]:
# cl_hr_1 = df["closed_date"].groupby([df["closed_date"].dt.date, df["closed_date"].dt.hour]).value_counts().cumsum()
# convert to hours
# create cumulative sum by summing value count by date for closed date and created date
# subtract the series of cumsums from each other to get number of open per hour

In [61]:
def add_created_count_feat(df:pd.DataFrame,
                           features:list[str],
                           inplace = False):
    """Count how many requests were created per hour for each ``features`` combination.

    Timestamps are converted through their int64 representation, which
    ``pd.to_datetime`` interprets as nanoseconds, and floored to the hour.

    Args:
        df: Frame with a ``created_date`` column (and ``closed_date`` when
            ``inplace=True``) plus every column named in ``features``.
        features: Column names to group by in addition to the creation hour.
        inplace: When False (default) ``df`` is left untouched and the counts
            are returned. When True ``df`` is modified: its index is replaced
            by (closing hour, *features) and a new column named
            ``created_<first two letters of each feature>`` is added.

    Returns:
        When ``inplace`` is False, the hourly counts with the last grouping
        level unstacked into columns and gaps filled with 0. Otherwise None.
    """
    trunc_name = "_".join(["created"]+[name[:2] for name in features])

    # "h" replaces the deprecated "H" offset alias.
    created_hour = pd.to_datetime(df["created_date"].astype("int64")).dt.floor("h")
    created_by_date = df.groupby(by=[created_hour, *features])["created_date"].count()

    if not inplace:
        return created_by_date.unstack().fillna(0)

    # NOTE(review): rows are keyed by their *closing* hour here, while the
    # counts are keyed by *creation* hour - confirm this is intended and not
    # a copy/paste of the wrong column.
    closed_hour = pd.to_datetime(df["closed_date"].astype("int64")).dt.floor("h")
    df.set_index([closed_hour, *features], inplace=True)

    # Align on the index first, then fill: rows whose (hour, features) key has
    # no created requests would otherwise stay NaN instead of 0.
    df[trunc_name] = created_by_date
    df[trunc_name] = df[trunc_name].fillna(0)


# add_created_count_feat(df, ["agency", "borough"])

In [62]:
# df["closed_hour"] = pd.to_datetime(df["closed_date"].astype("int64")).dt.floor("H")
# df.set_index(['closed_hour'], inplace=True)
# df['closed_date'].count(level=df.index.names)

In [63]:
def add_open_count_feat(df:pd.DataFrame, features:list[str]) -> None:
    """Add a column with the running number of open requests per ``features`` group.

    For every (created_hour, *features) combination the value is the
    cumulative number of requests created minus the cumulative number closed.
    The result is written to a new column named
    ``open_<first two letters of each feature>``.

    ``df`` is modified in place: it is re-indexed and sorted several times and
    finally ``reset_index`` is called, so the row order and column order of
    the caller's frame change. Nothing is returned.

    Args:
        df: Frame that must already contain ``closed_hour``, ``created_hour``,
            ``closed_date`` and ``created_date`` plus the feature columns.
        features: Column names that define the groups.

    NOTE(review): closings are only counted for hours strictly before the
    latest ``created_hour``, and the subtraction aligns on index labels, so a
    group with creations but no counted closings presumably ends up NaN
    rather than equal to its created count - confirm.
    """
    # col name for new feature
    trunc_name = "_".join(["open"]+[name[:2] for name in features])


    # set the index to features and date closed

    df.set_index(['closed_hour']+features, inplace=True)

    # sort by closed date
    df.sort_index(axis=0, level='closed_hour', ascending=True, inplace=True)

    # count number closed per hour (grouping on every index level)
    t_num_closed = df["closed_date"].groupby(level=list(range(df.index.nlevels))).count()
    # keep only closing hours strictly before the last creation hour
    t_num_closed:pd.Series = t_num_closed[t_num_closed.index.get_level_values('closed_hour') < df['created_hour'].max()]
    # one column per feature combination, then running total over time
    t_num_closed = t_num_closed.unstack(level=features, fill_value=0).cumsum()

    # remove closed date from index (it becomes a regular column again)
    df.reset_index("closed_hour", drop=False, inplace=True)
    # df.reset_index(features, drop=False, inplace=True)

    # Add created date to index, after the feature levels
    df.set_index("created_hour", append=True, inplace=True)
    # if features:
    #     df = df.reorder_levels(['created_hour']+features)
    df.sort_index(level="created_hour", ascending=True, inplace=True)

    # count number created per hour (grouping on every index level)
    t_num_created = df["created_date"].groupby(level=list(range(df.index.nlevels))).count()

    # unstack to get columns for all features
    # get cumsum over time; it is restacked into a series further down
    t_num_created = t_num_created.unstack(level=features, fill_value=0).cumsum()

    # change index of closed to be more like created
    t_num_closed.index.rename("created_hour", inplace=True)
    # union of both hour indexes, so closed totals exist for every created hour
    missing_indices = t_num_closed.index.union(t_num_created.index)
    # extra_indices = t_num_closed.index.difference(t_num_created.index)

    # hours without a closing carry the previous running total forward
    t_num_closed = t_num_closed.reindex(missing_indices, method='ffill').stack(level=features)
    # # t_num_closed = t_num_closed[t_num_created.index]#.fillna(0)
    # t_num_closed = t_num_closed[~t_num_closed.index.isin(extra_indices)]#.fillna(0)

    # # stack back into a series
    # t_num_closed = t_num_closed.stack(level=features)
    t_num_created = t_num_created.stack(level=features)
    t_num_closed.rename(t_num_created.name, inplace=True)

    # difference to get number open at a given hour
    diff = t_num_created - t_num_closed
    # diff:pd.Series = t_num_created.combine(t_num_closed, lambda x,y: x-y, fill_value=0)
    # make the level order match df's index so the assignment below aligns
    if not all([a == b for a, b in zip(diff.index.names, df.index.names)]):
        diff = diff.reorder_levels(df.index.names)

    # add to the dataframe and reset the index
    df[trunc_name] = diff
    df.reset_index(inplace=True)
    # df.reset_index("created_hour", inplace=True)

In [64]:
# calculate number of open requests divided by a powerset of s
s = ["borough", "agency","complaint_type"]
df["created_hour"] = pd.to_datetime(df["created_date"].astype("int64")).dt.floor("H")
df["closed_hour"] = pd.to_datetime(df["closed_date"].astype("int64")).dt.floor("H")
for features in chain.from_iterable(combinations(s, r) for r in range(len(s)+1)):
    if not features:
        continue
    if 'agency' in features and 'complaint_type' in features:
        continue
    print(features)
    start = perf_counter()
    add_open_count_feat(df, list(features))
    print(perf_counter()-start,"seconds")
    print()
    # break

('borough',)
2.4226456000005783 seconds

('agency',)
2.2377831000003425 seconds

('complaint_type',)
5.288200599999982 seconds

('borough', 'agency')
5.632052900000417 seconds

('borough', 'complaint_type')
32.824607900000046 seconds



In [69]:
# Preview the frame, now including the open_* count columns.
df

Unnamed: 0,borough,complaint_type,created_hour,closed_hour,agency,created_date,closed_date,status,resolution_action_updated_date,descriptor,...,latitude,longitude,due_date,bbl,hours_to_complete,open_bo,open_ag,open_co,open_bo_ag,open_bo_co
0,BRONX,Animal Abuse,2018-04-13 18:00:00,2018-04-13 18:00:00,NYPD,2018-04-13 18:22:30,2018-04-13 18:36:30,Closed,2018-04-13 18:36:30,Tortured,...,40.858047,-73.899575,2018-04-14 02:22:30,2031590007,0.233333,40,104,3,20,0
1,BRONX,Blocked Driveway,2018-04-13 18:00:00,2018-04-13 23:00:00,NYPD,2018-04-13 18:55:19,2018-04-13 23:20:04,Closed,2018-04-13 23:20:04,No Access,...,40.835039,-73.861072,2018-04-14 02:55:19,,4.412500,40,104,15,20,1
2,BRONX,Consumer Complaint,2018-04-13 18:00:00,2018-04-16 11:00:00,DCA,2018-04-13 18:28:46,2018-04-16 11:34:38,Closed,2018-04-16 11:34:38,Labor Violation,...,40.877235,-73.838846,2018-05-22 11:33:37,2047540011,65.097778,40,4,4,3,3
3,BRONX,Consumer Complaint,2018-04-13 18:00:00,2018-04-16 14:00:00,DCA,2018-04-13 18:25:13,2018-04-16 14:34:12,Closed,2018-04-16 14:34:12,Receipt Incomplete/Not Given,...,40.828693,-73.877949,2018-04-17 18:25:13,2037740006,68.149722,40,4,4,3,3
4,BRONX,Consumer Complaint,2018-04-13 18:00:00,2018-04-29 01:00:00,DCA,2018-04-13 18:21:40,2018-04-29 01:06:33,Closed,2018-04-29 01:06:33,Damaged/Defective Goods,...,40.853031,-73.905272,2018-05-01 11:03:44,2031690059,366.748056,40,4,4,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1886364,STATEN ISLAND,Noise - Residential,2018-12-31 23:00:00,2018-12-31 23:00:00,NYPD,2018-12-31 23:08:31,2018-12-31 23:18:44,Closed,2018-12-31 23:18:44,Loud Music/Party,...,40.586026,-74.102371,2019-01-01 07:08:31,5035320500,0.170278,4821,953,326,28,9
1886365,STATEN ISLAND,Noise - Residential,2018-12-31 23:00:00,2019-01-01 04:00:00,NYPD,2018-12-31 23:39:46,2019-01-01 04:09:14,Closed,2019-01-01 04:09:14,Banging/Pounding,...,40.604853,-74.076186,2019-01-01 07:39:46,5030340022,4.491111,4821,953,326,28,9
1886366,STATEN ISLAND,Noise - Residential,2018-12-31 23:00:00,2019-01-01 07:00:00,NYPD,2018-12-31 23:14:00,2019-01-01 07:40:25,Closed,2019-01-01 07:40:25,Loud Music/Party,...,40.545260,-74.218247,2019-01-01 07:14:00,5070810014,8.440278,4821,953,326,28,9
1886367,STATEN ISLAND,Sewer,2018-12-31 23:00:00,2019-01-02 09:00:00,DEP,2018-12-31 23:36:00,2019-01-02 09:30:00,Closed,2019-01-02 09:30:00,Street Flooding (SJ),...,40.560476,-74.166889,,,33.900000,4821,3598,1368,175,61


In [None]:
created_hour         borough
2018-04-13 18:00:00  BRONX               40
                     BROOKLYN            73
                     MANHATTAN           48
                     QUEENS              55
                     STATEN ISLAND       16
                                      ...
2018-12-31 23:00:00  BROOKLYN         28979
                     MANHATTAN        20712
                     QUEENS           19795
                     STATEN ISLAND     4821
                     Unspecified       1364
Length: 37764, dtype: int64[pyarrow]

In [40]:
# Display the working frame.
# NOTE(review): the stored output carries an earlier execution count and only
# shows open_bo, so it appears to come from a previous run - re-run or remove.
df

Unnamed: 0,borough,created_hour,closed_hour,created_date,closed_date,agency,complaint_type,status,resolution_action_updated_date,descriptor,resolution_description,incident_zip,city,latitude,longitude,due_date,bbl,hours_to_complete,open_bo
0,BRONX,2018-04-13 18:00:00,2018-04-13 18:00:00,2018-04-13 18:50:09,2018-04-13 18:55:40,NYPD,Noise - Commercial,Closed,2018-04-13 18:55:40,Banging/Pounding,The Police Department responded to the complai...,10452,BRONX,40.844081,-73.916359,2018-04-14 02:50:09,2028590030,0.091944,40
1,BRONX,2018-04-13 18:00:00,2018-04-13 18:00:00,2018-04-13 18:22:30,2018-04-13 18:36:30,NYPD,Animal Abuse,Closed,2018-04-13 18:36:30,Tortured,Your request can not be processed at this time...,10458,BRONX,40.858047,-73.899575,2018-04-14 02:22:30,2031590007,0.233333,40
2,BRONX,2018-04-13 18:00:00,2018-04-13 18:00:00,2018-04-13 18:08:41,2018-04-13 18:19:20,NYPD,Noise - Vehicle,Closed,2018-04-13 18:19:20,Car/Truck Music,The Police Department reviewed your complaint ...,10453,BRONX,40.854215,-73.913613,2018-04-14 02:08:41,2032210088,0.177500,40
3,BRONX,2018-04-13 18:00:00,2018-04-13 19:00:00,2018-04-13 18:07:03,2018-04-13 19:44:21,DHS,Homeless Person Assistance,Closed,2018-04-13 19:44:21,,The mobile outreach response team offered serv...,10469,BRONX,40.870897,-73.847059,2018-04-13 19:07:03,2047590034,1.621667,40
4,BRONX,2018-04-13 18:00:00,2018-04-13 20:00:00,2018-04-13 18:52:58,2018-04-13 20:27:46,NYPD,Noise - Vehicle,Closed,2018-04-13 20:27:46,Car/Truck Music,The Police Department responded to the complai...,10460,BRONX,40.849537,-73.883658,2018-04-14 02:52:58,2031120030,1.580000,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1886364,STATEN ISLAND,2018-12-31 23:00:00,2019-01-01 04:00:00,2018-12-31 23:39:46,2019-01-01 04:09:14,NYPD,Noise - Residential,Closed,2019-01-01 04:09:14,Banging/Pounding,The Police Department responded to the complai...,10305,STATEN ISLAND,40.604853,-74.076186,2019-01-01 07:39:46,5030340022,4.491111,4821
1886365,STATEN ISLAND,2018-12-31 23:00:00,2019-01-01 04:00:00,2018-12-31 23:32:04,2019-01-01 04:00:00,NYPD,Illegal Parking,Closed,2019-01-01 04:00:00,Blocked Hydrant,The Police Department responded to the complai...,10301,STATEN ISLAND,40.632584,-74.098126,2019-01-01 07:32:04,,4.465556,4821
1886366,STATEN ISLAND,2018-12-31 23:00:00,2019-01-01 07:00:00,2018-12-31 23:14:00,2019-01-01 07:40:25,NYPD,Noise - Residential,Closed,2019-01-01 07:40:25,Loud Music/Party,The Police Department responded to the complai...,10309,STATEN ISLAND,40.545260,-74.218247,2019-01-01 07:14:00,5070810014,8.440278,4821
1886367,STATEN ISLAND,2018-12-31 23:00:00,2019-01-02 09:00:00,2018-12-31 23:36:00,2019-01-02 09:30:00,DEP,Sewer,Closed,2019-01-02 09:30:00,Street Flooding (SJ),The Department of Environmental Protection ins...,10312,STATEN ISLAND,40.560476,-74.166889,,,33.900000,4821


In [None]:
# df["created_hour"] = pd.to_datetime(df["created_date"].astype("int64")).dt.floor("H")
# df.set_index('created_hour', inplace=True)

# for features in chain.from_iterable(combinations(s, r) for r in range(len(s)+1)):
#     if len(features) in {0,3}:
#         continue

#     trunc_name = "_".join(["open"]+[name[:2] for name in features])
#     df.set_index([*features], inplace=True, append=True)
#     df[trunc_name] = df['open_bo_ag_co'].groupby(level=list(range(df.index.nlevels))).sum()
#     df.reset_index([*features], inplace=True)

# df.reset_index(drop=True,inplace=True)

In [72]:
# Persist the transformed frame to a feather file.
df.to_feather('../data/2018_tf.feather')

In [None]:
# Rows whose resolution text contains the "can not be processed" message.
# regex=False matches the phrase literally; na=False makes rows with a
# missing description evaluate to False instead of producing an NA mask.
df[df['resolution_description'].str.contains(
    "Your request can not be processed at this time", regex=False, na=False
)]

In [70]:
# average completion time per complaint type
completion_time_by_type = df.groupby("complaint_type")["hours_to_complete"].mean()
# number of records per complaint type. size() always yields a single-level
# index keyed by complaint_type, whereas grouping a column by itself and
# calling value_counts() produces a duplicated index level on some pandas
# versions, which would break the later isin(candidates) lookup.
num_of_type = df.groupby("complaint_type").size()

In [71]:
bin_lbls = ["Low","Mid","High"]
# bin complaint types into three quantile buckets by frequency;
# duplicates="raise" makes qcut fail if the bin edges are not unique
q1, bins = pd.qcut(num_of_type, 3, labels=bin_lbls, retbins=True,duplicates="raise")
# complaint types that fall into the most frequent bucket
candidates = q1[q1=="High"].index
# binning completion time
# q2, bins = pd.qcut(completion_time_by_type[completion_time_by_type.index.isin(candidates)], 3, labels=bin_lbls, retbins=True,duplicates="raise")

In [74]:

# get high frequency records created after the median creation time
high_freq_df = df[(df["created_date"]>df["created_date"].median())&(df["complaint_type"].isin(candidates))]

# row key: [correlation, y_column, [x_columns]]
high_corr = {}
# minimum absolute correlation to be added to the dict
min_corr = 0.3
# minimum number of records a group needs to be considered
min_records = 1000
for col_names in chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)):
    trunc_name = "_".join(["open"]+[name[:2] for name in col_names])
    if trunc_name not in high_freq_df.columns:
        continue
    col_names = list(col_names)
    # get the correlation between completion time and number open for high freq
    consider_df = high_freq_df.groupby(col_names)[["hours_to_complete",trunc_name]]
    # non-null completion-time count per group, computed once instead of
    # calling get_group() for every candidate key
    group_sizes = consider_df.count()["hours_to_complete"]
    # corr() returns one small matrix per group; pick the
    # (trunc_name row, hours_to_complete column) entry of each by label
    # rather than by every-other-row position
    correlation = consider_df.corr()["hours_to_complete"].xs(trunc_name, level=-1)
    abs_correlation = correlation.abs()
    hcorr:pd.Series = abs_correlation[(abs_correlation<1)&(abs_correlation>min_corr)]

    for n, v in hcorr.items():
        # single-column groupings yield a scalar key; wrap it so zip() pairs
        # it with the column name instead of iterating over its characters
        key = n if isinstance(n, tuple) else (n,)
        tdic = dict(zip(col_names, key))
        if group_sizes.loc[n] < min_records:
            continue
        # skip combinations whose complaint type is already recorded on its own
        if "complaint_type" in tdic and tdic["complaint_type"] in high_corr:
            continue
        if n not in high_corr:
            high_corr[n] = [v, trunc_name, col_names]
        else:
            high_corr[n] = max(high_corr[n], [v, trunc_name, col_names])
# select complaint types whose completion time is dependent on number of open requests
# high_freq_df = high_freq_df[high_freq_df["complaint_type"].isin(high_corr)]


high_corr

{'DFTA': [0.308960097523829, 'open_ag', ['agency']],
 'HRA': [0.49616743366322463, 'open_ag', ['agency']],
 'Benefit Card Replacement': [0.49616743366322463,
  'open_co',
  ['complaint_type']],
 'DOF Property - Reduction Issue': [0.39155673775319827,
  'open_co',
  ['complaint_type']],
 'Housing - Low Income Senior': [0.39926254714531517,
  'open_co',
  ['complaint_type']],
 'SCRIE': [0.6280286997260813, 'open_co', ['complaint_type']],
 ('Unspecified', 'HRA'): [0.49616743366322463,
  'open_bo_ag',
  ['borough', 'agency']]}

In [None]:
# remove outlier records: drop rows at or above their complaint type's
# 99th percentile of completion time
upper = high_freq_df.groupby("complaint_type")["hours_to_complete"].quantile(0.99)
high_freq_df = pd.merge(high_freq_df, upper.rename('upper_bound'), left_on="complaint_type", right_index=True)
high_freq_df = high_freq_df[high_freq_df["hours_to_complete"] < high_freq_df["upper_bound"]]
# Reassign instead of drop(inplace=True): the frame above is the result of a
# boolean filter, and mutating it in place raises SettingWithCopyWarning.
high_freq_df = high_freq_df.drop(columns="upper_bound")

In [None]:
# Inspect the Graffiti complaints filed in the Bronx.
df.loc[(df["borough"]=="BRONX") & (df["complaint_type"] == "Graffiti")]

In [None]:
# Distinct complaint types handled by the HRA agency.
df.loc[df['agency']=='HRA', 'complaint_type'].unique()

In [None]:
final_set = []
corr_nums = set()
# collect one scatter-plot spec per distinct correlation value among the
# high corr, high frequency groups
for n, (corr, x, c) in high_corr.items():
    # identical correlation values are treated as duplicates of the same
    # group seen through a different grouping
    if corr in corr_nums:
        continue
    corr_nums.add(corr)

    # Group by the bare column for single-column keys: get_group() with a
    # scalar key on a length-1 list grouper is deprecated and newer pandas
    # expects a 1-tuple there.
    by = c[0] if len(c) == 1 else c
    try:
        grp = high_freq_df.groupby(by).get_group(n)
    except KeyError:
        # the group is absent from high_freq_df
        continue

    name = ' :: '.join(n) if isinstance(n, tuple) else n

    title = f"{name}\ncorr: {corr}\n{len(grp)}"
    final_set.append((n,title, x, c))

def nearest_rectangle(num_vars:int) -> tuple[int, int]:
    """Return a near-square ``(rows, cols)`` grid with at least ``num_vars`` cells.

    Both sides start at ``round(sqrt(num_vars))``; one extra column is added
    when that square is too small. For ``num_vars`` below 1 a 1x1 grid is
    returned, so the result can always be passed to ``plt.subplots``, which
    rejects zero rows or columns.
    """
    if num_vars < 1:
        return 1, 1

    length = width = round(num_vars**0.5)

    if length * width < num_vars:
        width += 1

    return length, width

height, width = nearest_rectangle(len(final_set))

# plt.subplots needs at least one row and one column
fig, axes = plt.subplots(
            nrows=max(height, 1),
            ncols=max(width, 1),
            squeeze=False
        )
fig.set_size_inches(20,20)

# row-major list of axes, the same order np.ndenumerate would visit them
flat_axes = list(axes.flat)
for ax, (n, title, x, c) in zip(flat_axes, final_set):
    # group by the bare column for single-column keys so get_group() accepts
    # the scalar key on every pandas version
    by = c[0] if len(c) == 1 else c
    grp = high_freq_df.groupby(by).get_group(n)
    sns.scatterplot(
        y="hours_to_complete",
        x=x,
        hue="borough",
        data=grp,
        alpha=0.5,
        ax = ax,
        )
    ax.set_title(title)

# the grid can have more cells than plots; hide the unused ones
for ax in flat_axes[len(final_set):]:
    ax.set_visible(False)




In [None]:
# graph number of open requests by agency for the sample
# df[df["open_bo"]>0].groupby("borough").plot(x="created_date", y="open_bo")
high_freq = ['HPD', 'NYPD', 'DSNY', 'DOT', 'DEP', 'DOB', 'DOF', 'DOHMH']
# Group by the bare column name so the key `n` is the agency string on every
# pandas version (a length-1 list grouper yields 1-tuples only on newer ones).
for n, grp in df[df["agency"].isin(high_freq)].groupby("agency"):
    # forward fill missing values and fill a leading gap w/ 0; assign()
    # returns a new frame instead of writing into the groupby slice
    grp = grp.assign(open_ag=grp['open_ag'].ffill().fillna(0))
    ax = grp.plot(x="created_date", y='open_ag', title=f"Open Requests by Hour for {n}", legend= False)
    ax.set_xlabel("Date Created")
    ax.set_ylabel("Number of Open Requests")
    ax.tick_params(axis='x',rotation=45)


In [None]:
# Drop every row that has a missing value in ANY column.
# NOTE(review): describe() above shows due_date and bbl are missing for a
# large share of rows, so this discards many records - consider restricting
# with subset=[...] to the columns the forecast actually needs.
df = df.dropna(axis=0)


In [None]:
# Rename by column name instead of assigning a positional list to df.columns.
# The previous 20-name list no longer matches the frame built above (more
# columns, different order), so positional assignment would raise a length
# mismatch or silently mislabel columns.
# NOTE(review): the mapping is inferred from the legacy positional list
# (created_date -> ds, agency/borough open count -> y); confirm that
# open_bo_ag is the intended target series.
FORECAST_COLUMNS = {"created_date": "ds", "open_bo_ag": "y"}
missing = [
    old for old, new in FORECAST_COLUMNS.items()
    if old not in df.columns and new not in df.columns
]
if missing:
    raise KeyError(f"columns required for forecasting are missing: {missing}")
df = df.rename(columns=FORECAST_COLUMNS)


In [None]:
# heirarchies = [
#     ["borough"],
#     [ "borough","incident_zip"],
#     [ "borough","incident_zip", "agency"],
#     [ "borough","incident_zip", "agency", "complaint_type"],
# ]
# Exclude records without a known borough, then aggregate with a single
# hierarchy level made of the (agency, borough) combination.
hdf = df[df["borough"]!= "Unspecified"]
bdf, H_df, tags = aggregate(
    hdf,
    spec=[["agency","borough"]]
)

In [None]:
# Inspect the first frame returned by aggregate().
bdf

In [None]:
# Reconciliation set-up: a single MinTrace reconciler ("ols", non-negative);
# the ERM and BottomUp alternatives are disabled.
hrec = HierarchicalReconciliation(
    reconcilers =
        [
            MinTrace("ols", nonnegative=True,num_threads=-1),
            # ERM("closed"),
            # BottomUp()
        ]
)


In [None]:


# Training window: strictly between 2018-10-01 and 2018-12-01. The bounds are
# pd.Timestamp objects because pandas does not support ordering comparisons
# between a datetime column and datetime.date values.
window_start = pd.Timestamp(year=2018, month=10, day=1)
window_end = pd.Timestamp(year=2018, month=12, day=1)
Y_df = bdf[(bdf['ds']>window_start) & (bdf['ds']<window_end)]
Y_df

In [None]:

models = [MSTL(
    season_length=[24, 24 * 7], # seasonalities of the time series
    trend_forecaster=AutoARIMA(trace=True) # model used to forecast trend
)]
# The training frame is passed to fit() rather than the constructor: the
# constructor's df argument is deprecated in newer statsforecast releases,
# while fit(df=...) is accepted by old and new versions alike.
# 'h' replaces the deprecated 'H' hourly alias.
sf = StatsForecast(
    models = models,
    freq = 'h',
    n_jobs=-1
    )
sf.fit(df=Y_df)

In [None]:
# Forecast one step ahead (h=1) from the full aggregated frame, requesting level 90.
fcts  = sf.forecast(h=1,df=bdf, level=[90])

In [None]:
# Inspect the forecast frame.
fcts

In [None]:
# Plot the training window together with the forecasts. Only forecast rows
# whose index value also appears in bdf's index are kept, and missing values
# are replaced with 0 before plotting.
StatsForecast.plot(df=Y_df, forecasts_df=fcts[fcts.index.isin(bdf.index)].fillna(0))