In [None]:
#

In [25]:
import pandas as pd
import numpy as np

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from prophet import Prophet
from prophet.diagnostics import cross_validation

# CONSTANT
DATA_PATH = "https://github.com/fakhrirobi/ML_Pacmann_5/blob/main/final/20221120_1110_final_data_2018_2022.xlsx?raw=true"
CITIES = [
    "balikpapan",
    "bandung",
    "batam",
    "jakarta",
    "makassar",
    "medan",
    "palembang",
    "pekanbaru",
    "surabaya",
    "yogyakarta",
]

## Data Loading 

In [67]:
def load_data():
    data = pd.read_excel(DATA_PATH).rename(columns={"Unnamed: 0": "tanggal"})
    data["tanggal"] = pd.to_datetime(data["tanggal"])
    return data

In [68]:
data = load_data()

In [13]:
# extract datetime property : day name , we want to dig in on before and after holiday e.g. sunday saturday
data["day"] = data["tanggal"].dt.day_name()

## Analyzing Missing Values

In [None]:
parsed_data_log = parsed_data.copy()
parsed_data_log.index = [x for x in range(parsed_data_log.shape[0])]

In [35]:
parsed_data = parsed_data.set_index("tanggal").sort_index()[cities].diff(1)

In [None]:
fig = make_subplots(rows=10, cols=1)
for idx, city in enumerate(CITIES):

    fig.add_trace(
        go.Scatter(
            y=parsed_data[f"{city}"], x=parsed_data.index, mode="lines", name=f"{city}"
        ),
        row=idx + 1,
        col=1,
    )


fig.update_layout(
    width=1000,
    height=2000,
    margin=dict(l=0, r=0, t=0, b=0),
    paper_bgcolor="#ffffff",
    title="Price Change from Friday to Monday (Skipped Sat and Sun) From Different Cities",
)

In [69]:
def pipe_log_division(data, cities=CITIES):
    """Helper Function to implement log(dayh/day)"""

    for city in cities:
        data[f"{city}_log_ratio"] = np.NaN
        for idx in data.index:
            try:
                data.loc[idx, f"{city}_log_ratio"] = np.log1p(
                    data.at[idx, city] / data.at[idx - 1, city]
                )
            except:
                continue
    return None

In [63]:
parsed_data_log.pipe(pipe_log_division)

In [65]:
parsed_data_log

Unnamed: 0,tanggal,balikpapan,bandung,batam,jakarta,makassar,medan,palembang,pekanbaru,surabaya,...,balikpapan_log_ratio,bandung_log_ratio,batam_log_ratio,jakarta_log_ratio,makassar_log_ratio,medan_log_ratio,palembang_log_ratio,pekanbaru_log_ratio,surabaya_log_ratio,yogyakarta_log_ratio
0,2018-01-01,,,,,,,,,,...,,,,,,,,,,
1,2018-01-05,68300.0,48200.0,63650.0,61350.0,26550.0,50000.0,58500.0,70950.0,54700.0,...,,,,,,,,,,
2,2018-01-08,68300.0,48200.0,63650.0,61350.0,26550.0,50000.0,58500.0,70950.0,54700.0,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147
3,2018-01-12,72300.0,46300.0,64250.0,66100.0,28000.0,52100.0,58500.0,72550.0,64900.0,...,0.722009,0.673241,0.697849,0.731129,0.720088,0.713930,0.693147,0.704360,0.782289,0.693147
4,2018-01-15,72300.0,46300.0,64250.0,66100.0,28000.0,52100.0,58500.0,72550.0,64900.0,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,2022-11-04,130200.0,86950.0,87200.0,98100.0,50050.0,44750.0,86450.0,84350.0,82350.0,...,0.693147,0.693147,0.647223,0.594850,0.690659,0.693147,0.693147,0.693147,0.693147,0.696305
506,2022-11-07,130200.0,86950.0,87200.0,98100.0,50050.0,44750.0,86450.0,84350.0,82350.0,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147
507,2022-11-11,130200.0,82450.0,83500.0,99350.0,48550.0,40750.0,89500.0,74600.0,82350.0,...,0.693147,0.666930,0.671703,0.699498,0.678049,0.647425,0.710634,0.633615,0.693147,0.725004
508,2022-11-14,130200.0,82450.0,83500.0,99350.0,48550.0,40750.0,89500.0,74600.0,82350.0,...,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147,0.693147


In [None]:
# plot the log (H/H-1)
log_ratio_df = parsed_data

In [None]:
data = pd.read_excel(DATA_PATH, converters={"tanggal": np.datetime64})

In [None]:
data = data.replace("-", np.NaN)

In [None]:
# there are lots of missing values, those were resulted from holiday calendar such as sunday,saturday,idul fitri , etc, assumed data is the same like previous date
# the missing values imputation in this part will use pandas.backfill and pandas.ffill()
data.isnull().sum()

tanggal         0
medan         353
pekanbaru     356
batam         358
palembang     356
bandung       359
jakarta       359
yogyakarta    359
surabaya      358
balikpapan    348
makassar      359
dtype: int64

In [None]:
# imputation
city_columns = [x for x in data.columns if x != "tanggal"]
data[city_columns] = data[city_columns].fillna(method="ffill")
data[city_columns] = data[city_columns].fillna(method="bfill")

In [None]:
data[city_columns] = data[city_columns].astype("float")

In [None]:
data.dtypes

tanggal       datetime64[ns]
medan                 object
pekanbaru             object
batam                 object
palembang             object
bandung               object
jakarta               object
yogyakarta            object
surabaya              object
balikpapan           float64
makassar              object
dtype: object

## Loading data to Prophet

In [71]:
# Since we are predicting price difference by degree one shift 1 , we need to diff in pandas
data_diffed = data.set_index("tanggal")[CITIES].diff(1)

In [72]:
data_diffed.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2022-11-09', '2022-11-10', '2022-11-11', '2022-11-12',
               '2022-11-13', '2022-11-14', '2022-11-15', '2022-11-16',
               '2022-11-17', '2022-11-18'],
              dtype='datetime64[ns]', name='tanggal', length=1783, freq=None)

In [None]:
## Exploratory Data Analysis

In [74]:
# Trendline of Price Change Overtime
fig = make_subplots(rows=10, cols=1)

In [77]:
for idx, col in enumerate(CITIES):

    fig.add_trace(
        go.Scatter(
            y=data_diffed[f"{col}"], x=data_diffed.index, mode="lines", name=f"{col}"
        ),
        row=idx + 1,
        col=1,
    )


fig.update_layout(
    width=1000,
    height=2000,
    margin=dict(l=0, r=0, t=0, b=0),
    paper_bgcolor="#ffffff",
    title="Cabai Merah Price Change From Different Cities",
)

In [None]:
# Add Decomposition Chart

In [78]:
# define stationarity test
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss


def adf_test(timeseries):
    print("Results of Dickey-Fuller Test:")
    dftest = adfuller(timeseries, autolag="AIC")
    dfoutput = pd.Series(
        dftest[0:4],
        index=[
            "Test Statistic",
            "p-value",
            "#Lags Used",
            "Number of Observations Used",
        ],
    )
    for key, value in dftest[4].items():
        dfoutput["Critical Value (%s)" % key] = value
    print(dfoutput)


def kpss_test(timeseries):
    print("Results of KPSS Test:")
    kpsstest = kpss(timeseries, regression="c", nlags="auto")
    kpss_output = pd.Series(
        kpsstest[0:3], index=["Test Statistic", "p-value", "Lags Used"]
    )
    for key, value in kpsstest[3].items():
        kpss_output["Critical Value (%s)" % key] = value
    print(kpss_output)

In [None]:
data_diffed = data_diffed.dropna()

In [79]:
# Stationarity using AdFuller Test
for city in CITIES:
    print(city)
    adf_test(data_diffed[f"{city}"])
    print("=====================================")

balikpapan
Results of Dickey-Fuller Test:


MissingDataError: ignored

In [None]:
data_diffed

Unnamed: 0_level_0,medan,pekanbaru,batam,palembang,bandung,jakarta,yogyakarta,surabaya,balikpapan,makassar
tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2022-11-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-11-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-11-16,0.0,0.0,500.0,-9450.0,-7000.0,-3000.0,-14100.0,-4750.0,-5050.0,1500.0
2022-11-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [80]:
def pipe_parsing_data(data, city):
    data = data[[city]]
    data = data.reset_index()
    data = data.rename(columns={"tanggal": "ds", city: "y"})  # required by prophet
    return data

In [None]:
data_diffed

Unnamed: 0_level_0,medan,pekanbaru,batam,palembang,bandung,jakarta,yogyakarta,surabaya,balikpapan,makassar
tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2022-11-14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-11-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-11-16,0.0,0.0,500.0,-9450.0,-7000.0,-3000.0,-14100.0,-4750.0,-5050.0,1500.0
2022-11-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
def compute_resid(y_true, y_hat):
    # create empty dataframe to contain squared error of requested city
    error_container = pd.DataFrame()
    error_container["residual"] = y_true - y_hat
    error_container["squared_error"] = (y_true - y_hat) ** 2
    return error_container

In [92]:
## Prophet Modelling
# create dictionary to store multiple trained model
cities_dictionary = {}
for city in CITIES:
    # instanciate prophet
    m = Prophet()

    parsed_df = data_diffed.pipe(pipe_parsing_data, city)
    print(parsed_df.columns)
    # fit the model
    m.fit(parsed_df)
    forecast_ = m.predict(parsed_df)
    resid = compute_resid(y_true=parsed_df["y"].values, y_hat=forecast_["yhat"])
    cities_dictionary[city] = [m, forecast_, resid]

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/ipckl0g5.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/gz7j71hs.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=99604', 'data', 'file=/tmp/tmpt7ddupqe/ipckl0g5.json', 'init=/tmp/tmpt7ddupqe/gz7j71hs.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_model7d7boov7/prophet_model-20221120055036.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:36 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:36 - cmdstanpy - INFO - Chain [1] done processing


Index(['ds', 'y'], dtype='object')


INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/2j6nhgen.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/mfnanaap.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23244', 'data', 'file=/tmp/tmpt7ddupqe/2j6nhgen.json', 'init=/tmp/tmpt7ddupqe/mfnanaap.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelt40otkhi/prophet_model-20221120055037.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:37 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:38 - cmdstanpy - INFO - Chain [1] done processing


Index(['ds', 'y'], dtype='object')


INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/lu6e3t8h.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/l2oe1bkp.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=8587', 'data', 'file=/tmp/tmpt7ddupqe/lu6e3t8h.json', 'init=/tmp/tmpt7ddupqe/l2oe1bkp.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelpznlgtnu/prophet_model-20221120055039.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:39 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:39 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Index(['ds', 'y'], dtype='object')


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/av0hvgue.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/gii6kymj.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=61922', 'data', 'file=/tmp/tmpt7ddupqe/av0hvgue.json', 'init=/tmp/tmpt7ddupqe/gii6kymj.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelnaxohwqa/prophet_model-20221120055040.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:40 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing


Index(['ds', 'y'], dtype='object')


05:50:40 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/t56vv_ot.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/l1qnzy2f.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=76769', 'data', 'file=/tmp/tmpt7ddupqe/t56vv_ot.json', 'init=/tmp/tmpt7ddupqe/l1qnzy2f.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_model_rfzgvbd/prophet_model-20221120055041.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:41 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:41 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Index(['ds', 'y'], dtype='object')


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/zuvgtc5n.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/ba48leb2.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=24594', 'data', 'file=/tmp/tmpt7ddupqe/zuvgtc5n.json', 'init=/tmp/tmpt7ddupqe/ba48leb2.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modellrhgvm8n/prophet_model-20221120055043.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:43 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:43 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Index(['ds', 'y'], dtype='object')


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/l92e37ba.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/4wwu4vno.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=65634', 'data', 'file=/tmp/tmpt7ddupqe/l92e37ba.json', 'init=/tmp/tmpt7ddupqe/4wwu4vno.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelc8po_vpc/prophet_model-20221120055044.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:44 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing


Index(['ds', 'y'], dtype='object')


05:50:44 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/0d1q03nm.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/nhml42eh.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75413', 'data', 'file=/tmp/tmpt7ddupqe/0d1q03nm.json', 'init=/tmp/tmpt7ddupqe/nhml42eh.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelyia_ccw7/prophet_model-20221120055045.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:45 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing


Index(['ds', 'y'], dtype='object')


05:50:45 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/eu9lt946.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/wm1eawj0.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=34330', 'data', 'file=/tmp/tmpt7ddupqe/eu9lt946.json', 'init=/tmp/tmpt7ddupqe/wm1eawj0.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelk73agyq2/prophet_model-20221120055046.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:46 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing


Index(['ds', 'y'], dtype='object')


05:50:46 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/6arvywfr.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt7ddupqe/gok955jy.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=27892', 'data', 'file=/tmp/tmpt7ddupqe/6arvywfr.json', 'init=/tmp/tmpt7ddupqe/gok955jy.json', 'output', 'file=/tmp/tmpt7ddupqe/prophet_modelp2umg801/prophet_model-20221120055047.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
05:50:47 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
05:50:47 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Index(['ds', 'y'], dtype='object')


In [None]:
cities_dictionary

In [103]:
fig = make_subplots(rows=10, cols=1)
for idx, city in enumerate(CITIES):

    resid = cities_dictionary.get(city)[2]
    fig.add_trace(
        go.Scatter(
            y=resid["squared_error"],
            x=data_diffed.index,
            mode="markers",
            name=f"{city}",
        ),
        row=idx + 1,
        col=1,
    )

    fig.update_layout(
        width=1000,
        height=2000,
        margin=dict(l=0, r=0, t=0, b=0),
        paper_bgcolor="#ffffff",
        title="Residual From Different Cities",
    )
fig.show()

In [None]:
# Isolation Forest

In [101]:
fig = make_subplots(rows=10, cols=1)
for idx, city in enumerate(CITIES):

    resid = cities_dictionary.get(city)[2]
    fig.add_trace(
        go.Scatter(
            y=resid["residual"], x=data_diffed.index, mode="markers", name=f"{city}"
        ),
        row=idx + 1,
        col=1,
    )

    fig.update_layout(
        width=1000,
        height=2000,
        margin=dict(l=0, r=0, t=0, b=0),
        paper_bgcolor="#ffffff",
        title="Residual From Different Cities",
    )
fig.show()