In [1]:
# Installs the kaleido package into the kernel's environment.
# NOTE(review): presumably needed by the `fig.write_image` call inside write_image() — confirm,
# and consider pinning a version so re-runs resolve the same release.
%pip install kaleido

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import math
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from prophet import Prophet
from datetime import datetime, timedelta

In [3]:
# Make pandas' `.plot` accessor use plotly as its plotting backend.
pd.set_option('plotting.backend', 'plotly')

In [4]:
def clean_fig(fig):
    """Apply the notebook's shared styling to a figure and hand it back.

    The layout gets a white plot background, a title positioned at x=0.5,
    size-15 Arial as the general font, Times New Roman for the title, and the
    module-level ``black`` constant as the font colour. Colour-axis scale bars
    are switched off.

    Args:
        fig: Object exposing ``update_layout`` and ``update_coloraxes``
            (a plotly figure in this notebook).

    Returns:
        The same ``fig`` object that was passed in.
    """
    layout_settings = {
        'plot_bgcolor': '#FFF',
        'title': {'x': .5},
        'font_size': 15,
        'font_family': 'Arial',
        'title_font_family': "Times New Roman",
        'font': {'color': black},
    }
    fig.update_layout(**layout_settings)
    fig.update_coloraxes(showscale=False)
    return fig

def write_image(fig, image_name, width=1200, height=600):
    """Export ``fig`` as a PNG file at ``images/<image_name>.png``.

    The output directory is created when it does not exist yet, so the export
    also works on a fresh runtime that has no ``images/`` folder.

    Args:
        fig: Object exposing ``write_image(path, width=..., height=...)``
            (a plotly figure in this notebook).
        image_name: File name without extension; ``.png`` is appended.
        width: Value forwarded to ``fig.write_image`` as ``width``.
        height: Value forwarded to ``fig.write_image`` as ``height``.

    Returns:
        None. The only effect is the file written by ``fig.write_image``.
    """
    import os  # local import so the helper does not rely on another cell

    image_path = f'images/{image_name}.png'
    # Ensure the target folder exists before exporting; otherwise the write
    # fails when the notebook is run from a clean working directory.
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    fig.write_image(image_path, width=width, height=height)

In [5]:
# Fifteen consecutive calendar days, 2022-03-11 through 2022-03-25.
check_dates = list(map(
    lambda day_offset: datetime(2022, 3, 11) + timedelta(days=day_offset),
    range(15),
))

In [6]:
# Seed NumPy's global random generator so later random draws repeat across runs.
np.random.seed(42)

# Hex colour constants for the notebook's figures.
black, orange, pink = '#363833', '#c86a3e', '#d38bba'
lgreen, dgreen, white = '#c8d7ab', '#4d5e38', '#f0ece1'

In [7]:
def generate_df(day_map):
    """Build a synthetic 100-day table of daily session statistics.

    One row is produced per day starting at 2022-01-01. The session count is
    drawn from ``np.random.normal(loc=6000, scale=500)`` plus a linear trend
    of 20 per day, multiplied by a per-weekday factor, and then truncated to
    an integer. Random numbers come from NumPy's global generator, so seed it
    beforehand for repeatable output.

    Args:
        day_map: Mapping from weekday name (as returned by
            ``Series.dt.day_name()``, e.g. ``'Sunday'``) to the factor applied
            to ``nbr_sessions`` on that weekday. Weekdays not listed keep
            their value.

    Returns:
        pandas.DataFrame with columns ``date``, ``nbr_sessions`` (integer
        dtype), ``weekday``, ``mean_seconds`` and ``total_seconds``
        (``nbr_sessions * mean_seconds``).
    """
    start = datetime(2022, 1, 1)
    # Draw one value per row inside the comprehension: this keeps the order
    # in which the global random generator is consumed unchanged.
    df = pd.DataFrame([
        [start + timedelta(days=x), np.random.normal(loc=6000, scale=500) + x*20]
        for x in range(100)
    ], columns=['date', 'nbr_sessions'])
    df['weekday'] = df['date'].dt.day_name()
    for day, val in day_map.items():
        is_day = df['weekday'] == day
        df.loc[is_day, 'nbr_sessions'] = df.loc[is_day, 'nbr_sessions'] * val
    # Replace the column rather than writing through `.loc[:, ...]`: the
    # latter sets values in place on recent pandas and would leave the column
    # as float64 despite the astype(int).
    df['nbr_sessions'] = df['nbr_sessions'].astype(int)
    df['mean_seconds'] = np.random.normal(180, 10, len(df))
    df['total_seconds'] = df['nbr_sessions'] * df['mean_seconds']
    return df

# Scale sessions down on Wednesday/Thursday and up on the weekend, then preview the first rows.
df = generate_df(dict(Wednesday=.9, Thursday=.8, Saturday=1.1, Sunday=1.2))
df.head(3)

Unnamed: 0,date,nbr_sessions,weekday,mean_seconds,total_seconds
0,2022-01-01,6873,Saturday,165.846293,1139862.0
1,2022-01-02,7141,Sunday,175.793547,1255342.0
2,2022-01-03,6363,Monday,176.572855,1123533.0


In [8]:
# Work on a copy so the generated frame `df` itself is not touched here.
anomaly_df = df.copy()

# Parallel lists with one entry per checked day: the day, the observed value,
# and the lower/upper ends of the predicted interval.
dates = []
orig = []
lower = []
upper = []

for d in check_dates:
    # Last 45 rows dated up to and including `d`, with the columns renamed to
    # `ds` / `y` for the Prophet calls below.
    subset_df = (
        anomaly_df.loc[anomaly_df['date'] <= d]
        .tail(45)
        .copy()
        .rename(columns={'date': 'ds', 'nbr_sessions': 'y'})
    )

    # Fit on every row except the final one, then predict for that held-out row.
    m = Prophet(daily_seasonality=False, yearly_seasonality=False)
    m.fit(subset_df.iloc[:-1])
    pred = m.predict(subset_df.iloc[-1:][['ds']])

    dates.append(d)
    orig.append(subset_df['y'].values[-1])
    lower.append(pred['yhat_lower'].values[0])
    upper.append(pred['yhat_upper'].values[0])

DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/axhcrltt.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/89e3dbwz.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=83992', 'data', 'file=/tmp/tmpqqj7i24d/axhcrltt.json', 'init=/tmp/tmpqqj7i24d/89e3dbwz.json', 'output', 'file=/tmp/tmpqqj7i24d/prophet_model496n785o/prophet_model-20221108141040.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
14:10:40 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:10:40 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/ekti3u1_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/pui8rbnn.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/

In [9]:
# `pred` is whatever the final iteration of the loop above left behind.
pred.loc[:, ['yhat', 'yhat_lower', 'yhat_upper']]

Unnamed: 0,yhat,yhat_lower,yhat_upper
0,7699.499255,7157.890741,8210.419103


In [12]:
# Predicted interval (lower..upper) as a filled band, with the observed series drawn on top.
fig = go.Figure([
    # Lower bound comes first so the next trace can fill down to it via 'tonexty'.
    go.Scatter(x=dates, y=lower, mode='lines', marker={'color': black}, name='lower', showlegend=False),
    # NOTE(review): the alpha component in fillcolor is 20, outside the usual 0-1 range — confirm the intended opacity.
    go.Scatter(x=dates, y=upper, mode='lines', marker={'color': black}, name='upper', fill='tonexty', fillcolor='rgba(240,236,225,20)', showlegend=False),
    # Observed values; the weekday name of each checked date is attached as per-point text.
    go.Scatter(
        x=dates, y=orig, name='Nbr Sessions', line={'width': 4, 'color': dgreen},
        text=df[df.date.isin(dates)].weekday
        ),
])
fig.update_layout(title='<b>Boundaries built using Prophet</b>')
fig = clean_fig(fig)
write_image(fig, 'anomaly_prophet')
fig.show()

In [14]:
# Rename the columns to `ds` / `y` for the Prophet calls below.
forecast_df = df.rename(columns={'date': 'ds', 'nbr_sessions': 'y'}).copy()

# Fit on everything except the final 15 rows, then predict for those 15 rows.
m = Prophet()
m.fit(forecast_df.iloc[:-15])
pred_df = m.predict(forecast_df.iloc[-15:])

# Plot only the last 30 rows of observed values for context.
vis_df = forecast_df.iloc[-30:].copy()

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=vis_df['ds'], y=vis_df['y'], mode='lines',
    line=dict(color=dgreen, width=3), name='True Values',
))
fig.add_trace(go.Scatter(
    x=pred_df['ds'], y=pred_df['yhat'], mode='lines',
    line=dict(color=orange, width=3, dash='dash'), name='Prophet Forecast',
))
fig.update_layout(title='<b>15 days forecasting using Prophet</b>')
fig = clean_fig(fig)
write_image(fig, 'forecast_prophet')
fig.show()

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/z4x821jy.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqqj7i24d/2442k0co.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=52119', 'data', 'file=/tmp/tmpqqj7i24d/z4x821jy.json', 'init=/tmp/tmpqqj7i24d/2442k0co.json', 'output', 'file=/tmp/tmpqqj7i24d/prophet_modellviq76c3/prophet_model-20221108141855.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
14:18:55 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
14:18:55 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
