In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from plotly.subplots import make_subplots
from prophet import Prophet

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the raw export; the preview below shows a 'Date' and a 'First-Time Downloads' column.
df_raw = pd.read_csv('random_date.csv')
df_raw.head(4)

Unnamed: 0,Date,First-Time Downloads
0,1/1/23,2424.0
1,1/2/23,2768.0
2,1/3/23,2411.0
3,1/4/23,2220.0


In [4]:
def data_preprocess(df_raw):
    """Return a cleaned copy of the raw downloads frame.

    Renames 'Date' -> 'act_date' and 'First-Time Downloads' -> 'ft_dl', then
    parses 'act_date' from month/day/two-digit-year strings into datetimes.
    The input frame is not modified.
    """
    column_map = {'Date': 'act_date', 'First-Time Downloads': 'ft_dl'}
    renamed = df_raw.rename(columns=column_map)
    parsed_dates = pd.to_datetime(renamed['act_date'], format="%m/%d/%y")
    return renamed.assign(act_date=parsed_dates)

# Cleaned frame (columns renamed, dates parsed) that the following cells build on.
df1 = data_preprocess(df_raw)

In [9]:
# Visually inspect the first-time-download series over time before modelling.
px.line(df1, x='act_date', y='ft_dl')

In [None]:
# Flag anomalous download counts with an Isolation Forest fitted on `ft_dl` alone.
from sklearn.ensemble import IsolationForest

iso_forest = IsolationForest(contamination=0.004, random_state=42)
outlier_labels = iso_forest.fit_predict(df1[['ft_dl']])  # -1 is treated as "outlier" below
df1['outlier_score'] = outlier_labels
df1['is_outlier'] = df1['outlier_score'].eq(-1)
# Marker series for the chart: 0 for label 1, -400 for label -1, NaN for anything else.
df1['outlier_viz'] = df1['outlier_score'].map({1: 0.0, -1: -400.0})

# The lowest points are Christmas Day, which makes sense, so no outlier replacement is needed.
px.line(df1, x='act_date', y=['ft_dl', 'outlier_viz'])

In [14]:
# outlier handling (left disabled: the flagged lows are Christmas Day, so nothing is replaced)
# df1['smoothed_ft_dl'] = df1['ft_dl'].copy()
# df1.loc[df1['is_outlier'], 'smoothed_ft_dl'] = np.nan  # Mark outliers as NaN
# df1['smoothed_ft_dl'] = df1['smoothed_ft_dl'].interpolate(method='linear',limit=5, limit_direction='backward' )
# px.line(df1, x='act_date', y=['ft_dl', 'outlier_viz', 'smoothed_ft_dl'])

In [19]:
# Single source of truth for the intervention date; days strictly after it are treated.
intervention_date = '2024-08-23'

uk_data = (
    df1[['act_date', 'ft_dl']]
    .assign(treatment=lambda d: (d['act_date'] > intervention_date).astype(int))
    .set_index('act_date')
)
# Aggregate to weekly buckets. Because the daily 0/1 flag is summed, the weekly
# `treatment` column is the number of treated days in that week (0-7), not a flag.
uk_weekly = uk_data.resample('W').sum().reset_index()
uk_weekly.head(4)

Unnamed: 0,act_date,ft_dl,treatment
0,2023-01-01,2424.0,0
1,2023-01-08,16987.0,0
2,2023-01-15,21564.0,0
3,2023-01-22,21801.0,0


In [22]:
# Preview the tail after dropping the first and last weekly rows. The first bucket
# (2023-01-01) holds a single day; the last is presumably a partial week too - TODO confirm.
uk_weekly.iloc[1:-1].tail(4)

Unnamed: 0,act_date,ft_dl,treatment
111,2025-02-16,21663.0,7
112,2025-02-23,24498.0,7
113,2025-03-02,20520.0,7
114,2025-03-09,19829.0,7


In [None]:
# Drop the first and last weekly rows, renumber from 0, and switch to the
# `ds` / `y` column names that Prophet expects.
uk_weekly1 = (
    uk_weekly.iloc[1:-1]
    .reset_index(drop=True)
    .rename(columns={'act_date': 'ds', 'ft_dl': 'y'})
)
# treatment_week_index = uk_weekly1[uk_weekly1['treatment'] != 7].index

In [28]:
# Weekly first-time downloads after trimming the first and last weekly rows.
px.line(uk_weekly1, x='ds', y='y')

In [None]:
# Rows at positions 84-87: the weeks on either side of the train/test split index (86) used further down.
uk_weekly1[84:88]

In [58]:

def prophet_modelling(
    df,
    yearly_seasonality=4,
    weekly_seasonality='auto',
    n_changepoints=2,
    interval_width=0.80,
    mcmc_samples=3000,
    seasonality_mode='additive',
):
    """Fit and return a Prophet model on ``df``.

    Parameters
    ----------
    df : DataFrame with Prophet's ``ds`` (timestamp) and ``y`` (value) columns.
    yearly_seasonality : passed to Prophet; an integer sets the Fourier order
        of the yearly component.
    weekly_seasonality : passed to Prophet. Defaults to ``'auto'`` so that
        Prophet drops the weekly component when the history is spaced a week
        or more apart (as with the weekly-resampled series in this notebook),
        where a within-week pattern cannot be identified. Pass ``True`` to
        force it on for daily data.
    n_changepoints : number of potential trend changepoints.
    interval_width : width of the uncertainty interval reported as
        ``yhat_lower`` / ``yhat_upper``.
    mcmc_samples : number of MCMC samples; a positive value makes Prophet do
        full Bayesian sampling instead of MAP estimation (slow).
    seasonality_mode : ``'additive'`` or ``'multiplicative'``.

    Returns
    -------
    The fitted ``Prophet`` instance.
    """
    model = Prophet(
        yearly_seasonality=yearly_seasonality,
        weekly_seasonality=weekly_seasonality,
        n_changepoints=n_changepoints,
        interval_width=interval_width,
        mcmc_samples=mcmc_samples,
        seasonality_mode=seasonality_mode,
    )
    model.fit(df)
    return model

def prophet_predict(model, num_weeks):
    """Extend the model's history by ``num_weeks`` weekly steps and return its forecast.

    The future frame is built by the model itself with ``freq='W'`` and is
    passed straight to ``model.predict``.
    """
    horizon = model.make_future_dataframe(periods=num_weeks, freq='W')
    return model.predict(horizon)

def prophet_plot(forecast, train_data, test_data, title):
    """Show the forecast, its uncertainty band, and the train/test observations.

    ``forecast`` supplies ``ds``, ``yhat``, ``yhat_lower`` and ``yhat_upper``;
    ``train_data`` and ``test_data`` supply ``ds`` and ``y``. The figure is
    displayed with ``fig.show()``; nothing is returned.
    """
    def _bound_trace(column, label, **extra):
        # Dotted line for one edge of the uncertainty band.
        return go.Scatter(
            x=forecast['ds'], y=forecast[column],
            mode='lines', line=dict(color='cornflowerblue', dash='dot'), name=label,
            **extra,
        )

    # Training observations form the base scatter layer.
    fig = px.scatter(train_data, x='ds', y='y')

    # Point forecast.
    fig.add_trace(go.Scatter(
        x=forecast['ds'], y=forecast['yhat'],
        mode='lines', name='Prophet Forecast', line=dict(color='blue'),
    ))

    # Lower edge first, then the upper edge filled down to it ('tonexty').
    fig.add_trace(_bound_trace('yhat_lower', 'Lower Bound'))
    fig.add_trace(_bound_trace(
        'yhat_upper', 'Upper Bound',
        fill='tonexty',
        fillcolor='rgba(100, 149, 237, 0.3)',
    ))

    # Held-out observations, drawn in red on top.
    fig.add_trace(go.Scatter(
        x=test_data['ds'], y=test_data['y'],
        mode='markers', name='Actual', marker=dict(color='red'),
    ))

    fig.update_layout(
        xaxis_title=None,
        yaxis_title=None,
        title=title,
        title_x=0.5,
        legend=dict(x=0.23, y=1.08, orientation="h"),
    )
    fig.show()


In [None]:
# Rows before position 86 train the model; rows from 86 onward are held out.
treatment_week_index = 86
df_train = uk_weekly1.iloc[:treatment_week_index]
df_test = uk_weekly1.iloc[treatment_week_index:]

# Forecast one step per held-out week.
num_weeks_to_predict = len(df_test)

uk_model = prophet_modelling(df_train)


14:43:04 - cmdstanpy - INFO - CmdStan start processing
chain 1 |[33m          [0m| 00:00 Status
[A

chain 1 |[33m▋         [0m| 00:00 Iteration:    1 / 3000 [  0%]  (Warmup)

[A[A
chain 1 |[33m▉         [0m| 00:00 Iteration:  100 / 3000 [  3%]  (Warmup)

[A[A
chain 1 |[33m█▎        [0m| 00:01 Iteration:  200 / 3000 [  6%]  (Warmup)

[A[A
[A

chain 1 |[33m█▌        [0m| 00:01 Iteration:  300 / 3000 [ 10%]  (Warmup)
[A

chain 1 |[33m█▉        [0m| 00:02 Iteration:  400 / 3000 [ 13%]  (Warmup)
[A

chain 1 |[33m██▏       [0m| 00:03 Iteration:  500 / 3000 [ 16%]  (Warmup)
[A

chain 1 |[33m██▌       [0m| 00:03 Iteration:  600 / 3000 [ 20%]  (Warmup)
chain 1 |[33m██▊       [0m| 00:04 Iteration:  700 / 3000 [ 23%]  (Warmup)

[A[A
[A

chain 1 |[33m███▏      [0m| 00:04 Iteration:  800 / 3000 [ 26%]  (Warmup)
[A

chain 1 |[33m███▍      [0m| 00:05 Iteration:  900 / 3000 [ 30%]  (Warmup)
[A

chain 1 |[33m███▊      [0m| 00:05 Iteration: 1000 / 3000 [ 33%]  (Wa

                                                                                                                                                                                                                                                                                                                                


14:43:21 - cmdstanpy - INFO - CmdStan done processing.
Exception: normal_id_glm_lpdf: Scale vector is 0, but must be positive finite! (in '/private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_2enbhzq__a/croot/prophet_1718022785156/work/build/lib.macosx-11.1-arm64-cpython-311/prophet/stan_model/prophet.stan', line 137, column 2 to line 142, column 4)
Exception: normal_id_glm_lpdf: Scale vector is 0, but must be positive finite! (in '/private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_2enbhzq__a/croot/prophet_1718022785156/work/build/lib.macosx-11.1-arm64-cpython-311/prophet/stan_model/prophet.stan', line 137, column 2 to line 142, column 4)
Exception: normal_id_glm_lpdf: Scale vector is 0, but must be positive finite! (in '/private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_2enbhzq__a/croot/prophet_1718022785156/work/build/lib.macosx-11.1-arm64-cpython-311/prophet/stan_model/prophet.stan', line 137, column 2 to line 142, column 4)
Exception: normal_id_glm_lpdf




In [59]:
# Forecast across the held-out weeks and overlay the observed values.
prophet_plot(
    forecast=prophet_predict(uk_model, num_weeks_to_predict),
    train_data=df_train,
    test_data=df_test,
    title='UK iOS App Store Connect First Time Downloads',
)

In [60]:
# Observed weekly series again, for reference next to the forecast figure.
px.line(uk_weekly1, x='ds', y='y')