# Definitions

In [1]:
from pathlib import Path

from tqdm.notebook import tqdm

import pandas as pd

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from prophet import Prophet

In [2]:
data_path = Path('..', 'data')

In [3]:
def trunc_to_week(dt_series):
    return pd.PeriodIndex(dt_series, freq='W-Sun')

# Data

In [4]:
train = pd.read_csv(data_path.joinpath('train_data.csv'))

In [5]:
train.head()

Unnamed: 0,id,timestamp,rto_day,traffic,region_nm
0,1,2020-08-13,117135.29,638.0,Аваллонэ
1,1,2020-11-26,116102.2,619.0,Аваллонэ
2,2,2021-01-13,102750.37,461.0,Валимар
3,3,2019-07-03,100223.2,603.0,Ильмарин
4,3,2020-12-10,126296.59,624.0,Ильмарин


In [6]:
train.shape

(818000, 5)

In [7]:
train.timestamp = pd.to_datetime(train.timestamp)

In [8]:
train = train.groupby([trunc_to_week(train.timestamp), train.id, train.region_nm]).sum().reset_index()

In [9]:
train.timestamp = pd.to_datetime(train.timestamp.astype(str).str[:10])

# Raw RTO Prophet. model per id

## Test Routine

In [10]:
tdf = train[train['id'] == 1].sort_values('timestamp')

In [11]:
tdf.shape

(117, 5)

In [12]:
train_tdf = tdf.iloc[:-4]
test_tdf = tdf.iloc[-4:]

In [13]:
model = Prophet()

In [14]:
prophet_tdf = train_tdf[['timestamp',
                        'rto_day']].rename({'timestamp': 'ds',
                                            'rto_day': 'y'}, axis=1)

In [15]:
prophet_tdf.head()

Unnamed: 0,ds,y
0,2018-12-31,467010.11
1000,2019-01-07,686159.59
2000,2019-01-14,723778.15
3000,2019-01-21,707086.52
4000,2019-01-28,676487.97


In [16]:
model.fit(prophet_tdf)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<prophet.forecaster.Prophet at 0x7f8d0f0a6730>

In [17]:
prophet_test_tdf = test_tdf[['timestamp',
                        'rto_day']].rename({'timestamp': 'ds',
                                            'rto_day': 'y'}, axis=1)

In [18]:
predicts = model.predict(prophet_test_tdf)
predicts.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01,944770.125498,961968.592734,1075169.0,944770.125498,944770.125498,72728.254072,72728.254072,72728.254072,72728.254072,72728.254072,72728.254072,0.0,0.0,0.0,1017498.0
1,2021-03-08,947610.89706,963682.930721,1075277.0,947610.766342,947611.018431,71593.742083,71593.742083,71593.742083,71593.742083,71593.742083,71593.742083,0.0,0.0,0.0,1019205.0
2,2021-03-15,950451.668623,959736.529801,1080537.0,950451.261643,950452.052334,72290.259903,72290.259903,72290.259903,72290.259903,72290.259903,72290.259903,0.0,0.0,0.0,1022742.0
3,2021-03-22,953292.440186,980114.206107,1093481.0,953291.71851,953293.121909,83144.03629,83144.03629,83144.03629,83144.03629,83144.03629,83144.03629,0.0,0.0,0.0,1036436.0


In [19]:
validate_tdf = pd.merge(prophet_test_tdf, predicts[['ds', 'yhat']], on='ds')

In [20]:
validate_tdf.shape

(4, 3)

In [21]:
week_val_tdf = validate_tdf.groupby(trunc_to_week(validate_tdf.ds)).sum()

In [22]:
mean_absolute_error(week_val_tdf.y, week_val_tdf.yhat)

25881.88688088153

## Routine

In [23]:
splitter = TimeSeriesSplit(n_splits=5, test_size=4)

In [24]:
model_dict = {}
for id_, df in tqdm(train.sort_values('timestamp').groupby('id')):
    prophet_df = df[['timestamp',
                        'rto_day']].rename({'timestamp': 'ds',
                                            'rto_day': 'y'}, axis=1).reset_index(drop=True)
    model_mae = []
    for train_ix, test_ix in splitter.split(prophet_df):
        train_df = prophet_df.loc[train_ix]
        test_df = prophet_df.loc[test_ix]
        
        model = Prophet()
        model.fit(train_df)
        
        predicts = model.predict(test_df[['ds']])
        val_df = pd.merge(test_df, predicts[['ds', 'yhat']],
                          on='ds', how='inner')
        model_mae.append(mean_absolute_error(val_df['y'],
                                             val_df['yhat']))
    model_dict[id_] = model_mae

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1000.0), HTML(value='')))

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to o

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to ove

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to ove

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to ove

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to ove

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





KeyboardInterrupt: 

# Predict

In [25]:
train.tail()

Unnamed: 0,timestamp,id,region_nm,rto_day,traffic
116995,2021-03-22,996,Валимар,1478178.43,5582.0
116996,2021-03-22,997,Хоббитон,736838.25,2920.0
116997,2021-03-22,998,Утумно,774279.29,2623.0
116998,2021-03-22,999,Хоббитон,1401230.57,4011.0
116999,2021-03-22,1000,Ниндамос,763220.55,4166.0


In [42]:
base_df = pd.DataFrame({'ds': pd.date_range('2021-03-29', '2021-04-19', 4)})

In [43]:
base_df

Unnamed: 0,ds
0,2021-03-29
1,2021-04-05
2,2021-04-12
3,2021-04-19


In [46]:
model_dict = {}
for id_, df in tqdm(train.sort_values('timestamp').groupby('id')):
    prophet_df = df[['timestamp',
                        'rto_day']].rename({'timestamp': 'ds',
                                            'rto_day': 'y'}, axis=1)

    model = Prophet(weekly_seasonality=False, daily_seasonality=False)
    model.fit(prophet_df)
    predicts = model.predict(base_df)[['ds', 'yhat']]
    model_dict[id_] = {'model': model,
                       'predicts': predicts}

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1000.0), HTML(value='')))




In [55]:
predicts = []
ids = []
for id_, model_pack in model_dict.items():
    predicts.append(model_pack['predicts'].set_index('ds'))
    ids.append(id_)

In [64]:
submit = pd.concat(predicts, axis=1)
submit.columns = ids
submit = submit.T
submit.columns = [f'week_{i}' for i in range(1, 5)]
submit

Unnamed: 0,week_1,week_2,week_3,week_4
1,1.052623e+06,1.068391e+06,1.064888e+06,1.048131e+06
2,7.708177e+05,7.916776e+05,8.082826e+05,8.181027e+05
3,8.415752e+05,8.583976e+05,8.567536e+05,8.484623e+05
4,1.235297e+06,1.277411e+06,1.287195e+06,1.272152e+06
5,8.904965e+05,9.120269e+05,9.091422e+05,8.951387e+05
...,...,...,...,...
996,1.663629e+06,1.706790e+06,1.769019e+06,1.812043e+06
997,7.971706e+05,8.291948e+05,8.492270e+05,8.508828e+05
998,9.505667e+05,1.004575e+06,1.040309e+06,1.060401e+06
999,1.402828e+06,1.433215e+06,1.470379e+06,1.514563e+06


In [65]:
submit.to_csv(data_path.joinpath('new_submit.csv'), index_label='id')