### Library

In [1]:
# load library
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

### Data Prepocessing

In [2]:
# load dataset
train = pd.read_csv('train.csv')

In [3]:
train = train[['periode','kas_kantor','kas_echannel']]

# calculate lag-1
train['kas_kantor_lag1'] = train.kas_kantor - train.kas_kantor.shift(1)
train['kas_echannel_lag1'] = train.kas_echannel - train.kas_echannel.shift(1)

# extract weekofday & day
train['periode_new'] = pd.to_datetime(train['periode'])
train['week'] = train['periode_new'].dt.dayofweek + 1
train['day'] = train['periode_new'].dt.day
train.head()

Unnamed: 0,periode,kas_kantor,kas_echannel,kas_kantor_lag1,kas_echannel_lag1,periode_new,week,day
0,2019-07-31,1928940000.0,2939100000.0,,,2019-07-31,3,31
1,2019-08-01,78491260000.0,2837250000.0,76562320000.0,-101850000.0,2019-08-01,4,1
2,2019-08-02,391762000000.0,2948050000.0,313270800000.0,110800000.0,2019-08-02,5,2
3,2019-08-03,393013300000.0,3693800000.0,1251294000.0,745750000.0,2019-08-03,6,3
4,2019-08-04,305720600000.0,3829300000.0,-87292740000.0,135500000.0,2019-08-04,7,4


In [4]:
# mean by week
agg_by_week = (train.groupby(['week'], as_index=False)
         .agg({'kas_kantor_lag1':'mean','kas_echannel_lag1':'mean'})
              .rename(columns={'kas_kantor_lag1':'mean_by_week_kantor', 'kas_echannel_lag1':'mean_by_week_echannel'}))

In [5]:
# mean by day
agg_by_day = (train.groupby(['day'], as_index=False)
         .agg({'kas_kantor_lag1':'mean','kas_echannel_lag1':'mean'})
              .rename(columns={'kas_kantor_lag1':'mean_by_day_kantor', 'kas_echannel_lag1':'mean_by_day_echannel'}))

### Predict with Validation Dataset

In [6]:
# join with aggregation data
test = train[train.periode_new>='2020-08-31']
test = test.merge(agg_by_day, on='day', how='left')
test = test.merge(agg_by_week, on='week', how='left')

# weighted mean between mean by day & mean by week
test['weighted_mean_kantor'] = 0.1*test['mean_by_day_kantor']+0.9*test['mean_by_week_kantor']
test['weighted_mean_echannel'] = 0.1*test['mean_by_day_echannel']+0.9*test['mean_by_week_echannel']
import copy
test2 = copy.deepcopy(test)
test = test[test.periode_new>='2020-09-01']
test = test.reset_index(drop=True)

In [7]:
# predict
test['predict_kantor']=0
test['predict_echannel']=0
for i in list(range(len(test))):
    if i==0:
        test['predict_kantor'][i] = test.loc[i,'weighted_mean_kantor']+test2.loc[i,'kas_kantor']
        test['predict_echannel'][i] = test.loc[i,'weighted_mean_echannel']+test2.loc[i,'kas_echannel']
    else:
        test['predict_kantor'][i] = test.loc[i,'weighted_mean_kantor']+test.loc[i-1,'predict_kantor']
        test['predict_echannel'][i] = test.loc[i,'weighted_mean_echannel']+test.loc[i-1,'predict_echannel']
test['absolute_error_kantor'] = np.abs((test['predict_kantor']-test['kas_kantor'])/test['kas_kantor'])
test['absolute_error_echannel'] = np.abs((test['predict_echannel']-test['kas_echannel'])/test['kas_echannel'])

# evaluation score
print('MAPE kantor : ',np.mean(test.absolute_error_kantor),'\n')
print('MAPE echannel : ',np.mean(test.absolute_error_echannel),'\n')
print('MAPE overal : ',((np.mean(test.absolute_error_kantor))+(np.mean(test.absolute_error_echannel)))/2)

MAPE kantor :  0.00995099850106269 

MAPE echannel :  0.0943262362728701 

MAPE overal :  0.052138617386966395


In [8]:
test[['periode','predict_kantor','predict_echannel']].head()

Unnamed: 0,periode,predict_kantor,predict_echannel
0,2020-09-01,10537183738407,5032020526
1,2020-09-02,10557335515539,4928745954
2,2020-09-03,10595066505732,4794554061
3,2020-09-04,10790124360768,5002942737
4,2020-09-05,10793660312421,5421086665


### Forecast 30days ahead

In [9]:
# data preparation
datelist = pd.date_range('2020-10-01', periods=31) # by default, starting date forecast = 2020-10-01 , n_periode = 31
df_submission = pd.DataFrame()
df_submission['periode'] = datelist
df_submission['periode_new'] = pd.to_datetime(df_submission['periode'])
df_submission['week'] = df_submission['periode_new'].dt.dayofweek + 1
df_submission['day'] = df_submission['periode_new'].dt.day

In [10]:
# weighted mean between mean by day & mean by week
df_submission = df_submission.merge(agg_by_day, on='day', how='left')
df_submission = df_submission.merge(agg_by_week, on='week', how='left')
df_submission['weighted_mean_kantor'] = 0.1*df_submission['mean_by_day_kantor']+0.9*df_submission['mean_by_week_kantor']
df_submission['weighted_mean_echannel'] = 0.1*df_submission['mean_by_day_echannel']+0.9*df_submission['mean_by_week_echannel']

In [11]:
#forecast
df_submission['predict_kantor']=0
df_submission['predict_echannel']=0
for i in list(range(len(df_submission))):
    if i==0:
        df_submission['predict_kantor'][i] = df_submission.loc[i,'weighted_mean_kantor']+test.loc[len(test)-1,'kas_kantor']
        df_submission['predict_echannel'][i] = df_submission.loc[i,'weighted_mean_echannel']+test.loc[len(test)-1,'kas_echannel']
    else:
        df_submission['predict_kantor'][i] = df_submission.loc[i,'weighted_mean_kantor']+df_submission.loc[i-1,'predict_kantor']
        df_submission['predict_echannel'][i] = df_submission.loc[i,'weighted_mean_echannel']+df_submission.loc[i-1,'predict_echannel']

In [12]:
df_submission[['periode','predict_kantor','predict_echannel']].head()

Unnamed: 0,periode,predict_kantor,predict_echannel
0,2020-10-01,11432309813598,5168871678
1,2020-10-02,11621797035903,5377178926
2,2020-10-03,11631565127158,5807776783
3,2020-10-04,11543143630096,5658838164
4,2020-10-05,11550018859079,5569533322


In [13]:
# save data for submission
submission = pd.DataFrame({'index':list(range(0,62)), 'value':list(df_submission.predict_kantor.values)+list(df_submission.predict_echannel.values)})
submission.to_csv('cro_submission_python.csv', index=False)