In [45]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import statsmodels as sm
from statsmodels.tsa.api import ARDL
from statsmodels.tsa.ardl import ardl_select_order

In [38]:
# Load the monthly pageview / new-account table and prepare it for regression.
FILEPATH = '~/datasets/momentum/pageview_new_accounts_8-7-22.csv'

pageview_accounts_raw = pd.read_csv(FILEPATH)

# Drop every row carrying the largest wiki_age value
# (presumably the most recent, still-incomplete month -- TODO confirm).
latest_age = pageview_accounts_raw['wiki_age'].max()
pageview_accounts_trimmed = pageview_accounts_raw.loc[pageview_accounts_raw['wiki_age'] != latest_age]

# One indicator column per calendar month (month_1 ... month_12).
# dtype=int keeps the indicators numeric: newer pandas releases default to bool,
# which turns the mixed design matrix into dtype=object and makes statsmodels
# reject it.
month_dummies = pd.get_dummies(pageview_accounts_trimmed['month'], prefix='month', dtype=int)

# Append the dummies, then let statsmodels add the `const` intercept column.
pageview_accounts_df = sm.tools.add_constant(
    pd.concat((pageview_accounts_trimmed, month_dummies), axis=1)
)

In [39]:
# Show the prepared frame; pandas elides the middle rows in the rendered table.
pageview_accounts_df

Unnamed: 0,const,month,year,num_pageviews,num_new_accounts,num_articles,wiki_age,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
0,1.0,5,2015,6113855894,66099,19464270,5,0,0,0,0,1,0,0,0,0,0,0,0
1,1.0,6,2015,5943640901,62862,19563376,6,0,0,0,0,0,1,0,0,0,0,0,0
2,1.0,7,2015,6046735010,62370,19834768,7,0,0,0,0,0,0,1,0,0,0,0,0
3,1.0,8,2015,6021283659,62402,20071547,8,0,0,0,0,0,0,0,1,0,0,0,0
4,1.0,9,2015,6059240560,65628,20167477,9,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,1.0,3,2022,6222799117,49194,33646356,87,0,0,1,0,0,0,0,0,0,0,0,0
83,1.0,4,2022,5920294077,44585,33740626,88,0,0,0,1,0,0,0,0,0,0,0,0
84,1.0,5,2022,5982361801,43647,33853815,89,0,0,0,0,1,0,0,0,0,0,0,0
85,1.0,6,2022,5700627282,41161,33957286,90,0,0,0,0,0,1,0,0,0,0,0,0


In [40]:
# OLS of monthly new accounts on pageviews, wiki age and month indicators.
# month_1 is left out of the regressors, so it acts as the baseline month.
# NOTE(review): the saved summary reports Durbin-Watson = 0.566, which hints at
# autocorrelated residuals -- confirm before relying on the nonrobust std errs.
month_cols = [f'month_{m}' for m in range(2, 13)]
ols_columns = ['num_pageviews', 'wiki_age', *month_cols, 'const']

y = pageview_accounts_df['num_new_accounts']
X = pageview_accounts_df[ols_columns]

model = sm.api.OLS(y, X)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,num_new_accounts,R-squared:,0.868
Model:,OLS,Adj. R-squared:,0.845
Method:,Least Squares,F-statistic:,37.03
Date:,"Thu, 11 Aug 2022",Prob (F-statistic):,7.44e-27
Time:,18:54:24,Log-Likelihood:,-818.58
No. Observations:,87,AIC:,1665.0
Df Residuals:,73,BIC:,1700.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
num_pageviews,7.672e-06,1.25e-06,6.138,0.000,5.18e-06,1.02e-05
wiki_age,-261.2704,13.961,-18.714,0.000,-289.094,-233.446
month_2,1505.2014,1927.799,0.781,0.437,-2336.896,5347.299
month_3,2554.4221,1756.777,1.454,0.150,-946.830,6055.675
month_4,-766.1085,1786.436,-0.429,0.669,-4326.471,2794.254
month_5,-2592.8284,1735.346,-1.494,0.139,-6051.368,865.711
month_6,-5066.5222,1959.820,-2.585,0.012,-8972.438,-1160.606
month_7,-6982.8369,1845.844,-3.783,0.000,-1.07e+04,-3304.076
month_8,-5177.4077,1885.748,-2.746,0.008,-8935.697,-1419.118

0,1,2,3
Omnibus:,1.378,Durbin-Watson:,0.566
Prob(Omnibus):,0.502,Jarque-Bera (JB):,1.138
Skew:,-0.043,Prob(JB):,0.566
Kurtosis:,2.446,Cond. No.,164000000000.0


In [63]:
# ARDL order search for new accounts driven by pageviews alone:
# up to 12 lags of the dependent variable and up to 12 lags of pageviews,
# with a constant trend term, scored by AIC.
y = pageview_accounts_df['num_new_accounts']
X = pageview_accounts_df[['num_pageviews']]

sel_res = ardl_select_order(y, 12, X, 12, ic="aic", trend="c")

# Print the first ten entries of the AIC table, numbered from 1.
top_by_aic = sel_res.aic.head(10)
for rank, spec in enumerate(top_by_aic, start=1):
    print(f"{rank}: {spec}")

1: (10, {'num_pageviews': 12})
2: (11, {'num_pageviews': 12})
3: (12, {'num_pageviews': 12})
4: (5, {'num_pageviews': 12})
5: (12, {'num_pageviews': 11})
6: (12, {'num_pageviews': 6})
7: (8, {'num_pageviews': 12})
8: (12, {'num_pageviews': 9})
9: (12, {'num_pageviews': 10})
10: (9, {'num_pageviews': 12})


In [71]:
# Same ARDL order search, now scored by BIC and with wiki_age plus the month
# indicators (month_2 ... month_12) supplied as fixed, unlagged regressors.
# NOTE(review): the saved output of this cell ends with `KeyError: 0.0` and no
# traceback -- re-run on a fresh kernel to confirm the cell completes cleanly.
fixed_cols = ['wiki_age'] + [f'month_{m}' for m in range(2, 13)]

y = pageview_accounts_df['num_new_accounts']
X = pageview_accounts_df[['num_pageviews']]
fixed_X = pageview_accounts_df[fixed_cols]

sel_res = ardl_select_order(y, 12, X, 12, ic="bic", trend="c", fixed=fixed_X)

# Print the first ten entries of the BIC table, numbered from 1.
top_by_bic = sel_res.bic.head(10)
for rank, spec in enumerate(top_by_bic, start=1):
    print(f"{rank}: {spec}")

1: (1, {'num_pageviews': 0})
2: (2, {'num_pageviews': 0})
3: (1, {'num_pageviews': 1})
4: (3, {'num_pageviews': 0})
5: (2, {'num_pageviews': 1})
6: (1, {'num_pageviews': 2})
7: (3, {'num_pageviews': 1})
8: (4, {'num_pageviews': 0})
9: (2, {'num_pageviews': 2})
10: (1, {'num_pageviews': 3})


KeyError: 0.0

In [76]:
# Fit the ARDL(1, 0) specification: one lag of new accounts, contemporaneous
# pageviews only, a constant, and wiki_age + month indicators as fixed
# regressors. This is the first entry of the recorded BIC ranking.
#
# The inputs are derived here rather than inherited from an earlier cell:
# other cells in this notebook rebind `y` and `X` to a different dataset, so
# relying on kernel state could silently fit the wrong model.
y = pageview_accounts_df['num_new_accounts']
X = pageview_accounts_df[['num_pageviews']]
fixed_X = pageview_accounts_df[['wiki_age'] + [f'month_{m}' for m in range(2, 13)]]

res = ARDL(
    y, 1, X, {'num_pageviews': 0}, trend="c", fixed=fixed_X
).fit()
res.summary()

0,1,2,3
Dep. Variable:,num_new_accounts,No. Observations:,87.0
Model:,"ARDL(1, 0)",Log Likelihood,-770.332
Method:,Conditional MLE,S.D. of innovations,1878.839
Date:,"Thu, 11 Aug 2022",AIC,1572.664
Time:,19:24:38,BIC,1611.933
Sample:,1,HQIC,1588.468
,87,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,3637.5536,5946.577,0.612,0.543,-8219.585,1.55e+04
num_new_accounts.L1,0.6710,0.066,10.150,0.000,0.539,0.803
num_pageviews.L0,4.331e-06,8.65e-07,5.005,0.000,2.61e-06,6.06e-06
wiki_age,-95.0671,18.991,-5.006,0.000,-132.934,-57.201
month_2,-7669.5659,1528.238,-5.019,0.000,-1.07e+04,-4622.347
month_3,-2665.9155,1238.208,-2.153,0.035,-5134.831,-197.000
month_4,-9168.4442,1412.175,-6.492,0.000,-1.2e+04,-6352.648
month_5,-7675.3087,1265.745,-6.064,0.000,-1.02e+04,-5151.484
month_6,-1.098e+04,1382.952,-7.937,0.000,-1.37e+04,-8218.732


In [41]:
# Load the monthly active-editor / bytes-added table and prepare it for regression.
# The path is home-relative (pandas expands `~`) instead of naming one user's
# home directory, matching the other dataset path in this notebook.
FILEPATH = '~/datasets/momentum/active_editors_content_added_8-7-22.csv'

editors_content_raw = pd.read_csv(FILEPATH)

# Drop every row carrying the largest wiki_age value
# (presumably the most recent, still-incomplete month -- TODO confirm).
latest_age = editors_content_raw['wiki_age'].max()
editors_content_trimmed = editors_content_raw.loc[editors_content_raw['wiki_age'] != latest_age]

# One indicator column per calendar month. dtype=int keeps them numeric:
# newer pandas releases default to bool, which turns the mixed design matrix
# into dtype=object and makes statsmodels reject it.
month_dummies = pd.get_dummies(editors_content_trimmed['month'], prefix='month', dtype=int)

# Append the dummies, then let statsmodels add the `const` intercept column.
editors_content_df = sm.tools.add_constant(
    pd.concat((editors_content_trimmed, month_dummies), axis=1)
)

In [42]:
# OLS of monthly bytes added on active editors, wiki age and month indicators.
# month_1 is left out of the regressors, so it acts as the baseline month.
month_cols = [f'month_{m}' for m in range(2, 13)]
ols_columns = ['num_active_editors', 'wiki_age', *month_cols, 'const']

y = editors_content_df['num_bytes_added']
X = editors_content_df[ols_columns]

model = sm.api.OLS(y, X)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,num_bytes_added,R-squared:,0.833
Model:,OLS,Adj. R-squared:,0.824
Method:,Least Squares,F-statistic:,93.77
Date:,"Thu, 11 Aug 2022",Prob (F-statistic):,3.52e-87
Time:,18:55:15,Log-Likelihood:,-5535.6
No. Observations:,259,AIC:,11100.0
Df Residuals:,245,BIC:,11150.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
num_active_editors,2.429e+04,699.392,34.731,0.000,2.29e+04,2.57e+04
wiki_age,-1.135e+07,4.93e+05,-23.044,0.000,-1.23e+07,-1.04e+07
month_2,1.486e+07,1.44e+08,0.103,0.918,-2.68e+08,2.98e+08
month_3,8.407e+06,1.44e+08,0.059,0.953,-2.75e+08,2.91e+08
month_4,6.895e+07,1.44e+08,0.480,0.632,-2.14e+08,3.52e+08
month_5,1.543e+08,1.44e+08,1.074,0.284,-1.29e+08,4.37e+08
month_6,2.217e+08,1.44e+08,1.542,0.124,-6.15e+07,5.05e+08
month_7,1.191e+08,1.44e+08,0.829,0.408,-1.64e+08,4.02e+08
month_8,8.517e+07,1.45e+08,0.586,0.558,-2.01e+08,3.72e+08

0,1,2,3
Omnibus:,33.172,Durbin-Watson:,0.165
Prob(Omnibus):,0.0,Jarque-Bera (JB):,54.591
Skew:,0.738,Prob(JB):,1.4e-12
Kurtosis:,4.696,Cond. No.,1390000.0
