**Author:** Jim Maddock  
**Created:** 8-31-22  
**Description:** OLS and Autoregressive Distributed Lag model (ARDL) comparison across multiple language editions for Momentum.  Includes dataframes for RQ 1 (the relationship between readership and new editors) and RQ 2 (the relationship between active editors and content creation).  For a methods overview see [this document](https://docs.google.com/document/d/1FoAv1xFfmtMPX7PC33XZBSYaZGM0Lf5RBaGpFSCkRVk/edit?usp=sharing).

In [124]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import functools
import warnings

import statsmodels.api as sm
from statsmodels.tsa.api import ARDL
from statsmodels.tsa.ardl import ardl_select_order

In [103]:
def get_vars(df,possible_vars,threshold=.6):
    """Pick, for every wiki, the candidate regressors that are not highly correlated.

    For each ``wiki_db`` group the pairwise correlation matrix of
    ``possible_vars`` is computed.  Variables are visited in the order given;
    whenever a still-retained variable correlates above ``threshold`` with
    another still-retained variable, the other one is dropped.  Earlier entries
    in ``possible_vars`` therefore take precedence over later ones.

    Only positive correlations are compared against the threshold (the sign is
    not discarded), and NaN correlations never trigger a removal.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``wiki_db`` column and every column in ``possible_vars``.
    possible_vars : list of str
        Candidate column names, in priority order.  Not modified.
    threshold : float, default 0.6
        Correlation above which the lower-priority variable is removed.

    Returns
    -------
    dict
        Maps each ``wiki_db`` value to the list of retained variable names.
    """
    var_map = {}
    for wiki, wiki_df in df[possible_vars+['wiki_db']].groupby('wiki_db'):
        kept = list(possible_vars)
        # Correlate the candidate columns only: the group still carries the string
        # 'wiki_db' column, which DataFrame.corr() refuses to handle on pandas >= 2.0.
        corr_table = wiki_df[possible_vars].corr()
        for var_1 in possible_vars:
            # Series.items() replaces iteritems(), which was removed in pandas 2.0.
            for var_2, corr in corr_table[var_1].items():
                if var_1 != var_2 and var_1 in kept and var_2 in kept and corr > threshold:
                    kept.remove(var_2)
        var_map[wiki] = kept
    return var_map

In [96]:
def highlight_coefs(column,p_values,alpha=.05):
    """Build per-cell CSS for one column of a coefficient table.

    Intended for column-wise ``Styler.apply``: a cell is shaded green when its
    coefficient is positive and red when it is negative, but only if the
    matching p-value is below ``alpha``.  Everything else (not significant,
    exactly zero, or a missing p-value/coefficient) is left unstyled.

    Parameters
    ----------
    column : pandas.Series
        Coefficients for one model; ``column.name`` selects the p-value column.
    p_values : pandas.DataFrame
        P-values with the same row labels and column names as the coefficients.
    alpha : float, default 0.05
        Significance level.

    Returns
    -------
    list of str
        One CSS string per entry of ``column``, in order.
    """
    pos = 'background-color: lightgreen;'
    neg = 'background-color: lightcoral;'
    default = ''

    styles = []
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for i, val in column.items():
        significant = p_values.at[i,column.name] < alpha
        if significant and val > 0:
            styles.append(pos)
        elif significant and val < 0:
            styles.append(neg)
        else:
            styles.append(default)

    return styles

In [3]:
# RQ 1 input data.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a machine with this layout.
FILEPATH = '/home/jmads/datasets/momentum/pageview_new_accounts_multi-wiki_8-24-22.csv'

pageview_accounts_df = pd.read_csv(FILEPATH)

# First-of-month timestamp built from the year and month columns.
pageview_accounts_df['datetime'] = pd.to_datetime(pageview_accounts_df['year'].astype(str)+'-'+pageview_accounts_df['month'].astype(str)+'-1')
# Drop every row belonging to the latest month in the file (presumably an incomplete month -- confirm).
pageview_accounts_df = pageview_accounts_df.loc[pageview_accounts_df['datetime'] != pageview_accounts_df['datetime'].max()]
# Month dummies (month_1 ... month_12).  dtype=int keeps them numeric: pandas >= 2.0 would otherwise
# emit bool columns, which turn the regression design matrices into object dtype.
pageview_accounts_df = pd.concat((pageview_accounts_df,pd.get_dummies(pageview_accounts_df['month'],prefix='month',dtype=int)),axis=1)
# Adds the 'const' intercept column used by the OLS models.
pageview_accounts_df = sm.tools.add_constant(pageview_accounts_df)

In [4]:
# Inspect the prepared RQ 1 frame (datetime, month dummies and const columns were added in the previous cell).
pageview_accounts_df

Unnamed: 0,const,month,year,wiki_db,num_pageviews,num_new_accounts,num_articles,year_1,month_1,wiki_age,...,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
0,1.0,5,2015,arwiki,99512972,3125,66698,2003,7,142,...,0,0,1,0,0,0,0,0,0,0
1,1.0,6,2015,arwiki,91875676,3041,67047,2003,7,143,...,0,0,0,1,0,0,0,0,0,0
2,1.0,7,2015,arwiki,96692677,3124,67407,2003,7,144,...,0,0,0,0,1,0,0,0,0,0
3,1.0,8,2015,arwiki,94689390,2765,67982,2003,7,145,...,0,0,0,0,0,1,0,0,0,0
4,1.0,9,2015,arwiki,93941035,2534,68326,2003,7,146,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1224,1.0,3,2022,viwiki,59948309,1267,1274236,2002,11,232,...,1,0,0,0,0,0,0,0,0,0
1225,1.0,4,2022,viwiki,50054369,1007,1274814,2002,11,233,...,0,1,0,0,0,0,0,0,0,0
1226,1.0,5,2022,viwiki,47176896,1071,1275505,2002,11,234,...,0,0,1,0,0,0,0,0,0,0
1227,1.0,6,2022,viwiki,46589502,1037,1276093,2002,11,235,...,0,0,0,1,0,0,0,0,0,0


In [118]:
# Candidate regressors for RQ 1, in priority order.  get_vars keeps, per wiki, only those
# that are not correlated above its cutoff with a candidate listed earlier.
possible_vars = ['num_pageviews','wiki_age','num_articles']
var_map = get_vars(pageview_accounts_df,possible_vars)
var_map

{'arwiki': ['num_pageviews'],
 'bnwiki': ['num_pageviews'],
 'cawiki': ['num_pageviews', 'wiki_age'],
 'cswiki': ['num_pageviews', 'wiki_age'],
 'dewiki': ['num_pageviews', 'wiki_age'],
 'enwiki': ['num_pageviews', 'wiki_age'],
 'eswiki': ['num_pageviews', 'wiki_age'],
 'frwiki': ['num_pageviews', 'wiki_age'],
 'hiwiki': ['num_pageviews'],
 'idwiki': ['num_pageviews', 'wiki_age'],
 'jawiki': ['num_pageviews', 'wiki_age'],
 'ptwiki': ['num_pageviews', 'wiki_age'],
 'ruwiki': ['num_pageviews', 'wiki_age'],
 'viwiki': ['num_pageviews']}

# RQ 1 (Readers -> Editors) Model Results

## Model 1.1-multiwiki

The multi-wiki analysis for Model 1.1 compares the relationship between pageviews and new account creation for all 14 Wikipedia language editions within our multi-wiki dataset.  We compare statistical significance and coefficient directionality across our 14 models in order to understand whether the relationships observed in English Wikipedia transfer to other language editions.  We leave comparison of the magnitude of coefficients to future work.

Comparison of Model 1.1 results indicates that the relationship between pageviews and new accounts remains remarkably consistent across most language editions in our subset.  *Pageviews* is statistically significant and positive in all language editions except Catalan and Russian (where it is not statistically significant) and Portuguese (where it is negative).  *WikiAge* is somewhat more variable.  In Catalan, Czech, German, English, and Russian *WikiAge* is negative; in French, Indonesian, Japanese, and Portuguese *WikiAge* is positive; in Spanish *WikiAge* is not statistically significant, and in the remaining language editions we omitted the variable due to multicollinearity.

In [126]:
# Model 1.1: one OLS per language edition,
#   num_new_accounts ~ retained covariates + month dummies + const.
# month_1 is left out of the dummies so it acts as the reference month alongside the intercept.
month_cols = [f'month_{m}' for m in range(2,13)]

group = pageview_accounts_df.groupby('wiki_db')
coef_frames = []
pvalue_frames = []

for wiki, x in group:
    y = x['num_new_accounts']
    X = x[var_map[wiki]+month_cols+['const']]

    model = sm.OLS(y, X)
    results = model.fit()
    coef_frames.append(results.params.to_frame(wiki))
    pvalue_frames.append(results.pvalues.to_frame(wiki))

# Assemble the tables once instead of re-concatenating a growing frame on every iteration.
# Rows are the union of all term names; terms absent from a wiki's model are NaN.
model_coefs = pd.concat(coef_frames,axis=1)
model_pvalues = pd.concat(pvalue_frames,axis=1)

display(model_coefs.style.apply(functools.partial(highlight_coefs,p_values=model_pvalues)))

Unnamed: 0,arwiki,bnwiki,cawiki,cswiki,dewiki,enwiki,eswiki,frwiki,hiwiki,idwiki,jawiki,ptwiki,ruwiki,viwiki
num_pageviews,2.4e-05,3.9e-05,4e-06,1.1e-05,7e-06,8e-06,9e-06,1.4e-05,1.7e-05,9e-06,2e-06,-9e-06,-1e-06,1.7e-05
month_2,-7.709601,238.460337,-36.153484,55.545127,-98.498105,1505.201409,-514.006053,227.090314,-34.124037,-22.893629,-42.758735,-613.840922,-601.678698,-48.922796
month_3,-403.68916,173.038854,24.524909,96.256274,-68.00553,2554.422093,-242.839893,340.295548,-3.448003,26.534465,141.707034,448.097127,-224.154872,-22.187659
month_4,-413.794543,41.439894,-91.574419,25.524189,-372.700163,-766.108517,-750.561543,-397.103586,35.284107,-114.175349,49.996921,116.512341,-497.115141,-36.374457
month_5,-204.739697,9.335865,-117.679047,-12.440345,-350.598612,-2592.828379,-265.484613,-399.297781,28.298329,-43.842654,42.777194,283.46934,-511.675134,50.192914
month_6,-315.672877,-22.071074,-253.088972,-29.217653,-366.277059,-5066.52221,-320.005906,-335.996937,-7.701176,19.23168,-151.073928,-41.091269,-1360.030369,32.887969
month_7,-289.769707,119.443445,-336.732044,-84.676845,-708.744046,-6982.836936,-700.638391,-930.57077,7.098305,74.74278,-121.88698,-241.334311,-1468.975283,41.807171
month_8,-321.538094,145.628231,-377.883252,-82.294004,-725.05789,-5177.407742,-797.372254,-1267.179761,16.353694,12.608476,-31.394112,-147.026258,-1553.810809,96.188608
month_9,-287.561003,30.771336,-273.732071,-107.21162,-577.081303,-1292.649696,-672.227325,-697.275676,-1.879815,-107.210187,-202.809996,-411.620558,-1654.623096,-5.361026
month_10,-453.763827,35.299668,118.960136,5.887519,-407.81039,-908.261092,107.557543,-388.547437,-35.113028,-100.80536,-202.705392,117.194294,-775.674435,-18.653566


In [129]:
# Model 1.2: per-wiki ARDL of num_new_accounts on num_pageviews, lag orders chosen by BIC.
with warnings.catch_warnings():
    # The order search fits many candidate models; their warnings are deliberately
    # silenced for this cell only (filters are restored when the block exits).
    warnings.simplefilter('ignore')

    IV = 'num_pageviews'
    max_lag = 12  # upper bound for both the autoregressive lags and the IV lags
    month_cols = [f'month_{m}' for m in range(2,13)]

    group = pageview_accounts_df.groupby('wiki_db')
    coef_frames = []
    pvalue_frames = []

    for wiki, x in group:
        y = x['num_new_accounts']
        X = x[[IV]]
        # Everything except the IV is passed as `fixed`, i.e. as regressors that are not lagged.
        fixed_X_vars = var_map[wiki]+month_cols
        fixed_X_vars.remove(IV)
        fixed_X = x[fixed_X_vars]
        sel_res = ardl_select_order(
            y, max_lag, X, max_lag, ic="bic", trend="c", fixed = fixed_X
        )

        results = sel_res.model.fit()
        coef_frames.append(results.params.to_frame(wiki))
        pvalue_frames.append(results.pvalues.to_frame(wiki))

    # Assemble once; the selected lag structure differs per wiki, so missing terms are NaN.
    model_coefs = pd.concat(coef_frames,axis=1)
    model_pvalues = pd.concat(pvalue_frames,axis=1)

    display(model_coefs.sort_index().style.apply(functools.partial(highlight_coefs,p_values=model_pvalues)))

Unnamed: 0,arwiki,bnwiki,cawiki,cswiki,dewiki,enwiki,eswiki,frwiki,hiwiki,idwiki,jawiki,ptwiki,ruwiki,viwiki
const,27.083825,123.045476,1002.439587,335.241348,2557.766888,19513.763424,348.151722,-118.174687,61.71233,177.760967,283.891593,467.17273,1362.64142,195.035255
month_10,-113.991766,29.450483,118.960136,88.415635,-454.851395,-4559.429626,278.007046,-159.455899,-50.472345,-74.095059,-257.004003,6.233319,94.139972,3.122273
month_11,-273.925408,15.772543,83.508677,121.264993,-549.110852,-7431.784702,-374.238222,-391.92779,48.966855,-47.332775,-201.698897,-533.015014,-59.228562,-52.563356
month_12,67.599742,0.28185,-122.673707,-61.204205,-983.256118,-10585.864439,-693.66594,-889.649452,-32.62912,-35.844596,-85.817715,-722.174068,-197.266525,13.116422
month_2,-0.016081,228.313706,-36.153484,20.602912,-917.455708,-7669.565861,-750.375707,-706.147234,-56.888256,-47.323128,59.890347,-943.146174,-213.392652,-43.784145
month_3,-135.340973,109.920002,24.524909,54.200956,-405.844253,-2665.915501,-100.292777,-192.037858,15.003359,57.411409,-22.837296,139.643296,-110.802337,26.565845
month_4,-141.361554,-18.347796,-91.574419,-33.252309,-908.514404,-9168.444184,-954.959458,-1308.724294,10.996866,-159.349955,-70.95865,-710.856548,-256.305304,-22.565334
month_5,-56.383589,-4.607287,-117.679047,-73.163871,-664.778764,-7675.308702,-179.229797,-494.124713,7.242626,-41.445348,-169.05423,-368.245605,-175.379852,46.23471
month_6,-423.929643,-25.648205,-253.088972,-87.736436,-766.232634,-10976.259283,-657.948468,-903.208735,-34.736978,-3.926893,-139.090952,-575.123139,-489.452387,29.485581
month_7,140.905792,132.214337,-336.732044,-141.306875,-969.411839,-8324.941729,-868.602228,-1265.182691,19.411581,67.123084,-215.134375,-505.22907,-326.136529,47.137752


## Model 1.2-multiwiki

The consistencies between English and our other language editions we observe in Model 1.1 seem to translate to Model 1.2, our ARDL model specification.  The ARDL lag lengths remain mostly consistent across all 14 language editions, where in 13 of the 14 language editions *NewAccounts* has at least a lag of 1, and in all 13 instances *NewAccounts<sub>t-1</sub>* is positive.  *PageViews*, our independent variable, is somewhat less consistent.  9 of the 14 language editions (including English) do not have lagged *PageViews* variables.  In Spanish, French, Hindi, Japanese, and Russian--the 5 language editions where Model 1.2 includes a *PageViews* variable with a lag length of 1--the coefficient is negative, indicating that an increase in pageviews one month prior tends to result in a decrease in new account creation during the following month.  Both *WikiAge* and *Month* variables are inconsistent across our subset of language editions, indicating that age and seasonality seem to relate to new account creation in certain language editions (for example Catalan, German, English), but not all.

The notable takeaway from this analysis is that, in all language editions except Portuguese and Catalan, new accounts and pageviews seem to increase and decrease concurrently, which may indicate that exogenous factors influence both variables similarly.  In most language editions past pageviews do not seem to relate to future account creation, and when they do the relationship is inverted.  This relationship is opposite to what we would expect if reader attention (represented by pageviews) translated directly into new account creation.

In [101]:
# Model 1.1 again, reported as one (coef, p) pair of rows per wiki.
month_cols = [f'month_{m}' for m in range(2,13)]

group = pageview_accounts_df.groupby('wiki_db')
wiki_names = []
model_results_list = []
for wiki, x in group:
    y = x['num_new_accounts']
    X = x[var_map[wiki]+month_cols+['const']]

    model = sm.OLS(y, X)
    results = model.fit()
    formatted_results = pd.concat([results.params.to_frame('coef'),results.pvalues.to_frame('p')],axis=1)
    wiki_names.append(wiki)
    model_results_list.append(formatted_results)

# Label each result with the wiki it was actually fitted on.  groupby yields groups in
# sorted key order, whereas Series.unique() returns order of appearance, so using the
# latter as keys would mislabel the models whenever the CSV is not sorted by wiki_db.
model_results_df = pd.concat(model_results_list,axis=1,keys=wiki_names)

display(model_results_df.transpose())

Unnamed: 0,Unnamed: 1,num_pageviews,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,const,wiki_age
arwiki,coef,2.386434e-05,-7.709601,-403.68916,-413.794543,-204.739697,-315.6729,-289.7697,-321.5381,-287.561,-453.763827,-336.808006,-65.572862,307.2173,
arwiki,p,1.721515e-10,0.985781,0.351684,0.339719,0.625012,0.4569097,0.4946363,0.4642002,0.5113468,0.295366,0.436571,0.879349,0.6067054,
bnwiki,coef,3.913417e-05,238.460337,173.038854,41.439894,9.335865,-22.07107,119.4434,145.6282,30.77134,35.299668,28.467062,13.608245,173.8373,
bnwiki,p,5.014524e-12,0.025407,0.101888,0.692848,0.926899,0.8282629,0.2417281,0.1678519,0.7692686,0.736414,0.786019,0.896743,0.06158083,
cawiki,coef,3.641015e-06,-36.153484,24.524909,-91.574419,-117.679047,-253.089,-336.732,-377.8833,-273.7321,118.960136,83.508677,-122.673707,1002.44,-1.855246
cawiki,p,0.5133503,0.375837,0.546928,0.027522,0.003692,1.054959e-07,2.992258e-09,3.497771e-11,1.916273e-08,0.005123,0.043933,0.005235,1.225247e-16,6.031461e-07
cswiki,coef,1.104765e-05,55.545127,96.256274,25.524189,-12.440345,-29.21765,-84.67685,-82.294,-107.2116,5.887519,81.90752,-9.802861,651.6854,-2.216228
cswiki,p,1.144462e-08,0.237039,0.029002,0.57176,0.76846,0.5642873,0.1635023,0.1661828,0.05565792,0.899265,0.073859,0.840981,1.249256e-10,3.489562e-05
dewiki,coef,7.044131e-06,-98.498105,-68.00553,-372.700163,-350.598612,-366.2771,-708.744,-725.0579,-577.0813,-407.81039,-277.897715,-612.584825,5432.656,-22.66559
dewiki,p,7.548608e-09,0.665269,0.739086,0.100007,0.109798,0.1458993,0.006064639,0.004719241,0.0325379,0.071275,0.203049,0.007111,2.041521e-05,7.969837000000001e-22


In [131]:
# RQ 2 input data.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a machine with this layout.
FILEPATH = '/home/jmads/datasets/momentum/active_editors_content_added_multi-wiki_8-24-22.csv'

editors_content_df = pd.read_csv(FILEPATH)
# First-of-month timestamp built from the year and month columns.
editors_content_df['datetime'] = pd.to_datetime(editors_content_df['year'].astype(str)+'-'+editors_content_df['month'].astype(str)+'-1')
# Drop every row belonging to the latest month in the file (presumably an incomplete month -- confirm).
editors_content_df = editors_content_df.loc[editors_content_df['datetime'] != editors_content_df['datetime'].max()]
# Month dummies (month_1 ... month_12).  dtype=int keeps them numeric: pandas >= 2.0 would otherwise
# emit bool columns, which turn the regression design matrices into object dtype.
editors_content_df = pd.concat((editors_content_df,pd.get_dummies(editors_content_df['month'],prefix='month',dtype=int)),axis=1)
# Adds the 'const' intercept column used by the OLS models.
editors_content_df = sm.tools.add_constant(editors_content_df)

In [132]:
# Candidate regressors for RQ 2, in priority order.
# NOTE: this rebinds possible_vars and var_map, replacing the RQ 1 versions defined earlier.
possible_vars = ['num_active_editors','wiki_age','num_articles']
var_map = get_vars(editors_content_df,possible_vars)
var_map

{'arwiki': ['num_active_editors'],
 'bnwiki': ['num_active_editors'],
 'cawiki': ['num_active_editors'],
 'cswiki': ['num_active_editors'],
 'dewiki': ['num_active_editors', 'wiki_age'],
 'enwiki': ['num_active_editors', 'wiki_age'],
 'eswiki': ['num_active_editors', 'num_articles'],
 'frwiki': ['num_active_editors'],
 'hiwiki': ['num_active_editors'],
 'idwiki': ['num_active_editors'],
 'jawiki': ['num_active_editors'],
 'ptwiki': ['num_active_editors'],
 'ruwiki': ['num_active_editors'],
 'viwiki': ['num_active_editors']}

# RQ 2 (Editors -> Content) Model Results

## Model 2.1-multiwiki

Similar to our multi-wiki analysis of Models 1.1 and 1.2, results across all 14 language editions remain remarkably consistent for Model 2.1.  *ActiveEditors*, our independent variable, is positive and statistically significant in all 14 language editions, indicating that an increase in the number of editors is associated with an increase in the amount of content added.  *WikiAge* and *TotalArticles* are only present in 3 of the 14 model specifications due to high correlation with *ActiveEditors*, but they are negative in all 3 instances.  *Month* is not statistically significant in any of the 14 language editions.

In [133]:
# Model 2.1: one OLS per language edition,
#   num_bytes_added ~ retained covariates + month dummies + const.
# month_1 is left out of the dummies so it acts as the reference month alongside the intercept.
month_cols = [f'month_{m}' for m in range(2,13)]

group = editors_content_df.groupby('wiki_db')
coef_frames = []
pvalue_frames = []

for wiki, x in group:
    y = x['num_bytes_added']
    X = x[var_map[wiki]+month_cols+['const']]

    model = sm.OLS(y, X)
    results = model.fit()
    coef_frames.append(results.params.to_frame(wiki))
    pvalue_frames.append(results.pvalues.to_frame(wiki))

# Assemble the tables once instead of re-concatenating a growing frame on every iteration.
# Rows are the union of all term names; terms absent from a wiki's model are NaN.
model_coefs = pd.concat(coef_frames,axis=1)
model_pvalues = pd.concat(pvalue_frames,axis=1)

display(model_coefs.style.apply(functools.partial(highlight_coefs,p_values=model_pvalues)))

Unnamed: 0,arwiki,bnwiki,cawiki,cswiki,dewiki,enwiki,eswiki,frwiki,hiwiki,idwiki,jawiki,ptwiki,ruwiki,viwiki
num_active_editors,10582.20655,26602.540298,17895.999047,10621.061923,29606.640416,24290.564671,17321.277774,8791.868583,8102.258084,8197.827586,6147.303156,14202.577426,16914.157814,23710.394805
month_2,-5829842.017115,1650546.892453,-1242584.644846,-704837.624979,11925778.959593,14857808.117659,-4958982.427185,-13256042.440275,-1530416.928013,-3201863.248076,-4056566.056915,-1249251.153687,-4408400.00935,-3674376.366922
month_3,-5560781.127858,1842651.065418,-929702.445592,-950817.195581,26735559.639098,8406523.844107,-8875588.737515,-3174474.31319,4280495.242786,-2760409.674519,1434914.169596,408152.364591,-1230369.514781,-3433141.468716
month_4,3637728.83267,1100750.171833,-819447.245864,-3094485.832052,32250302.838161,68947099.998592,-8552387.262368,350831.954239,468956.789862,-2152221.627368,4894985.758698,-3630976.95348,-10789527.426757,1937828.714738
month_5,8285696.078847,13669.035493,-1416122.944116,-2393691.724578,24952057.158177,154286944.553276,-12425552.323486,-2639672.078182,-353744.81952,-2954547.233537,4582602.938807,-322649.661104,-8048016.731689,18037738.039712
month_6,10643353.334969,-585204.396208,-642068.787705,-3179391.098363,39917212.066087,221719824.142225,-7835822.376595,-14427948.238765,-258467.089364,-2108942.645526,-7857668.285543,-1424757.108293,-5223370.258977,3869073.361856
month_7,482583.514926,-1709100.09811,851527.827797,-2078046.408901,56311593.882954,119142005.732305,-104964.710739,-12517911.883534,-1231616.535578,-3785904.253498,-5026953.708895,-4630090.710633,-2636647.777487,4307636.13649
month_8,-5058967.439561,510653.729435,1495481.160248,-1337085.453365,14773338.999894,85171809.459695,-4514778.152604,-8570929.606521,548379.262545,-4373705.21652,-2219558.334554,2113583.687367,-1428919.727166,4745553.914789
month_9,-1405603.263546,-1168768.223903,484009.459648,-1323182.422231,23719198.232456,57013611.186913,-3613184.641697,-13989157.425017,29156.784069,-5584626.06903,130536.937779,-2812538.748285,-1551402.887981,-165713.654075
month_10,-4394860.241614,-2193171.312922,-399105.617894,-2870536.713274,33455148.832021,-3995308.542076,-16579413.42079,-1332390.550111,77110.109527,-4850740.455968,600445.32114,-5055840.971077,-8176652.842828,-1490765.81336


In [134]:
# Model 2.2: per-wiki ARDL of num_bytes_added on num_active_editors, lag orders chosen by BIC.
with warnings.catch_warnings():
    # The order search fits many candidate models; their warnings are deliberately
    # silenced for this cell only (filters are restored when the block exits).
    warnings.simplefilter('ignore')

    IV = 'num_active_editors'
    DV = 'num_bytes_added'
    max_lag = 12  # upper bound for both the autoregressive lags and the IV lags
    month_cols = [f'month_{m}' for m in range(2,13)]

    group = editors_content_df.groupby('wiki_db')
    coef_frames = []
    pvalue_frames = []

    for wiki, x in group:
        y = x[[DV]]
        X = x[[IV]]
        # Everything except the IV is passed as `fixed`, i.e. as regressors that are not lagged.
        fixed_X_vars = var_map[wiki]+month_cols
        fixed_X_vars.remove(IV)
        fixed_X = x[fixed_X_vars]
        sel_res = ardl_select_order(
            y, max_lag, X, max_lag, ic="bic", trend="c", fixed = fixed_X
        )

        results = sel_res.model.fit()
        coef_frames.append(results.params.to_frame(wiki))
        pvalue_frames.append(results.pvalues.to_frame(wiki))

    # Assemble once; the selected lag structure differs per wiki, so missing terms are NaN.
    model_coefs = pd.concat(coef_frames,axis=1)
    model_pvalues = pd.concat(pvalue_frames,axis=1)

    display(model_coefs.sort_index().style.apply(functools.partial(highlight_coefs,p_values=model_pvalues)))

Unnamed: 0,arwiki,bnwiki,cawiki,cswiki,dewiki,enwiki,eswiki,frwiki,hiwiki,idwiki,jawiki,ptwiki,ruwiki,viwiki
const,11387250.1168,749666.466325,2339176.521716,2146471.992547,43337038.138785,-35987371.421326,2933610.227092,14455980.299888,3115498.237824,2962789.723457,12854244.913759,6140359.926767,15190799.08169,-3600847.935332
month_10,-4187812.819134,-1181065.687163,-1395860.778998,-2730531.744591,7392244.717017,6014931.922707,-5495049.700574,991441.675857,-1018495.042354,-1514337.46015,-4197151.166087,-7889671.817479,-15470763.853645,-1100567.543734
month_11,99161.607966,1823405.225913,-1902092.767401,-1794753.334127,-24376207.877672,87354890.863358,9556026.975296,-13131015.620652,945050.617333,-1753182.269719,-7038644.393972,-1112919.060152,-21902253.363357,2010327.412379
month_12,533481.430718,596682.886396,-1687036.182595,-180407.931084,1712492.312612,113313370.354397,8788685.747257,-831030.004165,-1653780.232007,859606.966115,-2646184.787972,-1366776.683289,-8412127.513533,6844341.084863
month_2,-4703405.314201,1533692.842757,-3060173.792286,-1414767.793147,-40975202.880719,66319313.222071,-6006703.033741,-8385613.952414,-830231.642657,-4233714.278535,-10055091.130881,-2061960.78566,-9710572.662286,4184004.844871
month_3,-2329693.454567,-90712.217872,-1415485.419962,17181.100936,8912887.779924,61728407.168244,2590786.351194,-5123971.113905,3995551.711681,-1529302.243324,3113240.665746,-6948609.113684,2301750.422772,-5735840.493244
month_4,6868253.603744,-78555.718542,-1357777.159083,-2456051.997192,-30051764.132149,111839484.272863,-1113947.396954,2083112.290094,-1387664.575135,-1037928.871311,-2064979.805951,-5260687.475342,-20689754.440257,1841113.562699
month_5,5761124.147338,-421172.056626,-2419261.632532,-120134.687553,-10435237.042358,146138059.684107,1603667.131629,-7026582.82953,-2510323.01931,-2479304.290995,-894493.34811,-2235848.253863,-1149597.813305,15174970.984407
month_6,5758402.010601,-556361.927687,-1233766.772037,-1041913.317324,-8067321.678035,122287207.239183,489949.762963,-11747846.855063,-380321.153505,-905187.445199,-18747482.384394,-9230077.329352,-6430775.093208,-11531681.866775
month_7,-6567914.817946,-1318468.62303,-256066.551556,2197456.835637,11208872.543673,-29497105.189741,5235574.262449,-7819609.11916,-837115.275916,-3383457.410838,-4230777.343638,-4063215.422947,5467266.380499,6479896.088678


## Model 2.2-multiwiki

Model 2.2 again illustrates that the ARDL specification is less intuitive in understanding the relationship between active editors and content added.  As with Model 2.1 we see that *ActiveEditors<sub>t</sub>* is significant and positive in all but the Hindi and Japanese language editions, where *ActiveEditors* is not included.  Lag lengths range from *ActiveEditors<sub>t-1</sub>* to *ActiveEditors<sub>t-6</sub>*, which are challenging to interpret, though in most cases these coefficients are negative.  These negative lagged coefficients may result in the balancing effect we discuss with respect to the English Language Edition.  

Our autoregressive dependent variable is significant and positive at *BytesAdded<sub>t-1</sub>* for all 14 language editions, adding support for the "content begets more content" findings in prior work.  Again, lag lengths range from *BytesAdded<sub>t-1</sub>* to *BytesAdded<sub>t-4</sub>* for different language editions, which is challenging to interpret.  Seasonality and *TotalArticles* do not appear to relate to the amount of content added in any language edition, though *WikiAge* remains significant in the 2 language editions--English and German--where it is included in the model.

## RQ 2 Truncated Dataset Analysis

We conduct a similar analysis using the truncated RQ 2 dataset, which starts in May 2015.  This dataset has identical time bounds to the RQ 1 dataset.  We use the same model specification for Model 2.1, but select a different ARDL parameterization for Model 2.2.  We refer to these models as Model 2.1-truncated and Model 2.2-truncated.

In [136]:
# Earliest (year, month) present in the RQ 1 data; the RQ 2 data is cut to start there.
MIN_YEAR = pageview_accounts_df['year'].min()
in_first_year = pageview_accounts_df['year'] == MIN_YEAR
MIN_MONTH = pageview_accounts_df.loc[in_first_year, 'month'].min()

print(MIN_MONTH,MIN_YEAR)

# Keep rows from MIN_YEAR onwards, minus the months of MIN_YEAR that precede MIN_MONTH.
year_col = editors_content_df['year']
month_col = editors_content_df['month']
before_start = (year_col == MIN_YEAR) & (month_col < MIN_MONTH)
editors_content_df_2015 = editors_content_df.loc[(year_col >= MIN_YEAR) & ~before_start]

5 2015


In [137]:
# Model 2.2-truncated: same ARDL search as Model 2.2, fitted on editors_content_df_2015.
# NOTE(review): var_map is the one computed from the full RQ 2 data, not recomputed on the
# truncated frame -- confirm this is intended.
with warnings.catch_warnings():
    # The order search fits many candidate models; their warnings are deliberately
    # silenced for this cell only (filters are restored when the block exits).
    warnings.simplefilter('ignore')

    IV = 'num_active_editors'
    DV = 'num_bytes_added'
    max_lag = 12  # upper bound for both the autoregressive lags and the IV lags
    month_cols = [f'month_{m}' for m in range(2,13)]

    group = editors_content_df_2015.groupby('wiki_db')
    coef_frames = []
    pvalue_frames = []

    for wiki, x in group:
        y = x[[DV]]
        X = x[[IV]]
        # Everything except the IV is passed as `fixed`, i.e. as regressors that are not lagged.
        fixed_X_vars = var_map[wiki]+month_cols
        fixed_X_vars.remove(IV)
        fixed_X = x[fixed_X_vars]
        sel_res = ardl_select_order(
            y, max_lag, X, max_lag, ic="bic", trend="c", fixed = fixed_X
        )

        results = sel_res.model.fit()
        coef_frames.append(results.params.to_frame(wiki))
        pvalue_frames.append(results.pvalues.to_frame(wiki))

    # Assemble once; the selected lag structure differs per wiki, so missing terms are NaN.
    model_coefs = pd.concat(coef_frames,axis=1)
    model_pvalues = pd.concat(pvalue_frames,axis=1)

    display(model_coefs.sort_index().style.apply(functools.partial(highlight_coefs,p_values=model_pvalues)))

Unnamed: 0,arwiki,bnwiki,cawiki,cswiki,dewiki,enwiki,eswiki,frwiki,hiwiki,idwiki,jawiki,ptwiki,ruwiki,viwiki
const,14971209.326648,4087873.429855,5122124.598986,845842.893114,-138614453.478285,-127980465.034965,42690537.122629,-4062354.156596,5056129.462202,-5535045.494926,-79664735.221874,9729993.112378,-67673486.97263,-41343353.297353
month_10,-8562030.067057,-2661080.213215,214224.696034,-4527320.900893,-5730899.695401,12660537.272396,-4512413.836056,-478313.01194,1609557.326155,-3317687.406599,13933986.804018,-10595550.664372,2103063.527363,-5610729.526628
month_11,9127668.843761,3207401.983938,981052.508256,-568563.859519,-10974677.65297,45284402.91342,9015460.576137,1467267.049151,2577271.937273,-2451801.087986,30199772.562942,1877447.581674,-11817651.843178,-3292462.224361
month_12,4443872.57456,389172.870038,-781402.958733,-2066643.406581,-18118923.691401,37913911.608729,-6502700.016118,3340445.324485,843817.235257,-1840255.993402,16221525.453677,4470722.514936,-10894762.265725,28322226.272987
month_2,-3139376.814044,5070826.843013,-3429763.801928,-2827215.08952,-37987178.275973,8018754.247285,-6971482.975336,-5407437.618172,-1134453.493301,-5854060.631908,42711019.329388,3557464.196076,-24756549.845281,15680468.483
month_3,-332187.542109,-39098.98904,1139511.822163,-2791043.231554,-5942273.026306,-15699458.020168,6290943.263939,-8145127.460418,1111778.02454,-1592228.608397,-1069131.656337,-1890700.434421,-19286224.941307,-3646654.706128
month_4,12334221.914663,-210689.605239,-1260508.297096,-2298887.113912,-26291025.454032,39836198.699088,-5840410.767208,13989428.421643,55676.634406,-3052707.017751,57120612.680242,7246978.543484,-18146184.879778,6391415.360494
month_5,-4387884.011284,-2225167.743708,-1945510.256074,-3169108.382158,-20396496.23122,30907842.522377,-3515638.434563,-8063143.415807,190615.68254,-3886614.130722,18553681.929393,-4722625.947021,-15122031.915544,58519074.101387
month_6,-2000153.242707,-2054647.419494,-2983869.536891,-3150351.445934,-22183327.996848,2690277.440401,-8722194.20398,-7964054.580261,-495593.084331,968931.977648,26711555.22366,-3301278.294679,-15187133.163632,-29261182.923269
month_7,-2405895.376787,-1630754.662979,686426.922841,-3164356.891471,-15675002.267744,6349097.62134,-3363303.503975,1848869.832894,-246594.156439,-2937330.463536,16104690.696052,4190595.449582,-4603270.722175,18744561.837112
