# Goal
+ estimate how many absolute newcomers there are (per 90 days) that make 4 quality edits
+ use the full sample size
+ use percentage that make 4 edits
+ use percentage of the above that make 4 quality edits
  + in any namespace
  + in nontalk
  + in main-only
+ by lang

## populations active in pre-treatment
```
lang  experience_level_pre_treatment
ar    bin_0                               8547
de    bin_0                              14083
fa    bin_0                               6265
pl    bin_0                               3287
```
## estimates of having 4 edits in that span
```
ar    bin_0                             1752/8547
de    bin_0                             2216/10000
fa    bin_0                             1634/6265
pl    bin_0                              893/3287

--->
ar 0.2049
de 0.2216
fa 0.2608
pl 0.2717
```

In [14]:
import pandas as pd
import numpy as np

In [16]:
# Load the subsampled power-analysis data, then keep only the rows whose
# pre-treatment experience level is 'bin_0' (referred to here as newcomers).
df = pd.read_csv(
    '../outputs/thankee_power_analysis_data_for_sim_treatment_20180601_200_subsamples.csv'
)
is_bin_0 = df['experience_level_pre_treatment'] == 'bin_0'
newcomers = df[is_bin_0]

In [8]:
# Number of rows for every (language, experience bin) combination.
df.groupby(
    by=['lang', 'experience_level_pre_treatment'],
).size()

lang  experience_level_pre_treatment
ar    bin_0                             400
      bin_1460                          200
      bin_180                           200
      bin_2920                          200
      bin_365                           200
      bin_730                           200
      bin_90                            200
de    bin_0                             400
      bin_1460                          200
      bin_180                           200
      bin_2920                          200
      bin_365                           200
      bin_730                           200
      bin_90                            200
fa    bin_0                             400
      bin_1460                          200
      bin_180                           200
      bin_2920                          200
      bin_365                           200
      bin_730                           200
      bin_90                            200
pl    bin_0                            

In [20]:
# Share of rows with at least 4 quality edits, for each of the three
# quality-edit count columns (any namespace / non-talk / main-only).
#
# NOTE: the lambda only turns each group's counts into booleans; the trailing
# .mean() then averages over all of those rows together, so the result is ONE
# pooled proportion per column, not one proportion per (lang, experience bin).
#
# The columns are selected with a list. Selecting several columns from a
# groupby with a bare tuple of names was deprecated and is rejected by
# pandas >= 2.0.
df.groupby(['lang', 'experience_level_pre_treatment'])[[
    'num_quality_pre_treatment',
    'num_quality_pre_treatment_non_talk',
    'num_quality_pre_treatment_main_only',
]].apply(lambda c: c >= 4).mean()

num_quality_pre_treatment              0.699063
num_quality_pre_treatment_non_talk     0.628125
num_quality_pre_treatment_main_only    0.460625
dtype: float64

In [44]:
# Per-language proportion of bin_0 rows that have at least 4 quality edits,
# computed separately for each quality-edit count column and renamed to the
# shorter prop_qual_* labels used in the later cells.
#
# Two portability fixes compared with the earlier form of this cell:
#   * the columns are selected with a list, because tuple-style selection of
#     several groupby columns is rejected by pandas >= 2.0;
#   * the proportion is taken with an explicit column-wise `.mean()`.
#     `np.mean(frame)` forwards axis=None, which pandas >= 2.0 treats as
#     "reduce over both axes", collapsing the three columns into one scalar.
quality_columns = {
    'num_quality_pre_treatment': 'prop_qual_any',
    'num_quality_pre_treatment_non_talk': 'prop_qual_non_talk',
    'num_quality_pre_treatment_main_only': 'prop_qual_main_only',
}
newcomer_qual = (
    newcomers.groupby('lang')[list(quality_columns)]
    .apply(lambda c: (c >= 4).mean())
    .rename(columns=quality_columns)
)

In [45]:
# Hand-entered per-language figures for bin_0 users, copied from the notes at
# the top of this notebook: the population active in the pre-treatment span
# and the estimated proportion of them having 4 edits in that span.
languages = ['ar', 'de', 'fa', 'pl']
populations = [8547, 14083, 6265, 3287]
proportions_4_edits = [0.2049, 0.2216, 0.2608, 0.2717]

newcomer_abs = pd.DataFrame(
    {
        'lang': languages,
        'abs_populations_90_pre_treatment': populations,
        'prop_having_4_edits_pre_treatment': proportions_4_edits,
    }
)

In [52]:
# Join the hand-entered population figures with the per-language quality
# proportions on 'lang', then index the combined table by language.
merged_by_lang = newcomer_abs.merge(newcomer_qual, on='lang')
newcomer_stats = merged_by_lang.set_index('lang')

In [53]:
# Absolute number of users with 4 edits = population * proportion with 4 edits.
population = newcomer_stats['abs_populations_90_pre_treatment']
share_4_edits = newcomer_stats['prop_having_4_edits_pre_treatment']
newcomer_stats['abs_users_4_edits'] = population * share_4_edits

# Scale that count by each quality proportion to get the absolute number of
# users with 4 quality edits (any namespace, non-talk, main-only).
for target, proportion in (
    ('abs_users_4_edits_qual_any', 'prop_qual_any'),
    ('abs_users_4_edits_qual_non_talk', 'prop_qual_non_talk'),
    ('abs_users_4_edits_qual_main_only', 'prop_qual_main_only'),
):
    newcomer_stats[target] = newcomer_stats['abs_users_4_edits'] * newcomer_stats[proportion]


In [55]:
# Display floats with thousands separators and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [56]:
# Show the table with one column per language and one row per statistic.
newcomer_stats.transpose()

lang,ar,de,fa,pl
abs_populations_90_pre_treatment,8547.0,14083.0,6265.0,3287.0
prop_having_4_edits_pre_treatment,0.2,0.22,0.26,0.27
prop_qual_any,0.88,0.62,0.14,0.72
prop_qual_non_talk,0.78,0.54,0.1,0.69
prop_qual_main_only,0.53,0.39,0.06,0.44
abs_users_4_edits,1751.28,3120.79,1633.91,893.08
abs_users_4_edits_qual_any,1536.75,1927.09,236.92,643.02
abs_users_4_edits_qual_non_talk,1357.24,1685.23,163.39,611.76
abs_users_4_edits_qual_main_only,919.42,1224.91,102.12,392.95
