# Goal
1. Get the ExperimentThings and ExperimentActions related to the experiment ID
2. Add activation-rate variables
    1. Start simply, or think about a batched/cached approach
3. Later: compute the statistical difference

In [1]:
from civilservant.models.core import ExperimentThing, ExperimentAction
from thanks.utils import _get_experiment_id
from civilservant.util import read_config_file
import os
from civilservant.db import init_session, init_engine
from sqlalchemy.dialects import mysql
import pandas as pd

In [2]:
# Create the two database handles used in this notebook:
# `db` is the session the queries are built on, `con` is the engine handed to pandas.read_sql.
db = init_session()
con = init_engine()

In [3]:
# Read the experiment config from the file named by the CS_EXTRA_CONFIG_FILE
# environment variable; os.path.abspath('') resolves to the current working directory.
cnf = read_config_file(os.environ['CS_EXTRA_CONFIG_FILE'], os.path.abspath(''))

In [4]:
# Resolve the experiment named in the config (cnf['name']) to its ID (return_id=True).
experiment_id = _get_experiment_id(db, cnf['name'], return_id=True)

In [5]:
# Show the resolved experiment ID as a sanity check.
experiment_id

14

In [6]:
# Build the query for the ExperimentThing columns needed for this experiment.
ETs_q = (
    db.query(
        ExperimentThing.id,
        ExperimentThing.thing_id,
        ExperimentThing.created_dt,
        ExperimentThing.randomization_arm,
    )
    .filter_by(experiment_id=experiment_id)
)

In [7]:
# Compile the query's statement with the MySQL dialect so it can be passed to pandas.
ETs_sql = ETs_q.statement.compile(dialect=mysql.dialect())

In [8]:
# Execute the compiled statement and load the rows into a DataFrame.
# NOTE(review): the experiment ID was already bound via filter_by when the query
# was built, so this `params` dict may be redundant — confirm before relying on it.
ET_df = pd.read_sql(ETs_sql, params={"experiment_id":experiment_id}, con=con)

In [9]:
# Inspect the loaded ExperimentThings (the rendered output is truncated by pandas).
ET_df

Unnamed: 0,id,thing_id,created_dt,randomization_arm
0,user:fr:(U+2019)bb;,757,2020-01-27 19:32:48,1
1,user:fr:0bichette0,217,2020-01-27 19:31:23,1
2,user:fr:0idontknow0,1265,2020-01-27 19:34:06,0
3,user:fr:2020louise,1635,2020-01-27 19:35:05,2
4,user:fr:2609fbb,2811,2020-01-27 19:38:21,0
...,...,...,...,...
1680,user:fr:Zorelmo,444,2020-01-27 19:32:00,1
1681,user:fr:Zoucaaaa,2059,2020-01-27 19:36:15,0
1682,user:fr:ZouhZouh,311,2020-01-27 19:31:39,0
1683,user:fr:Zouquette,197,2020-01-27 19:31:20,2


In [10]:
# Build the query for the ExperimentAction columns needed for this experiment.
EAs_q = (
    db.query(
        ExperimentAction.action_object_id,
        ExperimentAction.created_dt,
        ExperimentAction.metadata_json,
    )
    .filter_by(experiment_id=experiment_id)
)

In [11]:
# Compile the query's statement with the MySQL dialect so it can be passed to pandas.
EAs_sql = EAs_q.statement.compile(dialect=mysql.dialect())

In [12]:
# Execute the compiled statement and load the rows into a DataFrame.
# NOTE(review): the experiment ID was already bound via filter_by when the query
# was built, so this `params` dict may be redundant — confirm before relying on it.
EA_df = pd.read_sql(EAs_sql, params={"experiment_id":experiment_id}, con=con)

In [13]:
# Inspect the loaded ExperimentActions (the rendered output is truncated by pandas).
EA_df

Unnamed: 0,action_object_id,created_dt,metadata_json
0,78649,2020-01-24 18:49:40,"{'lang': 'fr', 'signer': '6PO', 'user_name': '..."
1,78650,2020-01-24 18:49:40,"{'lang': 'fr', 'signer': 'Braaark', 'user_name..."
2,78651,2020-01-24 18:49:40,"{'lang': 'fr', 'signer': 'Goombiis', 'user_nam..."
3,78653,2020-01-24 18:49:41,"{'lang': 'fr', 'signer': 'Frakir', 'user_name'..."
4,78654,2020-01-24 18:49:41,"{'lang': 'fr', 'signer': 'Arthur Crbz', 'user_..."
...,...,...,...
1686,2985,2020-01-27 19:38:54,"{'lang': 'fr', 'errors': [], 'signer': 'Apipo1..."
1687,2988,2020-01-27 19:38:54,"{'lang': 'fr', 'errors': [], 'signer': 'VinceR..."
1688,2989,2020-01-27 19:38:54,"{'lang': 'fr', 'errors': [], 'signer': 'Naivuo..."
1689,2990,2020-01-27 19:38:55,"{'lang': 'fr', 'errors': [], 'signer': 'Goombi..."


In [14]:
# Check the column dtypes of the loaded actions.
EA_df.dtypes

action_object_id            object
created_dt          datetime64[ns]
metadata_json               object
dtype: object

In [15]:
def key_or_nan(d, key):
    """Return ``d[key]``, or NaN when the value cannot be looked up.

    Parameters
    ----------
    d : mapping, or a missing value
        Usually a metadata dict. ``None`` or a float NaN (as produced by a
        null column) is tolerated and treated as "no data".
    key : hashable
        Key to look up in ``d``.

    Returns
    -------
    object
        The value stored under ``key``, or ``float('nan')`` if ``key`` is
        absent or ``d`` is not subscriptable.
    """
    try:
        return d[key]
    except (KeyError, TypeError):
        # KeyError: the key is absent from the mapping.
        # TypeError: ``d`` is None / NaN / another non-subscriptable value,
        # which happens when the metadata cell itself is missing.
        return float('nan')

In [16]:
# Copy the 'action_complete' and 'action_response' metadata entries into their
# own columns; key_or_nan yields NaN where the metadata dict lacks the key.
EA_df['action_complete'] = EA_df['metadata_json'].apply(key_or_nan, args=('action_complete',))
EA_df['action_response'] = EA_df['metadata_json'].apply(key_or_nan, args=('action_response',))

In [17]:
# Copy the 'user_name' metadata entry into its own column (NaN where it is absent).
EA_df['user_name'] = EA_df['metadata_json'].apply(key_or_nan, args=('user_name',))

In [18]:
# EA_df[pd.notnull(EA_df['action_complete'])]

In [19]:
# Left-join every ExperimentThing to its actions (thing_id == action_object_id).
# Column names present in both frames receive the '_et' / '_ea' suffixes.
df = pd.merge(
    ET_df,
    EA_df,
    how='left',
    left_on='thing_id',
    right_on='action_object_id',
    suffixes=('_et', '_ea'),
)

In [20]:
# Check the column dtypes of the merged frame.
df.dtypes

id                           object
thing_id                     object
created_dt_et        datetime64[ns]
randomization_arm             int64
action_object_id             object
created_dt_ea        datetime64[ns]
metadata_json                object
action_complete              object
action_response              object
user_name                    object
dtype: object

In [21]:
# Peek at the first two merged rows.
df.head(2)

Unnamed: 0,id,thing_id,created_dt_et,randomization_arm,action_object_id,created_dt_ea,metadata_json,action_complete,action_response,user_name
0,user:fr:(U+2019)bb;,757,2020-01-27 19:32:48,1,757,2020-01-27 19:32:48,"{'lang': 'fr', 'signer': 'VinceReddington', 'u...",,,(U+2019)bb;
1,user:fr:0bichette0,217,2020-01-27 19:31:23,1,217,2020-01-27 19:31:23,"{'lang': 'fr', 'signer': 'Apipo1907', 'user_na...",,,0bichette0


## Getting external data

In [22]:
from civilservant.wikipedia.queries.revisions import get_timestamps_within_range

In [23]:
from civilservant.wikipedia.utils import make_cached_df

In [24]:
from civilservant.wikipedia.connections.database import make_wmf_con
import datetime

In [25]:
# Connection object that fr_user_edits passes as `con` to get_timestamps_within_range.
wmf_con = make_wmf_con()

In [26]:
@make_cached_df('fr-user-edits-stop-rule')
def fr_user_edits(user_name, start_date):
    """Fetch a user's 'fr' timestamps for the 7 days starting at ``start_date``.

    Delegates to ``get_timestamps_within_range`` over the module-level
    ``wmf_con`` connection, with the window ``[start_date, start_date + 7 days]``.
    The function is wrapped by ``make_cached_df`` (presumably caching results
    under the 'fr-user-edits-stop-rule' label — confirm against its definition).

    Parameters
    ----------
    user_name : user whose timestamps are requested.
    start_date : start of the window; must support adding a ``datetime.timedelta``.

    Returns
    -------
    Whatever ``get_timestamps_within_range`` returns for that user and window.
    """
    one_week = datetime.timedelta(days=7)
    return get_timestamps_within_range(
        lang='fr',
        start_date=start_date,
        end_date=start_date + one_week,
        user_name=user_name,
        con=wmf_con,
    )

In [None]:
# Call fr_user_edits once per merged row, using the row's user name and the
# action's creation time as the start of the window, and store the result.
# NOTE(review): the merge is a left join, so rows without a matching action
# presumably carry NaN user_name / NaT created_dt_ea — confirm fr_user_edits copes.
df['timestamps']=df.apply(lambda row: fr_user_edits(row['user_name'], row['created_dt_ea']), axis=1)