In [1]:
import pandas as pd
import os
pd.set_option('max_colwidth', 100)
import datetime
import mwapi

In [154]:
def wmfdate_parser(s):
    """Parse a timestamp string in ``%Y%m%d%H%M%S`` form into a ``datetime``."""
    return datetime.datetime.strptime(s, '%Y%m%d%H%M%S')


# Session against French Wikipedia. No user_agent is passed, so mwapi falls
# back to its default User-Agent and prints a notice about it.
mw = mwapi.Session('https://fr.wikipedia.org')

Sending requests with default User-Agent.  Set 'user_agent' on mwapi.Session to quiet this message.


In [155]:
def get_bot_contribs(botname, start, end, session=None):
    """Return every ``usercontribs`` entry for ``botname`` in a time window.

    Issues ``action=query&list=usercontribs`` requests and follows the API's
    continuation until the response no longer carries a ``continue`` block.

    Parameters
    ----------
    botname : str
        Account name, sent as ``ucuser``.
    start, end
        Window bounds, sent unchanged as ``ucstart`` and ``ucend``.
    session : optional
        Object exposing ``get(**params)`` that returns the decoded response.
        Defaults to the notebook-level ``mw`` session.

    Returns
    -------
    list
        The concatenated ``query.usercontribs`` items of all response pages,
        in the order the API returned them.
    """
    # Resolve the session lazily so the global is only needed when no
    # explicit session is supplied.
    api = mw if session is None else session
    base_params = dict(action='query', list='usercontribs',
                       ucstart=start, ucend=end, ucuser=botname)

    ret_contribs = []
    # An empty dict means "first request"; None means "nothing left to fetch".
    continuation = {}
    while continuation is not None:
        # Merge the *whole* continue block back into the request (not just
        # 'uccontinue'), as the MediaWiki continuation protocol asks for.
        q_res = api.get(**{**base_params, **continuation})
        ret_contribs.extend(q_res['query']['usercontribs'])
        continuation = q_res.get('continue')
    return ret_contribs

In [159]:
# Query window as YYYYMMDDHHMMSS integers. `start` is the later bound (28 Oct)
# and `end` the earlier one (27 Oct).
# NOTE(review): presumably because usercontribs is listed newest-first by
# default -- confirm against the API documentation.
start = 20191028000000
end = 20191027000000

# Fetch the contributions of both bot accounts over the same window.
contribs_l, contribs_b = (
    get_bot_contribs(bot, start, end)
    for bot in ('Loveless', 'Loveless bienvenue')
)

In [160]:
# Number of contributions fetched for each bot, as (Loveless, Loveless bienvenue).
tuple(len(batch) for batch in (contribs_l, contribs_b))

(455, 874)

In [162]:
# One DataFrame per bot, built from the raw contribution records.
dfl, dfb = (
    pd.DataFrame.from_records(records)
    for records in (contribs_l, contribs_b)
)

# Differences.
## Who did Loveless invite but not Loveless bienvenue?
## Who did Loveless bienvenue invite but not Loveless?

In [163]:
def compare_title(t):
    """Reduce a user-talk page title to the bare name used for comparison.

    Strips a leading ``Discussion utilisatrice:`` / ``Discussion utilisateur:``
    namespace prefix and then, if present, a leading
    ``Loveless bienvenue/draft/`` prefix.

    Parameters
    ----------
    t : str
        Page title.

    Returns
    -------
    str
        The title with the prefixes above removed. A title that carries
        neither namespace prefix is returned as-is (apart from the draft
        prefix check) instead of raising ``UnboundLocalError``.
    """
    ns_prefixes = ('Discussion utilisatrice:', 'Discussion utilisateur:')
    draft_prefix = 'Loveless bienvenue/draft/'

    # Default to the untouched title so an unexpected namespace does not
    # leave page_name unbound.
    page_name = t
    for prefix in ns_prefixes:
        if t.startswith(prefix):
            # Slice rather than split(): split(prefix)[1] would cut the name
            # short if the prefix text happened to occur in it again.
            page_name = t[len(prefix):]
            break
    if page_name.startswith(draft_prefix):
        page_name = page_name[len(draft_prefix):]
    return page_name

In [164]:
# Give both frames a `compare_title` column derived from the page title,
# so the two bots' edits share a comparable key.
for frame in (dfl, dfb):
    frame['compare_title'] = frame['title'].map(compare_title)

In [165]:
# Keep only the columns needed for the comparison.
kept_columns = ['user', 'compare_title', 'timestamp']
dfl = dfl.loc[:, kept_columns]
dfb = dfb.loc[:, kept_columns]

In [166]:
# Largest number of rows sharing one compare_title in Loveless's frame;
# a value of 1 means every compare_title occurs exactly once.
rows_per_title_l = dfl.groupby('compare_title').size()
rows_per_title_l.max()

1

In [167]:
# Largest number of rows sharing one compare_title in Loveless bienvenue's
# frame; a value of 1 means every compare_title occurs exactly once.
rows_per_title_b = dfb.groupby('compare_title').size()
rows_per_title_b.max()

1

In [168]:
# Index Loveless's rows by compare_title so they can be joined on it.
l = dfl.set_index('compare_title')

In [169]:
# Index Loveless bienvenue's rows by compare_title so they can be joined on it.
b = dfb.set_index('compare_title')

In [170]:
# Outer-merge the two frames on their shared compare_title index. Columns
# from Loveless get the `_l` suffix, those from Loveless bienvenue `_b`.
j = pd.merge(
    l, b,
    how='outer',
    left_index=True, right_index=True,
    suffixes=('_l', '_b'),
)

In [171]:
# Row-wise flags: does the joined row carry an edit from each bot?
has_l = j['user_l'].notna()
has_b = j['user_b'].notna()

# Present for both bots.
overlap = j[has_l & has_b]
# Present for Loveless bienvenue only (missing from Loveless).
underlap_l_missing = j[~has_l & has_b]
# Present for Loveless only (missing from Loveless bienvenue).
underlap_b_missing = j[has_l & ~has_b]

In [172]:
# Titles edited by both Loveless and Loveless bienvenue.
overlap

Unnamed: 0_level_0,user_l,timestamp_l,user_b,timestamp_b
compare_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ANKA59,Loveless,2019-10-27T12:06:07Z,Loveless bienvenue,2019-10-27T12:07:06Z
Abbonnezvousamachainec'estCRYO'ZENITH,Loveless,2019-10-27T15:36:08Z,Loveless bienvenue,2019-10-27T15:37:05Z
Abdou shn,Loveless,2019-10-27T19:15:07Z,Loveless bienvenue,2019-10-27T19:15:08Z
Acere0011,Loveless,2019-10-27T23:15:06Z,Loveless bienvenue,2019-10-27T23:17:06Z
Adamakama66,Loveless,2019-10-27T01:39:06Z,Loveless bienvenue,2019-10-27T01:39:06Z
...,...,...,...,...
Yveskappooo,Loveless,2019-10-27T20:09:09Z,Loveless bienvenue,2019-10-27T20:09:06Z
Zazali2019,Loveless,2019-10-27T19:57:09Z,Loveless bienvenue,2019-10-27T19:57:05Z
Zouhair 01,Loveless,2019-10-27T12:45:08Z,Loveless bienvenue,2019-10-27T12:45:05Z
انس كرم,Loveless,2019-10-27T15:12:06Z,Loveless bienvenue,2019-10-27T15:13:06Z


In [176]:
# Titles edited by Loveless bienvenue but not by Loveless.
underlap_l_missing

Unnamed: 0_level_0,user_l,timestamp_l,user_b,timestamp_b
compare_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
$addl3backtamar1n134,,,Loveless bienvenue,2019-10-27T04:39:05Z
.vxctoria,,,Loveless bienvenue,2019-10-27T17:43:05Z
0x0n,,,Loveless bienvenue,2019-10-27T23:41:05Z
0zwood,,,Loveless bienvenue,2019-10-27T09:37:06Z
1999blacklist666,,,Loveless bienvenue,2019-10-27T18:55:05Z
...,...,...,...,...
ԳրիգորյանԷդգար,,,Loveless bienvenue,2019-10-27T07:59:06Z
Դավիթ Ալիխանյան,,,Loveless bienvenue,2019-10-27T08:03:07Z
حب خادع,,,Loveless bienvenue,2019-10-27T23:33:05Z
ديارسوبا سليمان,,,Loveless bienvenue,2019-10-27T20:27:06Z


In [177]:
# Titles edited by Loveless but not by Loveless bienvenue.
underlap_b_missing

Unnamed: 0_level_0,user_l,timestamp_l,user_b,timestamp_b
compare_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alexandrelau7,Loveless,2019-10-27T22:51:06Z,,
Francxshr/Archive 1,Loveless,2019-10-27T20:21:06Z,,
Manu911me,Loveless,2019-10-27T09:27:06Z,,
Maximet26,Loveless,2019-10-27T17:54:08Z,,
Salixe,Loveless,2019-10-27T15:57:07Z,,


In [158]:
# status
Alexandrelau7 -- never existed in our database, have to look more into logs
Francxshr/Archive 1 -- this is not a new user but template usage
Manu911me -- never existed in our database, have to look more into logs
Maximet26 -- user blocked [we won't invite a blocked user]
Salixe  -- name change [understandable]
