In [1]:
!pip install mwapi pandas seaborn 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import mwapi
%matplotlib inline



In [2]:
en_df = pd.read_csv("../../datasets/enwiki_20161201_reverted_bot2bot.tsv", sep="\t")

In [3]:
len(en_df)

1001093

In [4]:
en_df[0:2].transpose()

Unnamed: 0,0,1
rev_id,273691771,136526894
rev_timestamp,20090227173507,20070607044209
rev_user,6505923,4534303
rev_user_text,Kbdankbot,PbBot
rev_page,5040439,3046554
rev_sha1,qj45ne2z4yfexmpaz5wfnbm2yrmqt4j,3xtnw7u4w9h6cg1smw97mqnr1en6a55
rev_minor_edit,False,False
rev_deleted,False,False
rev_parent_id,2.59117e+08,1.20932e+08
archived,False,False


### Convert timestamps

In [5]:
en_df['reverting_timestamp_dt'] = pd.to_datetime(en_df['reverting_timestamp'], format="%Y%m%d%H%M%S")
en_df['reverted_timestamp_dt'] = pd.to_datetime(en_df['rev_timestamp'], format="%Y%m%d%H%M%S")

en_df = en_df.set_index('reverting_timestamp_dt')

en_df['reverting_timestamp_dt'] = pd.to_datetime(en_df['reverting_timestamp'], format="%Y%m%d%H%M%S")


### Top reverting bots, ns0

In [6]:
en_df[en_df['page_namespace']==0]['reverting_user_text'].value_counts()[0:50]

Addbot            152226
Xqbot              80192
AvicBot            50438
EmausBot           46648
RussBot            16138
Invadibot          11888
タチコマ robot          9946
AvocatoBot          9394
DarknessBot         8004
VolkovBot           5920
DumbBOT             5674
Cydebot             4150
Redirect fixer      4086
KLBot2              4032
MystBot             3746
Escarbot            3584
ChuispastonBot      3310
Scepbot             3216
WikitanvirBot       2928
MastiBot            2502
SassoBot            2396
KasparBot           2134
PixelBot            2096
Muro Bot            1956
SieBot              1818
RedBot              1806
JAnDbot             1792
ZéroBot             1736
JL-Bot              1626
EdoBot              1312
JYBot               1204
Fluxbot             1178
Mathbot             1062
Luckas-bot          1002
Ripchip Bot          934
ArthurBot            916
SoxBot               884
Dexbot               872
TXiKiBoT             860
Thijs!bot            858


### Top reverted bots

In [7]:
en_df[en_df['page_namespace']==0]['rev_user_text'].value_counts()[0:50]

EmausBot              67544
Xqbot                 62314
AvicBot               39626
RussBot               34948
ZéroBot               33798
Luckas-bot            13946
DarknessBot           11952
TuHan-Bot             10710
タチコマ robot            10502
Invadibot              9792
VolkovBot              8654
Makecat-bot            8621
AvocatoBot             8342
Redirect fixer         7814
Thijs!bot              7696
Zorrobot               7516
SieBot                 6014
FoxBot                 5886
Lowercase sigmabot     5592
MystBot                5398
Scepbot                4690
WikitanvirBot          4674
YFdyh-bot              4481
Addbot                 4330
HRoestBot              3774
JackieBot              3526
Muro Bot               3346
ChuispastonBot         3160
LaaknorBot             3062
RedBot                 2928
JYBot                  2906
Cydebot                2846
GrouchoBot             2734
TXiKiBoT               2690
SteenthIWbot           2392
DSisyphBot          

### Reverting/reverted bot pairs

In [8]:
en_df['bot_pair'] = en_df['reverting_user_text'] + " reverting " + en_df['rev_user_text']

In [9]:
en_df[en_df['page_namespace']==0]['bot_pair'].value_counts()[0:50]

Addbot reverting EmausBot               31002
Addbot reverting ZéroBot                26220
AvicBot reverting Xqbot                 22688
Xqbot reverting AvicBot                 21978
EmausBot reverting Xqbot                14006
AvicBot reverting EmausBot              12282
Xqbot reverting RussBot                 11536
Xqbot reverting EmausBot                10814
Addbot reverting TuHan-Bot               9012
Xqbot reverting DarknessBot              8080
Addbot reverting Luckas-bot              7916
EmausBot reverting AvicBot               7502
Xqbot reverting Invadibot                7342
Addbot reverting Makecat-bot             6833
Addbot reverting Zorrobot                6542
Addbot reverting Thijs!bot               6090
Invadibot reverting Xqbot                6076
DumbBOT reverting Lowercase sigmabot     5578
Addbot reverting SieBot                  4130
タチコマ robot reverting RussBot             4002
AvocatoBot reverting AvicBot             3836
AvicBot reverting AvocatoBot      

## Proportion of total edits that were bot-bot reverts/reverted

### Get list of all bots

In [10]:
all_reverted_bots = en_df['rev_user_text'].unique()
len(all_reverted_bots)

671

In [11]:
all_reverting_bots = en_df['reverting_user_text'].unique()
len(all_reverting_bots)

527

In [12]:
all_bots = list(set(all_reverting_bots).union(set(all_reverted_bots)))
len(all_bots)

750

### Get total edit count for each bot

In [13]:
session = mwapi.Session('https://en.wikipedia.org', user_agent="Edit count script by User:Staeiou")

result = []
for count in range(0,round(len(all_bots)/50)+1):
    start = count * 50
    end = start + 50
    result.append(session.get(action='query', list='users', ususers=all_bots[start:end], usprop='editcount'))
    

In [14]:
bot_editcount = {}
for x1 in result:
    for x2 in x1['query']['users']:
        bot_editcount[x2['name']] = x2['editcount']
        #print(x2['name'], x2['editcount'])

In [15]:
bot_editcount_df = pd.DataFrame.from_dict(bot_editcount, orient='index')
bot_editcount_df.sort_values(by=0, ascending=False)[0:25]

Unnamed: 0,0
Cydebot,5709990
WP 1.0 bot,5494319
Yobot,4651415
ClueBot NG,4408957
SmackBot,3734324
Addbot,2838809
AnomieBOT,2836850
SineBot,2099708
EmausBot,1671243
MediaWiki message delivery,1629233


In [16]:
bot_editcount_df.ix['Cydebot'][0]

5709990

In [17]:
reverting_bot_counts = en_df['reverting_user_text'].value_counts()
reverted_bot_counts = en_df['rev_user_text'].value_counts()

### Percent of edits each bot made that were reverting another bot

In [48]:
reverting_df = pd.DataFrame(columns=['user', 'total_edits', 'reverting_bot_edits', 'prop_bot_revert'])
for bot_user in reverting_bot_counts.index:
    #print(bot_user, round(reverting_bot_counts[bot_user]/bot_editcount_df.ix[bot_user][0],3)*100, "%")
    row_d = {'user' : bot_user,
             'total_edits' : bot_editcount_df.ix[bot_user][0],
             'reverting_bot_edits' : reverting_bot_counts[bot_user],
             'prop_bot_revert' : reverting_bot_counts[bot_user]/bot_editcount_df.ix[bot_user][0]}
    reverting_df = reverting_df.append(row_d, ignore_index=True)

reverting_df = reverting_df.set_index('user')

In [49]:
reverting_df[0:5]

Unnamed: 0_level_0,total_edits,reverting_bot_edits,prop_bot_revert
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Addbot,2838809.0,291629.0,0.102729
Xqbot,1526766.0,90220.0,0.059092
EmausBot,1671243.0,74570.0,0.044619
AvicBot,817979.0,57844.0,0.070716
HBC AIV helperbot7,253005.0,56408.0,0.222952


### Percent of edits each bot made that were reverted by another bot

In [50]:
reverted_df = pd.DataFrame(columns=['user', 'total_edits', 'reverted_bot_edits', 'prop_bot_reverted'])

for bot_user in reverted_bot_counts.index:
    #print(bot_user, round(reverted_bot_counts[bot_user]/bot_editcount_df.ix[bot_user][0],3)*100, "%")
    row_d = {'user' : bot_user,
             'total_edits' : bot_editcount_df.ix[bot_user][0],
             'reverted_bot_edits' : reverted_bot_counts[bot_user],
             'prop_bot_reverted' : reverted_bot_counts[bot_user]/bot_editcount_df.ix[bot_user][0]}
    reverted_df = reverted_df.append(row_d, ignore_index=True)
    
reverted_df = reverted_df.set_index('user')

In [51]:
reverted_df[0:5]

Unnamed: 0_level_0,total_edits,reverted_bot_edits,prop_bot_reverted
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
EmausBot,1671243.0,108569.0,0.064963
Xqbot,1526766.0,72936.0,0.047772
ZéroBot,704777.0,61450.0,0.087191
ClueBot NG,4408957.0,47680.0,0.010814
AvicBot,817979.0,46484.0,0.056828


#### Join dataframes

TODO: fix

In [57]:
pd.concat([reverting_df, reverted_df], axis=1)
#reverting_df.join(reverted_df, how='outer', on='index')

Unnamed: 0,total_edits,reverting_bot_edits,prop_bot_revert,total_edits.1,reverted_bot_edits,prop_bot_reverted
.anacondabot,25534.0,28.0,0.001097,25534.0,52.0,0.002037
28bot,93387.0,20.0,0.000214,93387.0,14.0,0.000150
3RRBot,3841.0,2.0,0.000521,,,
718 Bot,99574.0,724.0,0.007271,99574.0,10.0,0.000100
A4bot,25852.0,8.0,0.000309,25852.0,152.0,0.005880
ABot,,,,1372.0,4.0,0.002915
AEBot,10.0,2.0,0.200000,,,
AHbot,,,,425.0,36.0,0.084706
AIDbot,507.0,14.0,0.027613,507.0,24.0,0.047337
AMbot,,,,30053.0,2.0,0.000067
