In [None]:
# Reload edited project modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2
# Restore the variable `the_page` that was previously persisted with `%store`
# (presumably by the preceding workbook -- verify).
%store -r the_page

In [None]:
from external.wikipedia import WikipediaDV, WikipediaAPI

# Data-view wrapper around the Wikipedia API of en.wikipedia.org.
wikipedia_dv = WikipediaDV(
    WikipediaAPI(domain='en.wikipedia.org')
)

# The page is looked up through the `page_id` of the restored `the_page`
# (the original note mentions 'Chicago', presumably as an example).
page_name = the_page.page_id
page = wikipedia_dv.get_page(page_name)

# Editions per page with WikiWho

In [None]:
import pandas as pd
from wikiwho_wrapper import WikiWho

# WikiWho client configured with lng='en'.
wikiwho = WikiWho(lng='en')

# Fetch the editions table of the selected page and preview its first rows.
editions = wikiwho.dv.editions(the_page.page_id)
editions.head()

In [None]:
from ipywidgets import interact
from visualization.editions_listener import DFListener

# Parse the month labels on a new frame, so that the `editions` frame that
# was previewed in the previous cell is not modified in place.
df = editions.assign(year_month=pd.to_datetime(editions['year_month']))

# Group the data by year month and page (drop the editor information).
# The grouped result has to be assigned, otherwise it is silently discarded;
# the index is reset because `year_month` is used as a regular column below.
# NOTE(review): assumes every column other than the grouping keys holds
# numeric counts -- confirm against the WikiWho editions schema.
df = (df.drop('editor_id', axis=1)
        .groupby(['year_month', 'page_id'])
        .sum()
        .reset_index())

# Add columns with the total actions: element-wise sum of the additions,
# deletions and reinsertions column ranges. Each label slice must select as
# many columns as there are new column names, in the matching order.
df = df.join(pd.DataFrame(
    df.loc[:, 'adds':'adds_stopword_count'].values
    + df.loc[:, 'dels':'dels_stopword_count'].values
    + df.loc[:, 'reins':'reins_stopword_count'].values,
    index=df.index,
    columns=['actions',
             'actions_surv_48h',
             'actions_persistent',
             'actions_stopword_count']
))

# Visualization
listener = DFListener(df)

# Columns offered for plotting: the totals first, then the per-action counts.
actions = (df.loc[:, 'actions':'actions_stopword_count'].columns.append(
    df.loc[:, 'adds':'reins_stopword_count'].columns)).values.tolist()

# Interactive plot; every keyword becomes a selection widget whose value is
# passed to `listener.editions_per_month`.
interact(listener.editions_per_month,
         begin=df.year_month,
         end=df.year_month.sort_values(ascending=False),
         granularity=['Yearly', 'Monthly'],
         black=actions, red=['actions_surv_48h'] + actions + ['None'],
         green=['None'] + actions, blue=['None'] + actions)


# Conflict Score of Editors of a Page

In [None]:
from wikiwho_wrapper import WikiWho
from metrics.conflict import ConflictCalculator

# Build the conflict calculator for the selected page on top of a WikiWho
# client and run the calculation.
wikiwho = WikiWho(lng='en')
calculator = ConflictCalculator(the_page.page_id, wikiwho)
df = calculator.calculate()

# Report the conflict score of the whole article ...
print(
    f'Total article conflict score: {calculator.get_page_conflict_score()}'
)

# ... and show the per-editor conflict scores as the cell output.
editor_conflicts = calculator.get_conflict_score_per_editor()
editor_conflicts

# Grouping the conflict insertions and deletions per author

In [None]:
# Show the complete token sets instead of truncating wide cells.
# `None` is the supported "no limit" value; the former `-1` is deprecated
# since pandas 1.0 and rejected by newer releases. The fallback keeps the
# cell working on old pandas versions that do not accept `None` yet.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# Get the words that each editor introduces and deletes: one set of tokens
# per (editor, action) pair.
conflicts = calculator.get_all_conflicts()
conflict_tokens = conflicts[['token', 'editor', 'action']].groupby(
    ['editor', 'action']).aggregate(lambda x: set(x))

# Merge back with the previous table so one can see the conflict score,
# highest score first.
conflict_tokens.reset_index(1).join(
    editor_conflicts['conflict_score']).set_index('action', append=True).sort_values(
    'conflict_score', ascending=False)


# Selecting the most controversial editor

In [None]:
def editor_fn(editor):
    """Fetch the selected editor and publish it as the global `the_editor`.

    Args:
        editor: Editor identifier, as offered by `editor_conflicts.index`.
            An identifier that parses as an integer is looked up as `int`;
            for any other identifier the first two characters are dropped
            before the lookup.
            NOTE(review): presumably those two characters are a prefix that
            marks unregistered editors -- verify against the data source.

    Returns:
        The result of `the_editor.to_frame('values')`, which `interact`
        displays.
    """
    # `the_editor` is global so that a later `%store the_editor` can persist
    # the current selection.
    global the_editor
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))

    # Only the id parsing is guarded. A bare `except` around the lookup
    # would also swallow lookup failures and retry with a truncated id,
    # which could silently return a different editor.
    try:
        editor_key = int(editor)
    except ValueError:
        editor_key = editor[2:]

    the_editor = wikipedia_dv.get_editor(editor_key)
    return the_editor.to_frame('values')

# Selection widget over the index of `editor_conflicts`; every selection
# calls `editor_fn`. The trailing `;` suppresses the cell's own output.
interact(editor_fn, editor=editor_conflicts.index);


In [None]:
%store the_editor

from IPython.display import HTML
HTML('<a href="3. General metadata of the editor.ipynb" target="_blank">Go to next workbook</a>')