In [None]:
# Enable the autoreload extension in mode 2 so that edited modules are re-imported
# automatically instead of requiring a kernel restart
%load_ext autoreload
%autoreload 2
# Restore `the_page` from IPython's %store storage (presumably saved by the previous notebook)
%store -r the_page

In [None]:
from external.wikipedia import WikipediaDV, WikipediaAPI

# Identifier of the page restored through %store; the original inline note
# mentions 'Chicago' as an alternative literal value
page_name = the_page.page_id

# Build the English-Wikipedia data view and fetch the page record with it
wikipedia_api = WikipediaAPI(domain='en.wikipedia.org')
wikipedia_dv = WikipediaDV(wikipedia_api)
page = wikipedia_dv.get_page(page_name)

In [None]:
# Header of section A, built from the title of the restored page. `md` is an alias of
# IPython's Markdown class; being the last expression of the cell, it is rendered as output.
from IPython.display import display, Markdown as md
md(f'# A. Actions for the page "*{the_page["title"]}*"')

In [None]:
from wikiwho_wrapper import WikiWho
import pandas as pd
import qgrid

# Show at most 5 rows per grid so that large DataFrames do not take up too much space
qgrid.set_grid_option('maxVisibleRows', 5)

# Fetch the editions table of the page through the WikiWho wrapper
wikiwho = WikiWho(lng='en')
actions = wikiwho.dv.editions(the_page.page_id)

# Names of the aggregated columns (one per column of each slice below)
total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']

# The three label slices are added position by position, so they must select the
# same number of columns, in the same order, as `total_columns`
added = actions.loc[:, 'adds':'adds_stopword_count'].values
deleted = actions.loc[:, 'dels':'dels_stopword_count'].values
reinserted = actions.loc[:, 'reins':'reins_stopword_count'].values
totals = pd.DataFrame(
    added + deleted + reinserted,
    index=actions.index,
    columns=total_columns,
)

# Append the totals to the original columns
actions = actions.join(totals)

qgrid.show_grid(actions[['year_month', 'editor_id'] + total_columns])

In [None]:
# Convert to datetime
actions['year_month'] = pd.to_datetime(actions['year_month'])

# The per-editor rows are handed to the listener as they are. A former
# `drop('editor_id').groupby(['year_month', 'page_id']).sum()` statement at this point
# threw its result away, so it never changed `actions`; it was removed as wasted work.
# NOTE(review): if aggregating per month/page is wanted here, the result has to keep
# `year_month` as a column, because the widget options below read `actions.year_month`.

# Visualization
from visualization.actions_listener import ActionsListener
from ipywidgets import interact
listener = ActionsListener(actions)

# Selectable series: the four total columns first, then every column from
# 'adds' through 'reins_stopword_count'
action_types = (actions.loc[:,'total':'total_stopword_count'].columns.append(
    actions.loc[:,'adds':'reins_stopword_count'].columns)).values.tolist()

# The first entry of each option list is presumably the widget's initial selection,
# which is why 'None' is placed differently for red than for green and blue — TODO confirm
interact(listener.listen, 
         begin=actions.year_month,
         end=actions.year_month.sort_values(ascending=False),
         granularity=['Yearly', 'Monthly'],
         black=action_types, 
         red=['total_surv_48h'] + action_types + ['None'],
         green=['None'] + action_types, 
         blue=['None'] + action_types)

In [None]:
from IPython.display import display, Markdown as md

# Headers of section B and of its first subsection, rendered one after the other
for header_text in (
    f'# B. Conflict Score for the page "*{the_page["title"]}*"',
    '## B.1 Tokens Conflict Score',
):
    display(md(header_text))

In [None]:
from metrics.conflict import ConflictCalculator
from wikiwho_wrapper import WikiWho
from IPython.display import clear_output, display

# create the api
wikiwho = WikiWho(lng='en')

# call the calculator, then clear whatever output the calculation left in the cell
calculator = ConflictCalculator(the_page.page_id, wikiwho)
calculator.calculate()
clear_output()

# NOTE: this rebinds `actions`, which until here held the editions table with the total
# columns; the cells below expect the calculator's frame. The new column is written into
# the very object the calculator holds (no copy is made).
actions = calculator.actions
actions['time_diff_secs'] = actions['time_diff'].dt.total_seconds()

# display the tokens, the difference in seconds and its corresponding conflict score.
# The grid is neither the last expression of the cell nor auto-rendered, so it has to be
# passed to display() explicitly; before, its return value was discarded and never shown.
display(qgrid.show_grid(actions[[
    'action', 'token', 'token_id', 'rev_id', 
    'editor', 'time_diff_secs', 'conflict']].sort_values('conflict', ascending=False)))

actions


## B.2 Conflict Score and related metrics by time

In [None]:
# Visualization
from ipywidgets import interact
from visualization.conflicts_listener import ConflictsListener

listener = ConflictsListener(actions)

# Distinct days found in `rev_time`, converted back to timestamps.
# Despite its name, `month_year` has daily resolution (period 'D').
revision_days = actions['rev_time'].dt.to_period('D').unique()
month_year = pd.Series(revision_days).dt.to_timestamp()

# Metric names offered in the colour selectors
metrics = [
    'Conflict Score',
    'Conflict Ratio',
    'Total Conflicts',
    'Total Elegible Actions',
    'Total Actions',
    'Total Time',
    'Time per Elegible Action',
]

# `end` receives the same dates in descending order
interact(
    listener.listen,
    begin=month_year,
    end=month_year.sort_values(ascending=False),
    granularity=['Yearly', 'Monthly', 'Daily'],
    black=metrics,
    red=['None'] + metrics,
)

In [None]:
# Header of subsection B.3; the title is read from `page` (fetched through WikipediaDV),
# whereas the headers of sections A and B read it from `the_page`
md(f'## B.3 Conflict Score per Editor for *{page["title"]}*')

In [None]:
# Print the conflict score of the whole page, then show the per-editor scores.
# Later cells read `editors_conflicts.index` (editor selector) and its
# 'conflict_score' column (join with the conflict tokens).
print(f'Total article conflict score: {calculator.get_page_conflict_score()}')
editors_conflicts = calculator.get_conflict_score_per_editor()
editors_conflicts

## B.4 Grouping the conflict insertions and deletions per editor

In [None]:
# to visualize the entire tokens: remove the column-width limit of rendered DataFrames.
# `None` is the supported "no limit" value; the former -1 is deprecated since pandas 1.0
# and newer releases reject it. Releases that only accept integers raise ValueError for
# None, in which case the legacy -1 is used instead.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# collect, for every (editor, action) pair, the set of tokens found in the conflicts
conflicts = calculator.get_all_conflicts()
if len(conflicts) > 0:
    token_columns = conflicts[['token', 'editor', 'action']]
    conflict_tokens = token_columns.groupby(
        ['editor', 'action']).aggregate(lambda tokens: set(tokens))

    # attach the conflict score of each editor, restore 'action' as an index level
    # and list the highest scores first
    scored_tokens = conflict_tokens.reset_index(1).join(
        editors_conflicts['conflict_score'])
    display(scored_tokens.set_index('action', append=True).sort_values(
        'conflict_score', ascending=False))
else:
    print("There are no conflicts in this article")
    

In [None]:
# Header of section C; relies on `md` (IPython's Markdown) imported in an earlier cell
md(f'# C. Selecting a controversial editor for *{page["title"]}*')

In [None]:
def editor_fn(editor):
    global the_editor
    global editor_inputname
    editor_inputname = editor
    
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
    try:
        the_editor = wikipedia_dv.get_editor(int(editor))
    except:
        the_editor = wikipedia_dv.get_editor(editor[2:])

    # display the data that will be passed to the next notebook
    display(the_editor.to_frame('values'))
    
    %store the_editor
    %store editions
    %store editor_inputname
    %store calculator
       
# Offer the editor selector only when at least one editor has conflicts
if len(editors_conflicts) == 0:
    print("There are no conflicts")
else:
    display(md('**Select an editor based on the previous information**'))
    display(md('*Recomendation: editors with many conflicts and moderate conflict score*'))
    # every index value of `editors_conflicts` becomes a choice passed to editor_fn
    interact(editor_fn, editor=editors_conflicts.index)

In [None]:
from IPython.display import HTML

# With conflicts, link forward to the editor notebook; without, link back to the first one
if len(editors_conflicts) > 0:
    navigation_link = '<a href="3. Using WikiWho to analyze an editor in the context of a page.ipynb" target="_blank">Go to next workbook</a>'
else:
    navigation_link = '<a href="1. General Metadata of a Wikipedia Article.ipynb" target="_blank">Go back to the previous workbook</a>'
display(HTML(navigation_link))