In [None]:
%load_ext autoreload
%autoreload 2
%store -r the_page

if 'the_page' not in locals():
    import pickle
    print("Loading default data...")
    the_page = pickle.load(open("data/the_page.p",'rb'))

from IPython.display import display, Markdown as md
display(md("---"))
display(md(f"# A. Insertions, Deletions, Reinsertions (Actions)"))
display(md(f" Provided by the [WikiWho API](https://www.wikiwho.net/en/api/v1.0.0-beta/)"))
display(md(f"***Page: {the_page['title']}***"))
display(md(f"---\n***IMPORTANT:*** For articles with long revisions history, the process can take a long time. "
           "Please give the some time to load (see cog wheel symbol right of 'edit app') "
           "before interacting with the controls too often!"))

In [None]:
from wikiwho_wrapper import WikiWho
import pandas as pd
import qgrid
# cap the number of visible rows at 5 so the larger DataFrames we render don't take up too much space
qgrid.set_grid_option('maxVisibleRows', 5)

wikiwho = WikiWho(lng='en')
# item access, consistent with every other lookup on `the_page` in this notebook
agg_actions = wikiwho.dv.edit_persistence(the_page['page_id'])

# define total columns
total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']

# add columns with the total actions: element-wise sum of the adds, dels and reins column slices
# (assumes each slice has as many columns as `total_columns`, in the matching order - TODO confirm)
agg_actions = agg_actions.join(pd.DataFrame(
    agg_actions.loc[:,'adds':'adds_stopword_count'].values +\
    agg_actions.loc[:,'dels':'dels_stopword_count'].values +\
    agg_actions.loc[:,'reins':'reins_stopword_count'].values, 
    index=agg_actions.index, 
    columns=total_columns
))

# Grab user names from wikipedia so they can be merged into the agg_actions dataframe
from external.wikipedia import WikipediaDV, WikipediaAPI
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
editors = wikipedia_dv.get_editors(agg_actions['editor_id'].unique()).rename(columns = {
    'userid': 'editor_id'})

# Merge the names of the editors to the aggregate actions dataframe.
# A left merge keeps the rows of editors that have no match in `editors`; with the default
# inner merge those rows were dropped and the "Unregistered" fill below could never apply.
agg_actions = agg_actions.merge(editors[['editor_id', 'name']], on='editor_id', how='left')
agg_actions.insert(3, 'editor', agg_actions['name'])
agg_actions = agg_actions.drop(columns=['name'])
agg_actions['editor'] = agg_actions['editor'].fillna("Unregistered")

qgrid.show_grid(agg_actions[['year_month', 'editor_id', 'editor'] + total_columns])

In [None]:
# Convert to datetime
agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])

# NOTE: a groupby(['year_month', 'page_id']).sum() used to be evaluated here, but its result
# was never assigned or displayed, so the wasted computation has been removed.

# Listener
from visualization.actions_listener import ActionsListener
listener = ActionsListener(agg_actions)
# Selectable series: the four total columns first, then every adds/dels/reins column
action_types = (agg_actions.loc[:,'total':'total_stopword_count'].columns.append(
    agg_actions.loc[:,'adds':'reins_stopword_count'].columns)).values.tolist()

# Visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

# The black series is mandatory; red, green and blue can be switched off with 'None'.
interact(listener.listen,
         _range = get_date_slider_from_datetime(agg_actions['year_month']),
         editor=fixed('All'),
         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Yearly'),
         black=Dropdown(options=action_types, value='total'), 
         red=Dropdown(options= ['None'] + action_types, value='total_surv_48h'),
         green=Dropdown(options= ['None'] + action_types, value='None'), 
         blue=Dropdown(options= ['None'] + action_types, value='None'))

In [None]:
from IPython.display import display, Markdown as md
# Section header for part B and its first subsection (heading typo "Meassuring" corrected).
display(md("---"))
display(md("# B. Measuring conflict"))
display(md('## B.1 Tokens Conflict Score'))
display(md(f"***Page: {the_page['title']}***"))

In [None]:
# Imports used by this cell: the WikiWho client and the notebook display helpers
from wikiwho_wrapper import WikiWho
from IPython.display import display, Markdown as md
from IPython.display import clear_output

# Client for the English-language WikiWho API
wikiwho = WikiWho(lng='en')

# Fetch the content of the page, announcing the download first
display(md("Downloading all_content from the WikiWhoApi..."))
all_content = wikiwho.dv.all_content(the_page['page_id'])

# Fetch the revision ids of the page, announcing the download first
display(md("Downloading revisions from the WikiWhoApi..."))
revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id'])

# Remove the progress messages once both downloads are done
clear_output()

In [None]:

from metrics.conflict import ConflictManager
from wikiwho_wrapper import WikiWho
from IPython.display import clear_output
from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

# Markdown legend rendered below the conflicts grid
column_descriptions = """**Columns description**
- `token`: the string of the token that is being tracked
- `token_id`: the id of the token that is being tracked
- `rev_id`: the revision id in which the action (insertion or deletion) happened
- `editor_id`: the id of the editor that inserted the token (if it starts with `0|`, it means that
the editor is not registered, and the IP is displayed instead)
- `time_diff_secs`: the elapsed time between the action (either insertion or deletion) performed in 
the current revision, and the latest revision in which the same action was performed.
- `conflict`: a score to measure conflict that is calculated based on the `time_diff_secs` 
with the following formula: *1 / log<sub>3600</sub>(time_diff_secs + 2)*. For details, please refer to 
[Flöck et al, 2017](https://arxiv.org/abs/1703.08244)"""

# call the calculator
calculator = ConflictManager(all_content, revisions)
calculator.calculate()
clear_output()

# work on a copy so the calculator's own dataframe is left untouched
conflicts = calculator.conflicts.copy()

if len(conflicts) > 0:
    # The seconds column is only derived when there are conflicts: it used to be computed
    # before this guard, where an empty result could fail on the `.dt` accessor and the
    # "no conflicts" branch below would never be reached.
    conflicts['time_diff_secs'] = conflicts['time_diff'].dt.total_seconds()

    # display the tokens, the difference in seconds and its corresponding conflict score
    display(qgrid.show_grid(conflicts[[
        'action', 'token', 'token_id', 'rev_id', 
        'editor', 'time_diff_secs', 'conflict']].rename(columns={
        'editor': 'editor_id'}).sort_values('conflict', ascending=False)))
    display(md(column_descriptions))
else:
    display(md(f'**There are no conflicting tokens in this page.**'))
    display(HTML(f'<a href="{get_previous_notebook()}" target="_blank">Go back to the previous workbook</a>'))

In [None]:
from IPython.display import display, Markdown as md

# Header of subsection B.2: horizontal rule, subsection title, then the analysed page.
display(
    md("---"),
    md('## B.2 Conflicting tokens per page'),
    md(f"***Page: {the_page['title']}***"),
)

In [None]:
# imports for the listener and the widgets of this cell
from visualization.wordcloud_listener import WCListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown, HTML, interactive_output, VBox

# listener: one entry per selectable source of actions, all taken from the calculator
listener = WCListener(sources = {
    'All actions': calculator.all_actions,
    'Elegible Actions': calculator.elegible_actions,
    'Only Conflicts': calculator.conflicts
})

# controls: date range, source of the actions, type of action, and a fixed editor filter
_range=get_date_slider_from_datetime(calculator.all_actions['rev_time'])
source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts', description='Source (*)')
action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both', description='Action')
editor=fixed('All')

# visualization: the output area is refreshed through listener.listen when a control changes
out = interactive_output(
    listener.listen,
    dict(_range=_range, source=source, action=action, editor=editor))

# layout: controls on top, then the output, then the explanation of the source options
VBox(children=[
    _range,
    action,
    source,
    out,
    HTML(value="""
      <b>(*) Source options:</b><br /> <ul>
      <li><b>Only Conflicts</b>: use only the tokens that enter into conflict. A conflict occurs when an editor 
      insists on a token to exists (or not) on an article. For example, Ana inserts the token x in the document,
      then Bob removes it, and then Ana inserts it again. Formally, an action <i>a<sub>i</sub></i>(insertion or deletion)
      on a token written by editor <i>A</i> is in conflict in revision <i>r</i>, if (1) the previous time the same 
      action <i>a<sub>i-1</sub></i> happened, it was also performed by editor <i>A</i>, and if (2) the reverse action 
      <i>a'<sub>i</sub></i> that happened between <i>a<sub>i-1</sub></i> and <i>a<sub>i</sub></i> (<i>a'<sub>i</sub></i> is a deletion if <i>a<sub>i</sub></i> is an insert, and viceversa)
      was perform by and editor B, where B is different from A. </li>
      <li><b>Elegible Actions</b>: use only the tokens that can potentially enter into conflict, i.e. actions
      that have occurred at least twice, e.g. the token x has been inserted twice (which necessarily implies it
      was remove once).</li>
      <li><b>All Actions</b>: use all tokens</li></ul><br />
      """),
])



In [None]:
from IPython.display import display, Markdown as md

# Header of subsection B.3: horizontal rule, subsection title, then the analysed page.
display(
    md("---"),
    md('## B.3 Conflict Score and related metrics'),
    md(f"***Page: {the_page['title']}***"),
)

In [None]:
# Listener fed with a copy of the calculator's elegible actions
from visualization.conflicts_listener import ConflictsListener
elegible_actions = calculator.elegible_actions.copy()
listener = ConflictsListener(elegible_actions)

# Metrics offered in the dropdowns of the widget below
metrics = ['Conflict Score', 'Conflict Ratio', 'Total Conflicts', 
           'Total Elegible Actions', 'Total Actions', 'Total Time',
          'Time per Elegible Action']

# Compute the page-level score once: it is both displayed and used to gate the widget,
# so there is no need to ask the calculator for it twice.
page_conflict_score = calculator.get_page_conflict_score()
display(md(f'**Page conflict score: {page_conflict_score}**'))

# Visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

# The interactive plot is only shown when the page conflict score is not zero
if page_conflict_score != 0:
    interact(listener.listen,
             _range = get_date_slider_from_datetime(elegible_actions['rev_time']),
             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),
             black=Dropdown(options=metrics, value='Conflict Score'),
             red=Dropdown(options= ['None'] + metrics, value='None'))

In [None]:
from IPython.display import display, Markdown as md

# Header of subsection B.4: horizontal rule, subsection title, then the analysed page.
display(
    md("---"),
    md('## B.4 Conflict Score per Editor'),
    md(f"***Page: {the_page['title']}***"),
)

In [None]:
# Per-editor conflict scores as returned by the calculator, wrapped in a qgrid widget
editors_conflicts = calculator.get_conflict_score_per_editor()
qg_obj = qgrid.show_grid(editors_conflicts)

# Show a short notice instead of the grid when there is nothing to list
if len(editors_conflicts) == 0:
    display(md('**There is no Conflict Score**'))
else:
    display(qg_obj)

In [None]:
from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

%store agg_actions
%store calculator
clear_output()
        

if len(editors_conflicts) > 0:
    display(HTML(f'<a href="{get_next_notebook()}" target="_blank">Go to next workbook</a>'))
else:
    display(HTML(f'<a href="{get_previous_notebook()}" target="_blank">Go back to the previous workbook</a>'))
