In [2]:
from IPython.display import display, Markdown as md, Javascript

# Enable the autoreload extension (mode 2) so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Restore the variables persisted with `%store` (see the "stored in the last
# notebook" note further down). If any of them is missing from the namespace,
# the check below loads the pickled defaults from `data/` instead.
%store -r the_page
%store -r the_editor
%store -r editor_inputname
%store -r calculator
%store -r editors_conflicts

# Fall back to the bundled sample data unless every variable was restored by
# the `%store -r` lines above. At cell (module) level `locals()` is the
# notebook's global namespace, so this checks the restored names directly.
if ('the_page' not in locals() or 
    'the_editor' not in locals() or 
    'editor_inputname' not in locals() or 
    'calculator' not in locals() or 
    'editors_conflicts' not in locals()):
    
    import pickle
    print("Loading default data...")
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point these paths at trusted files.
    # Each file is opened in a `with` block so its handle is closed right
    # after loading instead of being left open.
    with open("data/the_page.p", 'rb') as pickle_file:
        the_page = pickle.load(pickle_file)
    with open("data/the_editor.p", 'rb') as pickle_file:
        the_editor = pickle.load(pickle_file)
    with open("data/editor_inputname.p", 'rb') as pickle_file:
        editor_inputname = pickle.load(pickle_file)
    with open("data/calculator.p", 'rb') as pickle_file:
        calculator = pickle.load(pickle_file)
    with open("data/editors_conflicts.p", 'rb') as pickle_file:
        editors_conflicts = pickle.load(pickle_file)
    
# Emit a Javascript snippet that calls the notebook front end's
# `execute_cells_below()`, i.e. asks it to run the cells after this one.
display(Javascript('IPython.notebook.execute_cells_below()'))

<IPython.core.display.Javascript object>

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<IPython.core.display.Javascript object>

### <span style="color:green"> Modules Imported </span>

In [3]:
## Modules Imported ##

# Display
from IPython.display import display, Markdown as md, clear_output, HTML
from datetime import datetime, date

# APIs
from wikiwho_wrapper import WikiWho
from external.wikipedia import WikipediaDV, WikipediaAPI
from external.xtools import XtoolsAPI, XtoolsDV

# Load and process data.
import pickle
import pandas as pd

# Visualization
import qgrid
from visualization.conflicts_listener import ConflictsListener
from visualization.calculator_listener import ConflictCalculatorListener

from utils.notebooks import get_date_slider_from_datetime, get_notebook_by_number

from ipywidgets import interact, Output, widgets, Layout
from ipywidgets.widgets import Dropdown, fixed

# Load the variables stored (with `%store`) in the last notebook.
# `%store -r <name>` restores <name> into this notebook's namespace; the check
# that follows falls back to pickled defaults when any of them is missing.
%store -r the_page
%store -r conflict_calculator
%store -r conflicts_by_editors
%store -r editor_info
%store -r editor_input_id

# Check that every restored variable is in the namespace; if any is missing,
# load the whole default data set from the pickles in `data/`.
# At cell (module) level `locals()` is the notebook's global namespace.
if ('the_page' not in locals() or  
    'conflict_calculator' not in locals() or 
    'conflicts_by_editors' not in locals() or
    'editor_info' not in locals() or
    'editor_input_id' not in locals()):
    
    print("Loading default data...")
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point these paths at trusted files.
    # Each file is opened in a `with` block so its handle is closed right
    # after loading instead of being left open.
    with open("data/the_page.p", 'rb') as pickle_file:
        the_page = pickle.load(pickle_file)
    with open("data/calculator.p", 'rb') as pickle_file:
        conflict_calculator = pickle.load(pickle_file)
    with open("data/editors_conflicts.p", 'rb') as pickle_file:
        conflicts_by_editors = pickle.load(pickle_file)
    with open("data/the_editor.p", 'rb') as pickle_file:
        editor_info = pickle.load(pickle_file)
    with open("data/editor_inputname.p", 'rb') as pickle_file:
        editor_input_id = pickle.load(pickle_file)

---

# A.  Select an editor to analyze their conflicting editors

In [4]:
# Heading that names the page currently under analysis.
display(md("***Page: {}***".format(the_page['title'])))

# Introductory paragraph for the editors table; the page title is interpolated
# into the single placeholder of the concatenated text.
display(md(
    "The table below presents the conflict score and other related  metrics per editor "
    "(*editor_id* and *editor* column). Select one editor of the page \"{}\" to analyze "
    "the general Wikipedia metadata of the editor. At the end you can select created pages of the editor "
    "in order to restart the analysis in a different page:".format(the_page['title'])
))

***Page: The Camp of the Saints***

The table below presents the conflict score and other related  metrics per editor (*editor_id* and *editor* column). Select one editor of the page "The Camp of the Saints" to analyze the general Wikipedia metadata of the editor. At the end you can select created pages of the editor in order to restart the analysis in a different page:

- **conflict_n**: the total number of conflicts
- **conflict**: the sum of conflict scores of all actions (without division)
- **actions**: the total number of actions performed by the editor
- **conflict_score**: the sum of conflict scores of all actions divided by the number of eligible actions
- **conflict_ratio**: the count of all conflicts divided by the number of eligible actions

In [5]:
def display_conflict_score(eleg_actions):
    """Show the page conflict score and an interactive plot for `eleg_actions`.

    Rebinds the module-level `listener` to a new `ConflictCalculatorListener`
    built from `eleg_actions`, prints the page-wide score obtained from the
    global `calculator`, and wires `listener.listen` to a date-range slider
    (derived from the `rev_time` column) and three dropdowns via `interact`.

    Args:
        eleg_actions: table of actions to plot; it is indexed with
            `['rev_time']` here and passed unchanged to the listener.
    """
    global listener

    listener = ConflictCalculatorListener(eleg_actions)

    display(md(f'*Total Page conflict score: {calculator.get_page_conflict_score()}*'))

    # Options offered for both plotted lines; the red line additionally
    # accepts 'None'.
    selectable_metrics = [
        'Conflict Score',
        'Absolute Conflict Score',
        'Conflict Ratio',
        'Number of Conflicts',
        'Total Elegible Actions',
        'Total Conflict Time',
        'Total Elegible Time',
        'Time per Conflict Action',
        'Time per Elegible Action',
    ]

    # Visualization: one control per keyword argument of `listener.listen`.
    controls = dict(
        _range=get_date_slider_from_datetime(eleg_actions['rev_time']),
        granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),
        black=Dropdown(options=selectable_metrics, value='Conflict Score'),
        red=Dropdown(options=['None'] + selectable_metrics, value='None'),
    )
    interact(listener.listen, **controls)

def select_editor(editor):
    global editor_df
    global the_editor
    global editor_inputname

    editor_inputname=editor
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
    try:
        the_editor = wikipedia_dv.get_editor(int(editor_inputname))
    except:
        the_editor = wikipedia_dv.get_editor(editor_inputname[2:])

    with out:
        %store the_editor
        %store editor_inputname

        clear_output()
        display(md("### Current Selection:"))
        
        if 'invalid' in the_editor:
            display(f"The editor {editor_inputname} was not found, try a different editor")
        else:
            # display the data that will be passed to the next notebook
            display(the_editor.to_frame('values'))
            display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))

            editor_df = calculator.elegible_actions[
                calculator.elegible_actions['editor'] == str(editor_inputname)].copy()


            display_conflict_score(editor_df)


def on_selection_change(change):
    """Observer for the editors grid: analyze the editor of the selected row.

    The grid is taken from the change notification (`change['owner']`) rather
    than from the global `qg_obj`, because a later cell rebinds `qg_obj` to a
    different grid; reading the global would then inspect the wrong widget.

    Args:
        change: the change notification passed by `observe`; only its
            `'owner'` entry (the widget that fired the event) is used.
    """
    try:
        selected_rows = change['owner'].get_selected_df()
        # `.name` of the first selected row is its index label, which is what
        # `select_editor` receives as the editor identifier.
        select_editor(selected_rows.iloc[0].name)
    except Exception:
        # Best effort: report the problem instead of raising inside a widget
        # callback, but let KeyboardInterrupt/SystemExit propagate.
        print('Problem parsing the name. Execute the cell again and try a different editor.')

# Build the editors grid (at most five visible rows) and subscribe the
# selection handler to changes of its `_selected_rows` trait.
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
qg_obj.observe(on_selection_change, names=['_selected_rows'])

# Output area that `select_editor` renders into; created here and shown
# right below the grid.
out = Output()

# Instructions, the grid and the output area, displayed in this order.
display(
    md("### Select one editor (row) to continue the demo:"),
    md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'),
    qg_obj,
    out,
)

# Render the editor that is currently stored so the output area is not empty.
select_editor(editor_inputname)

### Select one editor (row) to continue the demo:

**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Output()

In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily) 
of the timeline (X-axis), and plot any of the following metrics in the black and red lines:
   
- **Conflict Score**: the sum of conflict scores of all actions divided by the number of eligible actions
- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)
- **Conflict Ratio**: the count of all conflicts divided by the number of eligible actions
- **Number of Conflicts**: the total number of conflicts
- **Total Elegible Actions**: the total number of eligible actions
- **Total Conflict Time**: the sum of the times (*time_diff_secs*) taken by conflict actions
- **Total Elegible Time**: the sum of the times (*time_diff_secs*) taken by eligible actions
- **Time per Conflict Action**: the average time taken by a conflict action
- **Time per Elegible Action**: the average time taken by an eligible action

### <span style="color:green"> TRY YOURSELF! THIS IS WHAT WILL HAPPEN WHEN YOU SELECT AN EDITOR </span>

In [18]:
### ----------------------------------------------------------------------------------- ###
### TRY YOURSELF! THIS IS WHAT WILL HAPPEN WHEN YOU SELECT AN EDITOR                    ###
### ----------------------------------------------------------------------------------- ###

## This is the page you used ##
print('The page that is being used:', the_page['title'])

## Use the variable from the last notebook: conflicts_by_editors (pd.DataFrame)        ##
## Display the dataframe using an interactive grid; you can learn more in the docs:    ##
## https://qgrid.readthedocs.io/en/latest/                                             ##
qgrid.set_grid_option('maxVisibleRows', 5)
qgrid_init = qgrid.show_grid(conflicts_by_editors)
display(qgrid_init)

## Get the editor info with the Wikipedia API (get_editor() method); for more details see: ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/api.py                    ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/wikipedia.py              ##
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))

# This is an example editor index. You can change it manually by typing in a new index
# from the above grid, e.g. 737021
editor_input_id = 263324

# Store editor_input_id for use in the next notebook
%store editor_input_id

# Get the editor's information (a pd.Series; it is converted with .to_frame() for display below)
editor_info = wikipedia_dv.get_editor(int(editor_input_id))

# Store editor_info for use in the next notebook
%store editor_info

## Display the basic information of the selected editor ##
editor_url = f'{wikipedia_dv.api.base}action=query&list=users&ususerids={editor_input_id}&usprop=blockinfo|editcount|registration|gender&format=json'
print("Editor's data can be found in:")
print(editor_url)
display(md("### Current Selection:"))
display(editor_info.to_frame('values'))

## Interactive evolution of the conflict score of this editor, using ConflictCalculatorListener; see ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/visualization/calculator_listener.py         ##
display(md(f"#### Evolution of the Conflict Score of *{editor_info['name']}*"))

# Rows of the elegible actions whose 'editor' column equals the selected id (compared as a string),
# copied so the listener works on its own dataframe
editor_df = conflict_calculator.elegible_actions[conflict_calculator.elegible_actions['editor'] == str(editor_input_id)].copy()
           
# Create a ConflictCalculatorListener instance.
conflicts_cal_listener = ConflictCalculatorListener(editor_df)

# Set parameters: the date range to plot and the granularity of the timeline
begin_date = date(2017, 3, 1)
end_date = date(2019, 12, 16)
frequency = 'Daily' # one of 'Yearly', 'Monthly', 'Daily'

# Available metrics:
# ['Conflict Score', 'Absolute Conflict Score', 'Conflict Ratio', 'Number of Conflicts',
#  'Total Elegible Actions', 'Total Conflict Time', 'Total Elegible Time', 
# 'Time per Conflict Action', 'Time per Elegible Action', ('None')]
# Note: only 'red_line' has the 'None' option.
black_line = 'Conflict Score'
red_line = 'None'
           
print('Time range from', begin_date.strftime("%Y-%m-%d"), 'to', end_date.strftime("%Y-%m-%d"))
print('Total Page conflict score:', conflict_calculator.get_page_conflict_score())
           
# Call the listener directly with the same keyword arguments that the interactive
# widgets pass to it in the editor-selection cell above.
conflicts_cal_listener.listen(
    _range = (begin_date, end_date),
    granularity = frequency,
    black = black_line,
    red = red_line
)

The page that is being used: The Camp of the Saints


QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Stored 'editor_input_id' (int)
Stored 'editor_info' (Series)
Editor's data can be found in:
https://en.wikipedia.org/w/api.php?action=query&list=users&ususerids=263324&usprop=blockinfo|editcount|registration|gender&format=json


### Current Selection:

Unnamed: 0,values
userid,263324
name,Volunteer Marek
editcount,80254
registration,2005-05-10T10:18:46Z
gender,unknown


#### Evolution of the Conflict Score of *Volunteer Marek*

Time range from 2017-03-01 to 2019-12-16
Total Page conflict score: 0.8679683293977595


In [6]:
# Section B header: rule, title, data source, selected editor and a short intro.
display(md("---"))
display(md("# B. Basic editor information"))
display(md("Provided by Wikipedia"))
display(md("***Editor: {}***".format(the_editor['name'])))
display(md("The following is information about the editor directly available in Wikipedia."))

---

# B. Basic editor information

Provided by Wikipedia

***Editor: Volunteer Marek***

The following is information about the editor directly available in Wikipedia.

In [7]:
# Render the selected editor as a one-column table; kept as the last
# expression so the notebook displays it.
the_editor.to_frame(name='value')

Unnamed: 0,value
userid,263324
name,Volunteer Marek
editcount,80254
registration,2005-05-10T10:18:46Z
gender,unknown


In [8]:
# Section C header: rule, title, data source, selected editor and a short intro.
display(md("---"))
display(md("# C. Modified pages of an editor"))
display(md("Provided through the Xtools API"))
display(md("***Editor: {}***".format(the_editor['name'])))
display(md(
    "The following is some metadata about the creation and deletion\n"
    "of pages in Wikipedia by the editor."
))

---

# C. Modified pages of an editor

Provided through the Xtools API

***Editor: Volunteer Marek***

The following is some metadata about the creation and deletion
of pages in Wikipedia by the editor.

In [9]:
# Xtools client for the English Wikipedia and its data view.
xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

try:
    # NOTE(review): this rebinds `editor_info`, which earlier cells use for
    # the editor's Wikipedia metadata; from here on it holds the page counts.
    editor_info = xtools_dv.get_modified_pages_counts_per_editor(the_editor['name'])
    display(editor_info.to_frame('value'))
except Exception:
    # Best effort: any failure is reported as "no modified pages".
    # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate instead of being reported as missing data.
    clear_output()
    display(md(f'**There are no modified pages by this editor.**'))


https://xtools.wmflabs.org/api/user/pages_count/en.wikipedia.org/Volunteer Marek


Unnamed: 0,value
Created pages:,324
Deleted pages,5


In [10]:
# Section D header: rule, title, data source, selected editor, and the legend
# for the columns of the created-pages table.
display(md("---"))
display(md("# D. Select one page created by an editor"))
display(md("Provided through the Xtools API"))
display(md("***Editor: {}***".format(the_editor['name'])))
display(md(
    "The following table shows a list of created paged by the editor with some general \n"
    "information about the page:\n"
    "- *page_title*: title of the page\n"
    "- *page_len*: an estimated amount of words in the page\n"
    "- *rev_id*: the id of the last revision\n"
    "- *rev_len*: the number of revisions made on that page\n"
    "- *rev_timestamp*: the timestamp of the last revision (last modification)"
))

---

# D. Select one page created by an editor

Provided through the Xtools API

***Editor: Volunteer Marek***

The following table shows a list of created paged by the editor with some general 
information about the page:
- *page_title*: title of the page
- *page_len*: an estimated amount of words in the page
- *rev_id*: the id of the last revision
- *rev_len*: the number of revisions made on that page
- *rev_timestamp*: the timestamp of the last revision (last modification)

In [12]:
xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

try:
    created_pages = xtools_dv.get_created_pages_per_editor(the_editor['name'])
except:
    clear_output()
    display(md(f'**There are no created pages by this editor.**'))
    display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))
    
def select_page(page):
    global the_page
    page_inputname=page
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
    try:
        the_page = wikipedia_dv.get_page(int(page_inputname))
    except:
        the_page = wikipedia_dv.get_page(page_inputname)

    with out:
        %store the_page
        clear_output()
        display(md("### Current Selection:"))
        
        if 'invalid' in the_page:
            display(f"The page {page_inputname} was not found, try a different page")
        else:
            # display the data that will be passed to the next notebook
            display(the_page.to_frame('values'))
            display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))
   

def on_selection_change(change):
    try:
        select_page(qg_obj.get_selected_df().iloc[0].page_title)
    except:
        print('Problem parsing the name. Execute the cell again and try a different page.')
        display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))

if 'created_pages' in locals():
    qg_obj = qgrid.show_grid(created_pages[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']])
    qg_obj.observe(on_selection_change, names=['_selected_rows'])

if 'qg_obj' in locals():
    display(md("### Select one page row for the next notebook:"))
    display(qg_obj)
    out = Output()
    display(out)



### Select one page row for the next notebook:

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Output()