In [15]:
from IPython.display import display, Markdown as md, Javascript, HTML
from datetime import datetime
from utils.notebooks import get_date_slider_from_datetime, get_notebook_by_number
from ipywidgets import interact, Output, widgets, Layout
from ipywidgets.widgets import Dropdown, fixed

%store -r the_page
%store -r the_editor
%store -r editor_inputname
# %store -r calculator
%store -r editors_conflicts

# if ('the_page' not in locals() or 
#     'the_editor' not in locals() or 
#     'editor_inputname' not in locals() or 
#     'calculator' not in locals() or 
#     'editors_conflicts' not in locals()):
    
#     import pickle
#     print("Loading default data...")
#     the_page = pickle.load(open("data/the_page.p",'rb'))
#     the_editor = pickle.load(open("data/the_editor.p",'rb'))
#     editor_inputname = pickle.load(open("data/editor_inputname.p",'rb'))
#     calculator = pickle.load(open("data/calculator.p",'rb'))
#     editors_conflicts = pickle.load(open("data/editors_conflicts.p",'rb'))
    
# Ask the classic-notebook front end to execute every cell below this one, so
# the rest of the notebook runs with the variables restored by `%store -r` above.
display(Javascript('IPython.notebook.execute_cells_below()'))

<IPython.core.display.Javascript object>

In [3]:
%%html
<style>
summary{
    display:list-item;
}
</style>

In [4]:
%%capture

%load_ext autoreload
%autoreload 2

### <span style="color:green"> Modules Imported </span>

In [5]:
## Modules Imported ##

# Display
from IPython.display import display, Markdown as md, clear_output
from datetime import date
import urllib

# APIs
from wikiwho_wrapper import WikiWho
from external.wikipedia import WikipediaDV, WikipediaAPI
from external.wikimedia import WikiMediaDV, WikiMediaAPI
from external.xtools import XtoolsAPI, XtoolsDV

# Data Processing
import pickle
import pandas as pd

# Visualization tools
import qgrid
import matplotlib.pyplot as plt

# Page views timeline
from visualization.views_listener import ViewsListener

# Change actions timeline
from visualization.actions_listener import ActionsListener

# Conflicts visualization
from visualization.conflicts_listener import ConflictsListener, ConflictsActionListener
from visualization.calculator_listener import ConflictCalculatorListener

# Word cloud visualization
from visualization.wordcloud_listener import WCListener, WCActionsListener
from visualization.wordclouder import WordClouder

# Wikipedia talk pages visualization
from visualization.talks_listener import TalksListener
from visualization.topics_listener import TopicsListener

# Tokens ownership visualization
from visualization.owned_listener import OwnedListener

# To remove stopwords
from visualization.editors_listener import remove_stopwords

# Metrics management
from metrics.conflict import ConflictManager
from metrics.token import TokensManager

# For language selection
from utils.lngselection import abbreviation, lng_listener

# Load the variables stored in the last notebook
%store -r the_page
#%store -r conflict_calculator
#%store -r conflicts_by_editors
%store -r editor_info
%store -r editor_input_id
%store -r lng_selected
%store -r sources
%store -r con_manager_all

# # Check them if in the namespace, otherwise load the default data.
# if ('the_page' not in locals() or  
#     'conflict_calculator' not in locals() or 
#     'conflicts_by_editors' not in locals() or
#     'editor_info' not in locals() or
#     'editor_input_id' not in locals()):
    
#     print("Loading default data...")
#     the_page = pickle.load(open("data/the_page.p",'rb'))
#     conflict_calculator = pickle.load(open("data/calculator.p",'rb'))
#     conflicts_by_editors = pickle.load(open("data/editors_conflicts.p",'rb'))
#     editor_info = pickle.load(open("data/the_editor.p",'rb'))
#     editor_input_id = pickle.load(open("data/editor_inputname.p",'rb'))

In [6]:
# Render the notebook title from the restored page title and the upper-cased
# language code (e.g. "EN").
display(md(f"# ***Page: {the_page['title']} ({lng_selected.upper()})***"))

# ***Page: The Camp of the Saints (EN)***

---

# A.  Select an editor to analyze their conflicting editors

The table below presents the conflict score and other related metrics per editor
(*editor_id* and *editor* columns). At the end you can select one of the pages created by the editor 
in order to restart the analysis on a different page.

<details>    
    <summary style="cursor: pointer;font-weight:bold">Columns description</summary>

- **conflicts**: the total number of conflicts
- **elegibles**: the total number of eligible actions performed by the editor
- **conflict**: the sum of conflict scores of all actions divided by the number of eligible actions
</details>

In [7]:
def display_conflict_score(eleg_actions):
    """Show the interactive conflict-score timeline for a set of actions.

    Creates a ``ConflictCalculatorListener`` over ``eleg_actions`` (kept in the
    global ``listener``), displays the page-wide conflict score and binds the
    listener to two date pickers and three dropdowns through ``interact``.

    Args:
        eleg_actions: DataFrame of actions. Its ``rev_time`` column supplies
            the default start and end dates of the timeline.
    """
    global listener

    listener = ConflictCalculatorListener(eleg_actions)

    metrics = ['Conflict Score', 'Absolute Conflict Score',
               'Conflict Ratio',  'Number of Conflicts',
               'Total Elegible Actions',
               'Total Conflict Time', 'Total Elegible Time',
               'Time per Conflict Action', 'Time per Elegible Action']

    display(md(f'*Total Page conflict score: {con_manager_all.get_page_conflict_score()}*'))

    # Without any action there is no first/last revision time to seed the date
    # pickers with; say so instead of failing with an IndexError on `iloc`.
    if eleg_actions.empty:
        display(md('*There are no actions to plot for this selection.*'))
        return

    # Sort the timestamps once and reuse them for both ends of the range.
    rev_times = eleg_actions['rev_time'].sort_values()
    first_rev_time = rev_times.iloc[0]
    last_rev_time = rev_times.iloc[-1]

    # Visualization
    interact(listener.listen,
             _range1=widgets.DatePicker(description='Date starts', value=first_rev_time, layout=Layout(width='25%')),
             _range2=widgets.DatePicker(description='Date ends', value=last_rev_time, layout=Layout(width='25%')),
             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),
             black=Dropdown(options=metrics, value='Conflict Score'),
             # Only the red line can be switched off.
             red=Dropdown(options=['None'] + metrics, value='None'))

def select_editor(editor):
    global editor_df
    global the_editor
    global editor_inputname

    editor_inputname=editor
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(lng=lng_selected))
    try:
        the_editor = wikipedia_dv.get_editor(int(editor_inputname))
    except:
        the_editor = wikipedia_dv.get_editor(editor_inputname[2:])

    with out:
        %store the_editor
        %store editor_inputname

        clear_output()
        display(md("### Current Selection:"))
        
        url = f'{wikipedia_dv.api.base}action=query&list=users&ususerids={editor_inputname}&usprop=blockinfo|editcount|registration|gender&format=json'
        print("Editor's metadata can be found in:")
        print(url)
        
        if 'invalid' in the_editor:
            display(f"The editor {editor_inputname} was not found, try a different editor")
        else:
            # display the data that will be passed to the next notebook
            display(the_editor.to_frame('values'))
            display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))
            
            elegiblesActions = remove_stopwords(sources["elegibles_all"], lng=lng_selected)
            editor_df = elegiblesActions[elegiblesActions['editor'] == str(editor_inputname)].copy()


            display_conflict_score(editor_df)


def on_selection_change(change):
    """Observer of the editors grid: analyse the editor of the first selected row.

    Args:
        change: change notification passed by ``observe`` (not used).
    """
    try:
        selected_rows = qg_obj.get_selected_df()
        if selected_rows.empty:
            # Nothing is selected, so there is no editor to analyse; this is
            # not a parsing problem and should not be reported as one.
            return
        # The row label (index value) of the grid identifies the editor.
        select_editor(selected_rows.iloc[0].name)
    except Exception:
        # `except Exception` keeps KeyboardInterrupt/SystemExit propagating.
        print('Problem parsing the name. Execute the cell again and try a different editor.')

# Show at most five rows at a time and build the interactive editors grid.
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
# Re-run the analysis whenever the set of selected rows changes.
qg_obj.observe(on_selection_change, names=['_selected_rows'])

display(md("### Select one editor (row) to continue the demo:"))
display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))
display(qg_obj)
# `out` is the output area select_editor() renders into; it has to exist
# before the first call below.
out = Output()
display(out)
# Start with the editor restored from the previous notebook.
select_editor(editor_inputname)

### Select one editor (row) to continue the demo:

**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Output()

In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily) 
of the timeline (X-axis), and plot any of the following metrics in the black and red lines:

<details>    
    <summary style="cursor: pointer;font-weight:bold">Options description</summary>
    
- **Conflict Score**: the sum of conflict scores of all actions divided by the number of eligible actions
- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)
- **Conflict Ratio**: the count of all conflicts divided by the number of eligible actions
- **Number of Conflicts**: the total number of conflicts
- **Total Elegible Actions**: the total number of eligible actions
- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions
- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by eligible actions
- **Time per Conflict Action**: average time of conflict actions
- **Time per Elegible Action**: average time of eligible actions
</details>

<span style="color: #626262"> Try yourself! This is what will happen when you select an editor: </span>

In [8]:
### ---------------------------------------------------------------- ###
### TRY YOURSELF! THIS IS WHAT WILL HAPPEN WHEN YOU SELECT AN EDITOR ###
### ---------------------------------------------------------------- ###

# NOTE: this cell reassigns the globals `editor_input_id`, `editor_info` and
# `editor_df`; `editor_df` is also written by select_editor() above.

## Use the variable restored from the last notebook: editors_conflicts (pd.DataFrame)  ##
## Display the dataframe using interactive grid, you could learn more through the doc: ##
## https://qgrid.readthedocs.io/en/latest/                                             ##
qgrid.set_grid_option('maxVisibleRows', 5)
qgrid_init = qgrid.show_grid(editors_conflicts)
display(qgrid_init)

## Get the editor info with Wikipedia API (get_editor() method), more details you could check: ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/api.py                        ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/wikipedia.py                  ##
wikipedia_dv = WikipediaDV(WikipediaAPI(lng=lng_selected))

# This is an example editor index (the second row of the grid). You could change it manually
# by typing in a new index from the above grid, e.g. 737021
editor_input_id = editors_conflicts.index[1]

# Get the editor's information (a pd.Series, shown as a one-column frame below)
editor_info = wikipedia_dv.get_editor(int(editor_input_id))

## Display the basic information of the selected editor ##
editor_url = f'{wikipedia_dv.api.base}action=query&list=users&ususerids={editor_input_id}&usprop=blockinfo|editcount|registration|gender&format=json'
print("Editor's metadata can be found in:")
print(editor_url)
display(md("### Current Selection:"))
display(editor_info.to_frame('values'))

## Interactive evolution of conflict score of this editor, using ConflictCalculatorListener, more details see ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/visualization/calculator_listener.py                  ##
display(md(f"#### Evolution of the Conflict Score of *{editor_info['name']}*"))

# Dataframe containing the actions of the selected editor, used by the interactive plot
elegibles_actions = remove_stopwords(sources["elegibles_all"], lng=lng_selected)
editor_df = elegibles_actions[elegibles_actions['editor'] == str(editor_input_id)].copy()

# Create a ConflictCalculatorListener instance.
conflicts_cal_listener = ConflictCalculatorListener(editor_df)

# Set parameters (the date range is fixed here, not derived from the data)
begin_date = date(2002, 3, 1)
end_date = date(2019, 12, 16)
frequency = 'Daily' # the interactive dropdown offers 'Yearly', 'Monthly', 'Daily'; NOTE(review): 'Weekly' was also listed here, confirm the listener supports it

# The metrics we need:
# ['Conflict Score', 'Absolute Conflict Score', 'Conflict Ratio', 'Number of Conflicts',
#  'Total Elegible Actions', 'Total Conflict Time', 'Total Elegible Time', 
# 'Time per Conflict Action', 'Time per Elegible Action', ('None')]
# Note: only 'red_line' has 'None' option.
black_line = 'Conflict Score'
red_line = 'None'

print('Time range from', begin_date.strftime("%Y-%m-%d"), 'to', end_date.strftime("%Y-%m-%d"))
print('Total Page conflict score:', con_manager_all.get_page_conflict_score())

# Call the listener directly with the same keyword arguments the widgets would pass.
conflicts_cal_listener.listen(
    _range1 = begin_date, 
    _range2 = end_date,
    granularity = frequency,
    black = black_line,
    red = red_line
)

# store the editor_input_id and editor_info for the usage in next notebook
%store editor_input_id
%store editor_info

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Editor's metadata can be found in:
https://en.wikipedia.org/w/api.php?action=query&list=users&ususerids=1413513&usprop=blockinfo|editcount|registration|gender&format=json


### Current Selection:

Unnamed: 0,values
userid,1413513
name,KleenupKrew
editcount,1323
registration,2006-05-10T16:13:03Z
gender,unknown


#### Evolution of the Conflict Score of *KleenupKrew*

Time range from 2002-03-01 to 2019-12-16
Total Page conflict score: 0.8878011576569786


Stored 'editor_input_id' (int64)
Stored 'editor_info' (Series)


---

# B. Modified pages of an editor

Provided through the Xtools API

The following is some metadata about the creation and deletion
of pages in Wikipedia by the editor.

In [9]:
# create and display the button
button1 = widgets.Button(description="Get Modified Pages", layout=Layout(width='160px'))
display(button1)

# Xtools API client and its data view for the selected language; `xtools_dv`
# is also used by the created-pages cell further down.
xtools_api = XtoolsAPI(lng=lng_selected)
xtools_dv = XtoolsDV(xtools_api)
def on_click_modified_pages(b):
    """Button handler: show the modified-pages counts of the current editor.

    Reads the global ``the_editor`` and renders into the ``out1`` output
    widget. While the Xtools request runs a "Loading data..." note is shown;
    it is replaced by the result, or by a "no modified pages" message when the
    request or its rendering fails.

    Args:
        b: the clicked button (not used).
    """
    with out1:
        editor_name = the_editor['name']
        clear_output()
        display(md(f"***Editor: {editor_name}***"))
        try:
            display(md('Loading data...'))
            editor_info_xtools = xtools_dv.get_modified_pages_counts_per_editor(editor_name)
            # Drop the loading note and redraw the header above the result.
            clear_output()
            display(md(f"***Editor: {editor_name}***"))
            url = f"{xtools_dv.api.base}user/pages_count/{xtools_dv.api.project}/{urllib.parse.quote(editor_name)}"
            print(url)
            display(editor_info_xtools.to_frame('value'))
        except Exception:
            # `except Exception` rather than a bare `except`, so interrupting
            # the kernel is not reported as "no modified pages".
            clear_output()
            display(md('**There are no modified pages by this editor.**'))
            
# Output area that on_click_modified_pages() renders into.
out1 = Output()
display(out1)

# set the event
button1.on_click(on_click_modified_pages)

# trigger the event once right away, so the result for the currently stored
# editor is shown without a click
on_click_modified_pages(button1)

Button(description='Get Modified Pages', layout=Layout(width='160px'), style=ButtonStyle())

Output()

<span style="color: #626262"> Try yourself! This is what will happen when you click 'Get Modified Pages' button: </span>

In [10]:
### ---------------------------------------------------------------------------------- ###
### TRY YOURSELF! THIS IS WHAT WILL HAPPEN WHEN YOU CLICK 'Get Modified Pages' BUTTON  ###
### ---------------------------------------------------------------------------------- ###

## This is the page you used and the editor you select in the above grid. ##
print('Selected editor:', editor_info['name'])

## Use Xtools API to obtain the info of modified pages, more details see:  ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/xtools.py ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/api.py    ##

# Create a Xtools instance (API client plus data view) for the selected language.
xtoolsapi = XtoolsAPI(lng=lng_selected)
xtoolsdv = XtoolsDV(xtoolsapi)

# Try to do the retrieval.
try:
    display(md('Loading data...'))

    # Use 'get_modified_pages_counts_per_editor()' method to obtain data from Xtools.
    editor_modify_pages = xtoolsdv.get_modified_pages_counts_per_editor(editor_info['name'])

    # Replace the loading note with the header and the result.
    clear_output()
    print('Selected editor:', editor_info['name'])

    # Metadata.
    url = f"{xtoolsdv.api.base}user/pages_count/{xtoolsdv.api.project}/{urllib.parse.quote(editor_info['name'])}"
    print(url)

    # Display the result as a one-column pd.DataFrame
    display(editor_modify_pages.to_frame('value'))

except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are not reported as "no modified pages".
    clear_output()
    display(md('**There are no modified pages by this editor.**'))

Selected editor: KleenupKrew
https://xtools.wmflabs.org/api/user/pages_count/en.wikipedia.org/KleenupKrew


Unnamed: 0,value
Created pages:,3
Deleted pages,0


---

# C. Select one page created by an editor

Provided through the Xtools API

---

# C. Select one page created by an editor

Provided through the Xtools API

The following table shows a list of pages created by the editor with some general 
information about each page:

<details>    
    <summary style="cursor: pointer;font-weight:bold">Columns description</summary>
    
- *page_title*: title of the page
- *page_len*: an estimated amount of words in the page
- *rev_id*: the id of the last revision
- *rev_len*: the number of revisions made on that page
- *rev_timestamp*: the timestamp of the last revision (last modification)
</details>

In [11]:
def run_below(ev):
    """Button handler: ask the notebook front end to execute one cell by index.

    NOTE(review): the cell index 19 is hard-coded; presumably it points at the
    created-pages cell right below. Confirm after adding or removing cells.

    Args:
        ev: the clicked button (not used).
    """
    execute_request = Javascript('Jupyter.notebook.execute_cells([19])')
    display(execute_request)


# Build the button, attach the handler and show it.
button = widgets.Button(
    description="Get Pages Created By Editor",
    layout=Layout(width='200px'),
)
button.on_click(run_below)
display(button)

Button(description='Get Pages Created By Editor', layout=Layout(width='200px'), style=ButtonStyle())

In [12]:
def select_page(page):
    global the_page
    page_inputname=page
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(lng=lng_selected))
    try:
        the_page = wikipedia_dv.get_page(int(page_inputname))
    except:
        the_page = wikipedia_dv.get_page(page_inputname)

    with out2:
        %store the_page
        clear_output()
        display(md("### Current Selection:"))
        
        if 'invalid' in the_page:
            display(f"The page {page_inputname} was not found, try a different page")
        else:
            # display the data that will be passed to the first notebook
            display(the_page.to_frame('values'))
            display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to the first workbook</a>'))

def on_selection_change(change):
    """Observer of the created-pages grid: select the page of the first selected row.

    This redefines the observer of the same name used for the editors grid;
    that grid keeps the function object it was registered with.

    Args:
        change: change notification passed by ``observe`` (not used).
    """
    try:
        selected_rows = qg_obj_new.get_selected_df()
        if selected_rows.empty:
            # Nothing is selected, so there is no page to look up; this is not
            # a parsing problem and should not be reported as one.
            return
        select_page(selected_rows.iloc[0].page_title)
    except Exception:
        # `except Exception` keeps KeyboardInterrupt/SystemExit propagating.
        print('Problem parsing the name. Execute the cell again and try a different page.')
        display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))

display(md(f"***Editor: {the_editor['name']}***"))

# Reset the result first: testing `'created_pages' in locals()` would also
# accept a frame left in the kernel by an earlier run (possibly for another
# editor) and show a stale grid after a failed request.
created_pages = None
try:
    display(md("Loading data..."))
    created_pages = xtools_dv.get_created_pages_per_editor(the_editor['name']).T
    # Replace the loading note with the header and the metadata link.
    clear_output()
    display(md(f"***Editor: {the_editor['name']}***"))
    url = f"{xtools_dv.api.base}user/pages/{xtools_dv.api.project}/{urllib.parse.quote(the_editor['name'])}"
    display(md("The metadata of pages created can be found in:"))
    print(url)
except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are not reported as "no created pages".
    created_pages = None
    clear_output()
    display(md('**There are no created pages by this editor.**'))
    display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))

# Build and show the grid only when this run actually retrieved the pages.
if created_pages is not None:
    # `out2` is the output area select_page() renders into.
    out2 = Output()
    qg_obj_new = qgrid.show_grid(created_pages[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']])
    qg_obj_new.observe(on_selection_change, names=['_selected_rows'])

    display(md("### Select one page row for the next notebook:"))
    display(qg_obj_new)
    display(out2)

***Editor: User2004***

The metadata of pages created can be found in:

https://xtools.wmflabs.org/api/user/pages/en.wikipedia.org/User2004


### Select one page row for the next notebook:

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Output()

<span style="color: #626262"> Try yourself! This is what will happen when you click 'Get Pages Created By Editor' button: </span>

In [13]:
### ------------------------------------------------------------------------------------------- ###
### TRY YOURSELF! THIS IS WHAT WILL HAPPEN WHEN YOU CLICK 'Get Pages Created By Editor' BUTTON  ###
### ------------------------------------------------------------------------------------------- ###

## This is the page you used and the editor you select in the above grid. ##
print('Selected editor:', editor_info['name'])

## Use Xtools API to obtain the info of created pages, more details see:   ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/xtools.py ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/api.py    ##

# Reset the result first, so the page selection below never works on a frame
# left in the kernel by an earlier run, nor fails with a NameError when the
# request did not succeed.
pages_created = None

# (If the selected editor has created a page before): try. (Otherwise): except.
try:
    # Use 'get_created_pages_per_editor()' method to get access to the pages info created by this editor.
    print('Loading data...')
    pages_created = xtoolsdv.get_created_pages_per_editor(editor_info['name']).T
    clear_output()
    print('Selected editor:', editor_info['name'])

    # The corresponding metadata can be obtained by the following url:
    url_created = f"{xtoolsdv.api.base}user/pages/{xtoolsdv.api.project}/{urllib.parse.quote(editor_info['name'])}"
    print("The metadata of pages created can be found in:")
    print(url_created)

    # Display the created pages information in qgrid.
    display(qgrid.show_grid(pages_created[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']]))

except Exception:
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are not reported as "no created pages".
    pages_created = None
    display(md('**There are no created pages by this editor.**'))
    display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))

## Select one page title you are interested in and use WikipediaAPI to approach the page basic information ##
## More details please see:                                                                                ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/wikipedia.py                              ##
## https://github.com/gesiscss/wikiwho_demo/blob/master/external/api.py                                    ##

# Only continue when this run retrieved at least one created page.
if pages_created is not None and not pages_created.empty:
    # Select one page by inputting its title, e.g. "Michael Jenkins":
    selected_page_title = pages_created['page_title'].iloc[0]

    # Create a WikipediaAPI(DV) instance.
    wikipediadv = WikipediaDV(WikipediaAPI(lng=lng_selected))

    # Search for the page basic information with the method "get_page()", using
    # the instance created right above instead of `wikipedia_dv` from another cell.
    # Note: global variable "the_page" will be changed!
    the_page = wikipediadv.get_page(selected_page_title)

    # Display the basic information of the page in pd.DataFrame
    if 'invalid' in the_page:
        display(f"The page {selected_page_title} was not found, try a different page")
    else:
        # Display the data that will be passed to the first notebook
        # If you are interested in the code of "get_notebook_by_number()" function, please refer to
        # https://github.com/gesiscss/wikiwho_demo/blob/master/utils/notebooks.py
        print("Current selected page:")
        display(the_page.to_frame('values'))
        display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to the first workbook</a>'))

Selected editor: KleenupKrew
The metadata of pages created can be found in:
https://xtools.wmflabs.org/api/user/pages/en.wikipedia.org/KleenupKrew


QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Current selected page:


Unnamed: 0,values
page_id,5458315
title,Les Humphries Singers
ns,0


In [16]:
# Re-apply input hiding: hide the input area of every cell whose metadata has
# `hide_input` set. The closing tag was malformed (`</script` without `>`).
re_hide = """
<script>
var update_input_visibility = function () {
    Jupyter.notebook.get_cells().forEach(function(cell) {
        if (cell.metadata.hide_input) {
            cell.element.find("div.input").hide();
        }
    })
};
update_input_visibility();
</script>
"""
display(HTML(re_hide))

# Scroll the page back to the top of the notebook container (same closing-tag fix).
scroll_to_top = """
<script>
document.getElementById('notebook').scrollIntoView();
</script>
"""
display(HTML(scroll_to_top))