# Pages report

This notebook is a workaround to build indivual page reports. It defines the main functions to display a report (`display_report(pagename)`). That allow other notebooks to import and explore the various data about the computed data from wikipedia pages and subsequent relationshipts (networks of users-pages, pages-pages and users-users). It also include synthesis of time wise analysis like page views analytics.

This page is mainly used by the [page explorer](page explorer.ipynb) notebook.

In [5]:
# -*- coding: utf-8 -*-
%matplotlib inline
%config InlineBackend.figure_formats=['svg']

import json
import codecs

import math

import numpy as np

import pandas as pd
import datetime as dt

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as dates

import networkx as nx

import seaborn as sns

sns.set(style="whitegrid")
import networkx as nx
from IPython.display import display, HTML

# importing datasets

In [6]:
#base_dir = '../listgeometry'

# list of page names
#pagenames_file = '%s/pagenames' % basedir
#pages = codecs.open(pagenames_file,"r", "utf-8-sig").readlines()
#pages = map(lambda x: x.strip(), pages)

# page graph obtained by projecting page-editor bi-partite graph
#pages_graph = nx.read_gexf("%s/graph/projected_graph_page.gexf" % basedir)

# page graph obtained by projecting page-editor bi-partite graph
#pages_editors_graph = nx.read_gexf("%s/graph/pages-editors-graph.gexf" % directory)

In [7]:
def table_to_html(data, cols=[]):
    html = "<table>"
    html += "<tr>"
    for column_content in cols:
        html += "<th>%s</th>" % (column_content)
    html += "</tr>"
    
    for d in data:
        html += "<tr>"
        for column_content in d:
            html += "<td>%s</td>" % (column_content)
        html += "</tr>"
    html += "</table>"
        
    return HTML(html)

## top editors

In [8]:
def display_top_editors(directory,page):    
    pages_editors_graph = nx.read_gexf("%s/graph/pages-editors-graph.gexf" % directory)
    
    
    top_editors = pages_editors_graph["p:%s" % (page)]    
#    print top_editors.items()
    top_editors = sorted(top_editors.items(), key=lambda x: (-x[1]["revisions"], -pages_editors_graph.node[x[0]]["revisions"]))

    # print top_editors
    
    data = []

    for name, edits in top_editors[0:10]:
        data.append(["<a href=\"http://en.wikipedia.org/wiki/User:{0}\" target=\"_blank\">{0}</a>".format(name.split(":")[1]),
                     edits["revisions"],
                    pages_editors_graph.node[name]["revisions"]])

    display(table_to_html(data, ["editor name", "edits on that page", "edits over the corpus"]))

if __name__ == "__main__":
    display_top_editors(base_dir,"Pi")    

NameError: name 'base_dir' is not defined

## pageviews and revisions

In [None]:
def display_pageviews_revisions(directory,page):
    pageviews = pd.DataFrame.from_csv("%s/data/pageviews/%s.weekly.csv" % (directory,page))
    revisions = pd.DataFrame.from_csv("%s/data/revisions/%s.weekly.csv" % (directory,page))

    pageviews.plot(figsize=(12, 2), subplots=False, linewidth="0.5", ylim=0, colormap="Spectral", rot=0)    
    revisions.plot(figsize=(12, 2), linewidth="0.5", ylim=0)
    plt.show() 

if __name__ == "__main__":
    display_pageviews_revisions(base_dir,"Pi")

## local graph

In [None]:
def display_local_graph(directory,page):
    g1 = nx.read_gexf("%s/data/reading_maps/pages-coedited-reduced-3.gexf" % directory)
    nbunch = [ page ]
    nbunch.extend( list(g1.to_undirected()[page]))
    g2 = g1.subgraph(nbunch)

    #nx.draw_spring(g2)
    
    pos = nx.spring_layout(g2,iterations=50)
   
    nx.draw_networkx_nodes(g2, pos)
    nx.draw_networkx_edges(g2, pos)
    nx.draw_networkx_labels(g2, pos)
    
    plt.axis('off')
    plt.show()

if __name__ == "__main__":
    display_local_graph(base_dir,"Paraboloid")

# final report

In [None]:
def display_report(page):
    directory=base_dir
    pages_graph = nx.read_gexf("%s/graph/projected_graph_page.gexf" % directory)
    pages_editors_graph = nx.read_gexf("%s/graph/pages-editors-graph.gexf" % directory)
    
    display(HTML("<h2>%s</h2>" % (page)))
    
    #display(HTML("<div style=\"float:left\">"))    
    display(HTML("<h3>co-edited pages</h3>"))

    nb = sorted(pages_graph["p:%s" % (page)].items(),
            key=lambda (k,x): -int(x["coeditors"]))

    data = []

    # calculate rank in neighbor top co-edited ranking
    for name, info in nb:
        nb_mirror = sorted(pages_graph[name].items(),
                key=lambda (k,x): -int(x["coeditors"]))
        nb_mirror = [ x[0] for x in nb_mirror ]
        
        editors = pages_editors_graph[name]        
        info["editors"] = len(editors)
        info["exclusive editors"] = len([n for n in editors if len(pages_editors_graph[n]) == 1 ])

        info["ranking"] = nb_mirror.index("p:%s" % (page)) + 1

    #print nb

    for name, info in nb[0:10]:
        data.append([ u"<a href=\"http://en.wikipedia.org/wiki/{0}\" target=\"_blank\">{0}</a>".format(name.split(":")[1]),
                     info["editors"],
                     info["coeditors"],
                     float(info["coeditors"]) / float(info["editors"]),
                     info["exclusive editors"],
                     info["ranking"]])

    display(table_to_html(data, ["page name", "editors", "co-editors", "co-editors/editors", "exclusive editors" ,"ranking"]))
    #display(HTML("</div>"))    

    #display(HTML("<div style=\"float:left\">"))    
    display(HTML("<h3>ranked first in</h3>"))

    nb_list = [ x[0] for x in nb ]
    data = []

    nb2 = sorted(nb, key=lambda (x): x[1]["ranking"])
    for name, info in nb2[0:10]:
        editors = pages_editors_graph[name]        
        info["editors"] = len(editors)
        info["exclusive editors"] = len([n for n in editors if len(pages_editors_graph[n]) == 1 ])

        data.append([ u"<a href=\"http://en.wikipedia.org/wiki/{0}\" target=\"_blank\">{0}</a>".format(name.split(":")[1]),
                     info["editors"],
                     info["coeditors"],
                     float(info["coeditors"]) / float(info["editors"]),
                     info["exclusive editors"],
                     info["ranking"]])  

    display(table_to_html(data, ["page name", "editors", "co-editors", "co-editors/editors", "exclusive editors" ,"ranking"]))
#     display(HTML("</div>"))    
#     display(HTML("<div style=\"clear:both\"></div>"))
    
    display(HTML("<h3>pageviews and revisions</h3>"))
    display_pageviews_revisions(directory,page)
    
    display(HTML("<h3>top editors</h3>"))
    display_top_editors(directory,page)

    display(HTML("<h3>local subgraph</h3>"))
    display_local_graph(directory,page)
    
if __name__ == "__main__":
    display_report("3-sphere")