In [1]:
from IPython import display

# Render the project README at the top of the notebook. Passing `filename=`
# explicitly makes a missing file raise an error; a bare positional string is
# only treated as a path when that file exists and is otherwise rendered as
# literal Markdown text.
display.Markdown(filename='README.md')

# Pywikibot to Gephi


[![nbviewer](https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg)](https://nbviewer.jupyter.org/github/UncleCJ/pywikibot-gephi-experiments/blob/dev2/pywikibot2gephi.ipynb) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/UncleCJ/pywikibot-gephi-experiments/dev2?filepath=pywikibot2gephi.ipynb)


See also:

* https://www.mediawiki.org/wiki/Manual:Pywikibot/PAWS
* https://www.mediawiki.org/wiki/Manual:Pywikibot
* https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial
* https://wikitech.wikimedia.org/wiki/PAWS/PAWS_examples_and_recipes
* https://towardsdatascience.com/newbies-guide-to-python-igraph-4e51689c35b4
* https://igraph.org/python/api/master/igraph.html
* https://doc.wikimedia.org/pywikibot/master/index.html
* https://public.paws.wmcloud.org/309423/ds4ux/jupyter-intro.ipynb
* https://public.paws.wmcloud.org/19781798/WikidataMapMakingWorkshop/WikidataMapMakingWorkshop.ipynb
* https://pythonrepo.com/repo/markusschanta-awesome-jupyter-python-repl
* [Previous tutorial: How to use Gephi to visualize from Wikidata](https://gist.github.com/UncleCJ/2408aef8eab09cc1da3404c5af43537b)
* [My advent-of-code solutions, also in Jupyter notebooks](https://github.com/UncleCJ/advent-of-code)
* https://www.janmeppe.com/blog/how-to-add-new-kernel-in-jupyter-notebook/
* https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html
* https://pyup.io/changelogs/pywikibot/
* https://heardlibrary.github.io/digital-scholarship/host/wikidata/bot/
* https://heardlibrary.github.io/digital-scholarship/host/wikidata/pywikibot/
* https://cambridge-intelligence.com/python-graph-visualization-using-jupyter-regraph/
* http://compbio.ucsd.edu/bringing-interactivity-network-visualization-jupyter-notebooks-visjs2jupyter/
* https://ipython-books.github.io/64-visualizing-a-networkx-graph-in-the-notebook-with-d3js/
* https://melaniewalsh.github.io/Intro-Cultural-Analytics/06-Network-Analysis/02-Making-Network-Viz-with-Bokeh.html
* https://github.com/adamhajari/nbgitconvert

In [2]:
import pywikibot
import igraph as ig
import igraph.remote.gephi as igg

In [3]:
# Show the pywikibot configuration this notebook runs with. `filename=` makes
# IPython read the file, so a missing file is an error instead of the path
# string itself being displayed as code.
# NOTE(review): the rendered contents are stored in the notebook output —
# confirm user-config.py holds no credentials before sharing the notebook.
display.Code(filename='user-config.py')

In [4]:
# Create the pywikibot Site object for the MediaWiki instance configured in
# user-config.py.
# TODO: look up whether custom family files are picked up from a `families`
# folder under the pywikibot base directory (PYWIKIBOT_ROOT? — name unverified).
site = pywikibot.Site()

In [5]:
# The two wiki pages that become the first vertices of the graph:
# an episode page and a character page.
page1, page2 = (
    pywikibot.Page(site, title)
    for title in ('Eps1.1 ones-and-zer0es.mpeg', 'Elliot Alderson')
)

In [6]:
def _page_attributes(page):
    """Collect the attributes of one wiki page as a dict of plain values.

    pywikibot frequently returns objects which may not be serializable, so
    everything that is not already a str/int is flattened to a string here.

    Args:
        page: a ``pywikibot.Page``.

    Returns:
        dict with the keys ``name``, ``pageid``, ``revision_count``,
        ``namespace``, ``categories`` and ``contributors``; the last two are
        ``;``-separated strings.
    """
    return {
        'name': page.title(),
        # Note how several attributes/properties are methods, while this is an int
        #  - you can specify to pywikibot.Page whether you want to fetch the content
        'pageid': page.pageid,
        'revision_count': page.revision_count(),
        # Namespace objects are converted to their string form
        'namespace': str(page.namespace()),
        # Category objects are reduced to their titles
        'categories': ';'.join(category.title() for category in page.categories()),
        # Contributors are a dict of usernames and number of revisions;
        # only the usernames are kept
        'contributors': ';'.join(page.contributors().keys()),
    }


page1_attributes = _page_attributes(page1)
page2_attributes = _page_attributes(page2)

In [None]:
# igraph doesn't seem to offer much convenience for looking vertices up,
# so keep our own record of vertex IDs (edges will be fine without one)
vertex_ids = ig.UniqueIdGenerator()

# The graph itself starts out empty
g = ig.Graph()

In [None]:
for attributes in page1_attributes, page2_attributes:
    # Gephi expects vertex/node names in the `Label` field
    attributes['Label'] = attributes['name']
    # igraph.UniqueIdGenerator will retrieve an ID if the key exists, or register the next one
    attributes['id'] = vertex_ids[attributes['name']]
    # The ID is later used as the vertex index, so a vertex must be added for
    # every newly generated ID. Re-running this cell yields the same IDs again;
    # skip pages whose vertex is already in place so no duplicates are appended.
    already_added = (
        attributes['id'] < g.vcount()
        and g.vs[attributes['id']]['name'] == attributes['name']
    )
    if not already_added:
        g.add_vertices(1, attributes)

In [None]:
# Print every vertex of the graph, one per line
for vertex in g.vs:
    print(vertex)

In [None]:
# Sanity check: the ID stored for each page must index the vertex of that page
for attributes in [page1_attributes, page2_attributes]:
    assert attributes['name'] == g.vs[attributes['id']]['name'], (
        f'vertex {attributes["id"]} does not belong to page "{attributes["name"]}"'
    )
    print(f'Page "{attributes["name"]}" got the vertex ID we expected')

# Connect the two pages. get_eid(..., error=False) returns -1 when no such edge
# exists, so re-running this cell does not pile up parallel edges.
if g.get_eid(page1_attributes['id'], page2_attributes['id'], error=False) == -1:
    g.add_edge(page1_attributes['id'], page2_attributes['id'])

In [None]:
# Send the graph to Gephi through igraph's remote Gephi module.
# NOTE(review): presumably this needs a running Gephi instance with its graph
# streaming server reachable at GephiConnection's default address — confirm.
gephi = igg.GephiConnection()
streamer = igg.GephiGraphStreamer()
# Post graph `g` over the connection created above
streamer.post(g, gephi)

In [None]:
# Optional: save the graph and the vertex ID generator to disk (uncomment to use)
# import pickle

# with open('mrrobotgraph.pickle', 'wb') as handle:
#     pickle.dump(g, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('mrrobot_idgen.pickle', 'wb') as handle:
#     pickle.dump(vertex_ids, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
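# Optional: load a previously saved graph and vertex ID generator (needs `import pickle`).
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
# with open('mrrobotgraph.pickle', 'rb') as handle:
#     g = pickle.load(handle)

# with open('mrrobot_idgen.pickle', 'rb') as handle:
#     vertex_ids = pickle.load(handle)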

In [None]:
# Report the size of the graph, then show its first two vertices
print(f'vertices: {g.vcount()}, edges: {g.ecount()}')

for index in (0, 1):
    print(g.vs[index])

# print(g)