In [1]:
# Makes the plots appear within the notebook
%matplotlib inline

# Two fundamental packages for doing data manipulation
import numpy as np                   # http://www.numpy.org/
import pandas as pd                  # http://pandas.pydata.org/

# Package for requesting data via the web and parsing resulting JSON
import requests
import json
from bs4 import BeautifulSoup

# Packages for analyzing complex networks
import networkx as nx                # https://networkx.github.io/

In [2]:
# Example article title; the exploratory cells below pass it to the API helpers.
# NOTE(review): the batch loop near the end of the notebook rebinds this same
# global name, so its value depends on which cells have already been run.
page_title = "2004 Chadian coup d'état attempt"

In [3]:
def image_getter_en(page_title, session=None, extensions=('.png', '.jpg')):
    """Return the titles of the image files embedded in an English Wikipedia page.

    Queries the MediaWiki API (``action=query&prop=images``) for ``page_title``
    and keeps the file titles whose name ends with one of ``extensions``.

    Parameters
    ----------
    page_title : str
        Title of the article to inspect.
    session : object, optional
        Anything exposing a requests-compatible ``get(url, params=...)`` method
        (for example a ``requests.Session``). Defaults to the ``requests`` module.
    extensions : iterable of str, optional
        File-name suffixes to keep; compared case-insensitively so that
        ``File:Foo.JPG`` is kept as well as ``File:Foo.jpg``.

    Returns
    -------
    list of str
        Matching file titles in the order the API returned them. Empty when the
        page lists no images at all (a notice is printed in that case) or when
        none of the listed images has a matching extension.
    """
    http = requests if session is None else session

    # Passing the query as ``params`` lets requests percent-encode the title, so
    # titles containing '&', '+', '#', ... no longer corrupt the query string.
    params = {
        'action': 'query',
        'format': 'json',
        'titles': page_title,
        'prop': 'images',
        'imlimit': 100,
    }
    wanted_suffixes = tuple(ext.lower() for ext in extensions)

    matching_titles = []
    page_lists_images = False

    while True:
        payload = http.get("https://en.wikipedia.org/w/api.php", params=params).json()

        # The result is keyed by page id, which we do not know in advance.
        for page in payload.get('query', {}).get('pages', {}).values():
            if 'images' in page:
                page_lists_images = True
            for image in page.get('images', []):
                title = image['title']
                if title.lower().endswith(wanted_suffixes):
                    matching_titles.append(title)

        # The API returns at most ``imlimit`` entries per request; when more are
        # available it supplies a 'continue' object to merge into the next query.
        if 'continue' not in payload:
            break
        params.update(payload['continue'])

    if not page_lists_images:
        print('No images found in the page')

    return matching_titles

In [4]:


# Scratch cell: issue the same prop=images query that image_getter_en builds,
# by hand, so the raw JSON response can be inspected.
_S="https://en.wikipedia.org/w/api.php?action=query&format=json&titles={0}&prop=images&imlimit=100".format(page_title)
    
    
json_string = requests.get(_S).json()


# Bare name on the last line so the notebook displays the response.
json_string


{'batchcomplete': '',
 'query': {'pages': {'5949224': {'ns': 0,
    'pageid': 5949224,
    'title': "2004 Chadian coup d'état attempt"}}}}

In [5]:
# Collect the image titles for the example page; the bare name on the last
# line makes the notebook display the resulting list.
page_image_list = image_getter_en(page_title)
page_image_list

No images found in the page


[]

In [6]:
def image_page_getter_en(page_image_list, session=None):
    """Map each image to the English Wikipedia pages that use it.

    For every file title in ``page_image_list`` the MediaWiki API is queried
    with ``prop=fileusage`` (namespaces ``0|6|-2``) and the titles of the pages
    reported as using the file are collected.

    Parameters
    ----------
    page_image_list : iterable of str
        File titles, e.g. as returned by ``image_getter_en``.
    session : object, optional
        Anything exposing a requests-compatible ``get(url, params=...)`` method
        (for example a ``requests.Session``). Defaults to the ``requests`` module.

    Returns
    -------
    dict
        ``{file title: [titles of pages using the file]}``; the list is empty
        when the API reports no usage for that file.
    """
    http = requests if session is None else session
    usage_by_image = {}

    for image in page_image_list:
        # ``params`` makes requests percent-encode the file title ('&', '+', ...).
        params = {
            'action': 'query',
            'format': 'json',
            'titles': image,
            'prop': 'fileusage',
            'funamespace': '0|6|-2',
            'fulimit': 500,
        }
        using_pages = []

        while True:
            payload = http.get("https://en.wikipedia.org/w/api.php", params=params).json()

            # Results are keyed by page id. The id is not always '-1', so walk
            # every returned page instead of indexing a hard-coded key, which
            # raised KeyError for any file reported under another id.
            for page in payload.get('query', {}).get('pages', {}).values():
                using_pages.extend(usage['title'] for usage in page.get('fileusage', []))

            # Follow API continuation so files used on more than ``fulimit``
            # pages are not silently truncated.
            if 'continue' not in payload:
                break
            params.update(payload['continue'])

        usage_by_image[image] = using_pages

    return usage_by_image

In [7]:
def global_page_getter(page_image_list, session=None):
    """Map each image to the main-namespace pages that use it, grouped by wiki.

    For every file title in ``page_image_list`` the MediaWiki API is queried
    with ``prop=globalusage`` (``guprop=url|namespace``). Usages whose ``ns``
    is 0 are kept and grouped under the first dot-separated component of the
    reporting wiki's host name (``'de.wikipedia.org'`` -> ``'de'``).

    Parameters
    ----------
    page_image_list : iterable of str
        File titles, e.g. as returned by ``image_getter_en``.
    session : object, optional
        Anything exposing a requests-compatible ``get(url, params=...)`` method
        (for example a ``requests.Session``). Defaults to the ``requests`` module.

    Returns
    -------
    dict
        ``{file title: {wiki prefix: [page titles]}}``; the inner dict is empty
        when no main-namespace usage is reported for the file.
    """
    http = requests if session is None else session
    usage_by_image = {}

    for image in page_image_list:
        # ``params`` makes requests percent-encode the file title ('&', '+', ...).
        params = {
            'action': 'query',
            'format': 'json',
            'titles': image,
            'prop': 'globalusage',
            'guprop': 'url|namespace',
            'gulimit': 500,
        }
        titles_by_wiki = {}

        while True:
            payload = http.get("https://en.wikipedia.org/w/api.php", params=params).json()

            # Results are keyed by page id. Walk every returned page rather than
            # only '-1', and tolerate a missing 'globalusage' key, so files
            # reported under another id are no longer dropped and an absent key
            # no longer raises KeyError.
            for page in payload.get('query', {}).get('pages', {}).values():
                for usage in page.get('globalusage', []):
                    # Compare as text so both "0" and 0 count as main namespace.
                    if str(usage.get('ns')) != '0':
                        continue
                    wiki_prefix = usage['wiki'].split('.')[0]
                    titles_by_wiki.setdefault(wiki_prefix, []).append(usage['title'])

            # Follow API continuation: widely used images exceed ``gulimit``
            # and would otherwise be truncated to the first batch.
            if 'continue' not in payload:
                break
            params.update(payload['continue'])

        usage_by_image[image] = titles_by_wiki

    return usage_by_image
    

In [41]:


# Scratch cell: store the raw, unfiltered prop=globalusage response for each
# image in page_image_list, keyed by image title.
_filelink_dict=dict()  
for image in page_image_list:
                
    _S="https://en.wikipedia.org/w/api.php?action=query&format=json&titles={0}&prop=globalusage&guprop=url|namespace&gulimit=500".format(image)
   
    json_string = requests.get(_S).json()    
    _filelink_dict[image] = json_string 
    
    
    

In [42]:
# Display the notebook-global _filelink_dict.
_filelink_dict

{}

In [43]:
# Run the global-usage helper over page_image_list; the bare name on the last
# line makes the notebook display the result.
image_link_dict = global_page_getter(page_image_list)
image_link_dict

{}

In [44]:
def graph_networks(image_link_dict, graph_name=None):
    """Build a directed wiki -> image graph and save it as a GEXF file.

    One edge is added from every wiki prefix to every file it uses, weighted by
    the number of pages listed for that (wiki, file) pair. A summary line with
    the edge and node counts is printed, and the graph is written to
    ``'<graph_name>.gexf'`` (a relative path).

    Parameters
    ----------
    image_link_dict : dict
        ``{file title: {wiki prefix: [page titles]}}``, e.g. the return value
        of ``global_page_getter``.
    graph_name : str, optional
        Base name of the output file. When omitted, the notebook-global
        ``page_title`` is used, which is what this function always did before
        the parameter existed.

    Returns
    -------
    networkx.DiGraph
        The graph that was written to disk.
    """
    if graph_name is None:
        # Backward-compatible fallback to the hidden global the original read.
        graph_name = page_title

    g = nx.DiGraph()

    for filename, titles_by_wiki in image_link_dict.items():
        for lang, page_list in titles_by_wiki.items():
            g.add_edge(lang, filename, weight=len(page_list))

    print("There are {0} edges and {1} nodes in the page-image network.".format(g.number_of_edges(), g.number_of_nodes()))

    # Titles may contain path separators; replace them so the name is always a
    # single file in the working directory.
    safe_name = graph_name.replace('/', '_').replace('\\', '_')
    nx.write_gexf(g, '{0}.gexf'.format(safe_name))

    return g

In [45]:
def super_image_function(page_title):
    """Return the cross-wiki usage of every image found on ``page_title``.

    Chains the two helpers: ``image_getter_en`` lists the page's image titles
    and ``global_page_getter`` looks up where each of those images is used.
    The result of ``global_page_getter`` is returned unchanged.
    """
    images_on_page = image_getter_en(page_title)
    return global_page_getter(images_on_page)
    

In [47]:
# page title -> result of super_image_function for that page
final_image_dict = {}

# page title -> graph returned by graph_networks for that page
graph_dictionary = {}

# NOTE(review): page_title_list is not defined in any cell visible here; it must
# be created before this cell runs or the loop fails with NameError.
# The loop variable is deliberately called page_title: graph_networks reads the
# global of that name when choosing its output file name.
for page_title in page_title_list:

    try:
        fid = super_image_function(page_title)
        final_image_dict[page_title] = fid

        g = graph_networks(fid)
        graph_dictionary[page_title] = g

    except Exception as err:
        # Best-effort batch: report the failing page together with the actual
        # error and move on. Catching Exception rather than using a bare except
        # lets KeyboardInterrupt and SystemExit propagate on their own.
        print("{0} had an error! ({1!r})".format(page_title, err))
    

There are 82 edges and 74 nodes in the page-image network.
There are 39 edges and 37 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 11 edges and 12 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 53 edges and 40 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 1 edges and 2 nodes in the page-image network.
There are 101 edges and 74 nodes in the page-image network.
There are 38 edges and 36 nodes in the page-image network.
There are 30 edges and 29 nodes in the page-image network.
There are 58 edges and 37 nodes in the page-image network.
There are 0 edges and 0 nodes in the page-image network.
There are 0 ed

# Make a network