Import Libs

In [1]:
# Makes the plots appear within the notebook
%matplotlib inline

# Two fundamental packages for doing data manipulation
import numpy as np                   # http://www.numpy.org/
import pandas as pd                  # http://pandas.pydata.org/

# Two related packages for plotting data
import matplotlib.pyplot as plt      # http://matplotlib.org/

# Package for requesting data via the web and parsing resulting JSON
import requests
import json
from bs4 import BeautifulSoup

# Packages for analyzing complex networks
import networkx as nx                # https://networkx.github.io/

pd.options.display.max_columns = 100
pd.options.display.max_rows = 110

In [18]:

page_title = "2006 Fijian coup d'état"

_S="https://en.wikipedia.org/w/api.php?action=query&format=json&prop=langlinks&titles={0}&llprop=autonym|langname&lllimit=500".format(page_title)
    
req = requests.get(_S)

json_string = json.loads(req.text)

json_string

{'batchcomplete': '',
 'query': {'pages': {'8177734': {'langlinks': [{'*': 'Golpe de Estado en Fiyi de 2006',
      'autonym': 'español',
      'lang': 'es',
      'langname': 'Spanish'},
     {'*': 'Fidžin vallankaappaus 2006',
      'autonym': 'suomi',
      'lang': 'fi',
      'langname': 'Finnish'},
     {'*': 'Kudeta Fiji 2006',
      'autonym': 'Bahasa Indonesia',
      'lang': 'id',
      'langname': 'Indonesian'},
     {'*': 'Zamach stanu na Fidżi (2006)',
      'autonym': 'polski',
      'lang': 'pl',
      'langname': 'Polish'},
     {'*': 'Golpe de Estado nas Fiji de 2006',
      'autonym': 'português',
      'lang': 'pt',
      'langname': 'Portuguese'},
     {'*': 'Государственный переворот в Фиджи (2006)',
      'autonym': 'русский',
      'lang': 'ru',
      'langname': 'Russian'},
     {'*': '2006年斐濟軍事政變',
      'autonym': '中文',
      'lang': 'zh',
      'langname': 'Chinese'}],
    'ns': 0,
    'pageid': 8177734,
    'title': "2006 Fijian coup d'état"}}}}

In [3]:

def link_getter(page_title):
    
    _S="https://en.wikipedia.org/w/api.php?action=query&format=json&prop=langlinks&titles={0}&llprop=autonym|langname&lllimit=500".format(page_title)
    
    req = requests.get(_S)

    json_string = json.loads(req.text)
    
    _pageID=list(json_string['query']['pages'].keys())[0]

    if 'langlinks' in json_string['query']['pages'][_pageID]:
        
        _langlink_list=json_string['query']['pages'][_pageID]['langlinks']

    
        _langlink_dict=dict()

        for d in _langlink_list:
            _lang=d['lang']
            _title=d['*']
            _langlink_dict[_lang]=_title
            
        _langlink_dict['en'] = page_title
    
        return _langlink_dict
    
    else:
        return {}

In [4]:

def name_getter(page_title):
    
    _S="https://en.wikipedia.org/w/api.php?action=query&format=json&prop=langlinks&titles={0}&llprop=autonym|langname&lllimit=500".format(page_title)
    
    req = requests.get(_S)

    json_string = json.loads(req.text)
    
    _pageID=list(json_string['query']['pages'].keys())[0]
    
    if 'langlinks' in json_string['query']['pages'][_pageID]:
    
    
        _langlink_list=json_string['query']['pages'][_pageID]['langlinks']
    
        _langname_dict=dict()

        for t in _langlink_list:
            _lang=t['lang']
            _langname=t['langname']
            _title=t['*']
            _langname_dict[_lang]=_langname
        
        _langname_dict['en'] = _title
    
        return _langname_dict
    else:
        return {}

In [19]:
_langAbrev_dict = {}

names_and_langs = name_getter(page_title)

for lang,langname in names_and_langs.items():
    _langAbrev_dict[lang] = langname

In [21]:
titles_and_lang_dict = {}

pages_and_langs = link_getter(page_title)

for lang,title in pages_and_langs.items():
    titles_and_lang_dict[lang] = title

In [23]:
def get_page_outlinks(page_title,lang='en',redirects=1):
    # Replace spaces with underscores
    page_title = page_title.replace(' ','_')
    
    bad_titles = ['Special:','Wikipedia:','Help:','Template:','Category:','International Standard','Portal:','s:','File:']
    
    # Get the response from the API for a query
    # After passing a page title, the API returns the HTML markup of the current article version within a JSON payload
    req = requests.get('https://{2}.wikipedia.org/w/api.php?action=parse&format=json&page={0}&redirects={1}&prop=text&disableeditsection=1&disabletoc=1'.format(page_title,redirects,lang))
    
    # Read the response into JSON to parse and extract the HTML
    json_string = json.loads(req.text)
    
    # Initialize an empty list to store the links
    outlinks_list = [] 
    
    if 'parse' in json_string.keys():
        page_html = json_string['parse']['text']['*']

        # Parse the HTML into Beautiful Soup
        soup = BeautifulSoup(page_html,'lxml')

        # Delete tags associated with templates
        for tag in soup.find_all('tr'):
            tag.replace_with('')

        # For each paragraph tag, extract the titles within the links
        for para in soup.find_all('p'):
            for link in para.find_all('a'):
                if link.has_attr('title'):
                    title = link['title']
                    # Ignore links that aren't interesting
                    if all(bad not in title for bad in bad_titles):
                        outlinks_list.append(title)

        # For each unordered list, extract the titles within the child links
        for unordered_list in soup.find_all('ul'):
            for item in unordered_list.find_all('li'):
                for link in item.find_all('a'):
                    if link.has_attr('title'):
                        title = link['title']
                        # Ignore links that aren't interesting
                        if all(bad not in title for bad in bad_titles):
                            outlinks_list.append(title)

    return outlinks_list

In [24]:
outlinks_per_lang = {}

language_titles = link_getter(page_title)

for lang,title in language_titles.items():
    #print("The language is",lang,"and the article title is", title)
    outlinks_per_lang[lang] = get_page_outlinks(title,lang)

In [25]:
def get_outlink_translations(outlinks_per_lang):
    translation_dict = dict()
    for lang,links in outlinks_per_lang.items():
        _page_titles = list(set(links))

        translation_dict[lang] = {}

        for _page_title in _page_titles:
            _S="https://{1}.wikipedia.org/w/api.php?action=query&format=json&prop=langlinks&titles={0}&redirects=1&lllimit=500&formatversion=2".format(_page_title,lang)
            response = requests.get(_S).json()
            if 'pages' in response['query'].keys():
                langlink_dict = response['query']['pages'][0]
                translation_dict[lang][_page_title] = {}
                if 'langlinks' in langlink_dict.keys():
                    for _ll in langlink_dict['langlinks']:
                        _ll_title = _ll['title']
                        _ll_lang = _ll['lang']
                        translation_dict[lang][_page_title][_ll_lang] = _ll_title
            else:
                translation_dict[lang][_page_title] = {}
                
    return translation_dict

In [26]:
translation_dict = get_outlink_translations(outlinks_per_lang)

In [27]:
len_transoutlink_dict_counterEn = {}

for lang,titles_dict in translation_dict.items():
    len_transoutlink_dict_counterEn[lang]=0         #starting at fa at a count of 0 
    for art_title,lang_dict in titles_dict.items(): #go into links on fa 
        if 'en' in lang_dict.keys():               #check for en version in links 
            len_transoutlink_dict_counterEn[lang] +=1   #if one is found add to counter 
            

In [28]:
len_transoutlink_dict_enList = {}

for lang,titles_dict in translation_dict.items():
    len_transoutlink_dict_enList[lang]=[]         #starting at fa at a count of 0 
    for art_title,lang_dict in titles_dict.items(): #go into links on fa 
        if 'en' in lang_dict.keys():  
            _title = lang_dict['en']        #check for en version in links 
            len_transoutlink_dict_enList[lang].append(_title)   

In [29]:
len_transoutlink_dict = {}

for langlinks_lang,titles in translation_dict.items():
    len_transoutlink_dict[langlinks_lang] = len(titles)

In [30]:
s_d2 = pd.Series(titles_and_lang_dict)

s_d1 = pd.Series(_langAbrev_dict)

s_d3 = pd.Series(len_transoutlink_dict)

s_d4 = pd.Series(len_transoutlink_dict_counterEn)

In [31]:
df_mix = pd.DataFrame({'lang' : s_d1,
                       'title' : s_d2, 
                       'total links' : s_d3,
                       'english links': s_d4})

df_mix = df_mix[['lang', 'title', 'total links', 'english links']]

In [32]:
df_mix['percent en linkbacks'] = df_mix['english links']/df_mix['total links']

df_mix

Unnamed: 0,lang,title,total links,english links,percent en linkbacks
en,2006年斐濟軍事政變,2006 Fijian coup d'état,137,0,0.0
es,Spanish,Golpe de Estado en Fiyi de 2006,35,28,0.8
fi,Finnish,Fidžin vallankaappaus 2006,10,10,1.0
id,Indonesian,Kudeta Fiji 2006,10,5,0.5
pl,Polish,Zamach stanu na Fidżi (2006),7,7,1.0
pt,Portuguese,Golpe de Estado nas Fiji de 2006,13,12,0.923077
ru,Russian,Государственный переворот в Фиджи (2006),8,8,1.0
zh,Chinese,2006年斐濟軍事政變,43,33,0.767442


In [33]:
def super_function (page_title):
    
    _langlink_dict = link_getter(page_title) #get list of titles per lang  i.e. 'en' : "2013 Egyptian coup d'état"
    
    _langname_dict = name_getter(page_title)#get Language name of each lang with abrev as the key ex: En = English
    
    _langAbrev_dict = {}    #create language name dictionay

    names_and_langs = name_getter(page_title)

    for lang,langname in names_and_langs.items():
        _langAbrev_dict[lang] = langname
        
    
    titles_and_lang_dict = {} # create page title dictionary 

    pages_and_langs = link_getter(page_title)

    for lang,title in pages_and_langs.items():
        titles_and_lang_dict[lang] = title
    
    outlinks_list = get_page_outlinks(page_title,lang='en',redirects=1) #get outlinks of each page for each language
    
    outlinks_per_lang = {} #create dictionary of outlinks for each language 

    language_titles = link_getter(page_title)

    for lang,title in language_titles.items():
    #print("The language is",lang,"and the article title is", title)
        outlinks_per_lang[lang] = get_page_outlinks(title,lang)
    
    translation_dict = get_outlink_translations(outlinks_per_lang) #get ranslations for all outlinks in all other languages 
    
    len_transoutlink_dict_counterEn = {} #make a dictionary for only outlinks that link back in english (number of links only)

    for lang,titles_dict in translation_dict.items():
        len_transoutlink_dict_counterEn[lang]=0         #starting at fa at a count of 0 
        for art_title,lang_dict in titles_dict.items(): #go into links on fa 
            if 'en' in lang_dict.keys():               #check for en version in links 
                len_transoutlink_dict_counterEn[lang] +=1
    
    len_transoutlink_dict = {} #number of outlinks for each ctranslated page 

    for langlinks_lang,titles in translation_dict.items():
        len_transoutlink_dict[langlinks_lang] = len(titles)

    s_d2 = pd.Series(titles_and_lang_dict)

    s_d1 = pd.Series(_langAbrev_dict)

    s_d3 = pd.Series(len_transoutlink_dict)

    s_d4 = pd.Series(len_transoutlink_dict_counterEn)   
    
    df_mix = pd.DataFrame({'lang' : s_d1,  #make a data frame with all dictionaries 
                       'title' : s_d2, 
                       'total links' : s_d3,
                       'english links': s_d4})

    df_mix = df_mix[['lang', 'title', 'total links', 'english links']] #rearrange coumns 
    
    df_mix['percent en linkbacks'] = df_mix['english links']/df_mix['total links']

  
    return df_mix

In [35]:
super_function ("2002 Venezuelan coup d'état attempt")

Unnamed: 0,lang,title,total links,english links,percent en linkbacks
ca,Catalan,Colp d'estat a Veneçuela de 2002,22,18,0.818182
en,Державний переворот у Венесуелі (2002),2002 Venezuelan coup d'état attempt,172,0,0.0
es,Spanish,Golpe de Estado en Venezuela de 2002,190,145,0.763158
fr,French,Coup d'État de 2002 au Venezuela,74,62,0.837838
hy,Armenian,Պետական հեղաշրջման փորձ Վենեսուելայում (2002),12,9,0.75
nl,Dutch,Mislukte staatsgreep in Venezuela in 2002,15,10,0.666667
no,Norwegian,Kuppforsøket i Venezuela i 2002,4,4,1.0
pl,Polish,Zamach stanu w Wenezueli (2002),21,16,0.761905
pt,Portuguese,Golpe de Estado na Venezuela de 2002,33,30,0.909091
ru,Russian,Попытка государственного переворота в Венесуэл...,50,49,0.98


In [34]:
page_title_list = ["2002 Venezuelan coup d'état attempt", "Oakwood mutiny", "2004 Chadian coup d'état attempt","2004 Equatorial Guinea coup d'état attempt" ,"2004 Haitian coup d'état", "2005 Mauritanian coup d'état", "2006 Chadian coup d'état attempt", "2006 Fijian coup d'état", "2006 Malagasy coup d'état attempt", "2006 Thai coup d'état", "2007 Laotian coup d'état conspiracy allegation", "Manila Peninsula siege", "2008 Guinean coup d'état", "2008 Mauritanian coup d'état", "2012 Malian coup d'état", "2013 Egyptian coup d'état", "2013 Libyan coup d'état attempt", "2014 Gambian coup d'état attempt", "2014 Libyan coup d'état attempts", "2014 Thai coup d'état", "Houthi takeover in Yemen", "2015 Burkinabé coup d'état", "2015 Burundian coup d'état attempt", "2016 Libyan coup d'état attempt", "2016 Turkish coup d'état attempt", "Black September", "1971 Moroccan coup attempt", "1971 Sudanese coup d'état", "1971 Ugandan coup d'état", "1972 Moroccan coup attempt", "1972 Republic of the Congo coup d'état attempt", "1973 Chilean coup d'état", "1973 Laotian coup", "1973 Rwandan coup d'état", "1974 Nigerien coup d'état", "1975 Chadian coup d'état", "Assassination of Sheikh Mujibur Rahman", "1976 Argentine coup d'état", "1976 Burundian coup d'état", "1977 Benin coup d'état attempt", "Saur Revolution", "Operation Storm-333", "1979 Equatorial Guinea coup d'état", "Coup d'état of December Twelfth", "1977 Sudan Juba coup d'état attempt", "Coup d'état of May Seventeenth", "Nojeh coup plot", "1980 Liberian coup d'état", "1980 Upper Voltan coup d'état", "1980 Surinamese coup d'état", "1980 Turkish coup d'état", "1981 Central African Republic coup d'état", "1982 Kenyan coup d'état attempt", "1983 Upper Voltan coup d'état attempt", "1984 Cameroonian coup attempt", "1987 Burundian coup d'état", "1987 Fijian coups d'état", "1987 Republic of the Congo coup d'état attempt", "1987 Tunisian coup d'état", "June 1988 Haitian coup d'état", "September 1988 Haitian coup d'état", "1989 Burkinabé coup d'état attempt", "1989 Panamanian coup d'état attempt", "1989 Philippine coup attempt", "1989 Sudanese coup d'état", "Jamaat al Muslimeen coup attempt", "1991 Haitian coup d'état", "1991 Lesotho coup d'état", "1991 Malian coup d'état", "1992 Peruvian constitutional crisis", "1992 Sierra Leonean coup d'état", "1992 Venezuelan coup d'état attempts", "Black May (1992)", "1993 Azeri coup d'état", "1993 Guatemalan constitutional crisis", "1994 Gambian coup d'état", "1995 Azerbaijani coup d'état attempt", "1996 Burundian coup d'état", "1996 Nigerien coup d'état", "1997 clashes in Cambodia", "1997 Turkish military memorandum", "1997 Zambian coup d'état attempt", "1999 Ivorian coup d'état", "1999 Nigerien coup d'état", "1999 Pakistani coup d'état", "2000 Ecuadorian coup d'état", "2000 Fijian coup d'état", "2001 Central African Republic coup d'état attempt", "2001 Burundian coup d'état attempt"]

In [35]:
df_dict = {} #create dictionary with page titles 


for page_title in page_title_list: #run program and ignore errors 
    
    try:
        df = super_function (page_title)

        df.to_csv(page_title + '.csv') #create CSV files 

        df_dict[page_title] = df
    
    except KeyboardInterrupt:
        raise
        
    except:
        print("{0} had an error!".format(page_title))
        pass
    

2016 Libyan coup d'état attempt had an error!
1972 Republic of the Congo coup d'état attempt had an error!
1973 Laotian coup had an error!
1976 Burundian coup d'état had an error!
1977 Benin coup d'état attempt had an error!
1977 Sudan Juba coup d'état attempt had an error!
1981 Central African Republic coup d'état had an error!
1984 Cameroonian coup attempt had an error!
1987 Republic of the Congo coup d'état attempt had an error!
1991 Lesotho coup d'état had an error!
1997 Zambian coup d'état attempt had an error!
2000 Fijian coup d'état had an error!
2001 Burundian coup d'état attempt had an error!
