# full project using map to extrant inbound and outbound links from wikipedia

In [26]:
import json
from urllib import request, parse
import os
from multiprocessing import Pool

In [3]:
def link_to_title(link):
    """
    Creates a helper function for getting the title from a link result
    """
    
    return link["title"]

In [11]:
def clean_if_key(page, key):
    """
    Creates a helper function that gets titles for the links found, if they exist
    """
    
    if key in page.keys():
        return map(link_to_title, page[key])
    else: return []

In [29]:
def get_wiki_links(pageTitle):
    """
    Quotes the title to ensure it’s URL-safe
    """
    
    safe_title = parse.quote(pageTitle)
    url = "https://en.wikipedia.org/w/api.php?action=query&prop=links|linkshere&pllimit=5&lhlimit=5&titles={}&format=json&formatversion=2".format(safe_title)
    page = request.urlopen(url).read() # Sends an HTTP request to the URL and reads the response
    j = json.loads(page) # Parses the response as JSON    
    jpage = j["query"]["pages"][0]
    inbound = clean_if_key(jpage, "links") # Cleans the inbound and outbound links if they exist
    outbound = clean_if_key(jpage, "linkshere")
    return {"title": pageTitle, 
            "in-links": list(inbound),
            "out-links": list(outbound)} # Returns the page’s title and its inbound and outbound links    

In [7]:
def flatten_network(page):
    return page["in-links"] + page["out-links"]

In [23]:
def pages_to_edges(page):
    """
    represent the graph edges as tuples between the in-links and out-links
    """
    a = [(page["title"], p) for p in page["out-links"]]
    b = [(p, page["title"]) for p in page["in-links"]]
    return a + b

In [30]:
# main function
if __name__ == "__main__":
    print("number of CPUs available: {}\n".format(os.cpu_count()))
    
    root = get_wiki_links("Parallel_computing")
    initial_network = flatten_network(root)
    
    with Pool() as P:
        all_pages = P.map(get_wiki_links, initial_network)
        edges = P.map(pages_to_edges, all_pages)
    
    print("\n----- RESULT 1 ----")
    print(all_pages)
    print("\n----- RESULT 2 ----")
    print(edges)
    

number of CPUs available: 2


----- RESULT 1 ----
[{'title': '16-bit', 'in-links': ['16-bit computing', 'Wikipedia:Protection policy', 'Wikipedia:Redirect', 'Category:Redirects from moves'], 'out-links': ['ASCII', 'Amiga', 'Apple II series', 'Atari ST', 'Advanced Micro Devices']}, {'title': '4-bit', 'in-links': ['4-bit computing', 'Wikipedia:Protection policy', 'Wikipedia:Redirect', 'Category:Redirects from moves'], 'out-links': ['Advanced Micro Devices', 'Intel', 'Intel 80286', 'Intel 8080', 'Intel 8086']}, {'title': '64-bit', 'in-links': ['64-bit computing', 'Category:Redirects from moves'], 'out-links': ['ASCII', 'Accelerated Graphics Port', 'Advanced Micro Devices', 'Athlon', 'Buffer overflow']}, {'title': '8-bit', 'in-links': ['8-bit computing', 'Wikipedia:Protection policy', 'Wikipedia:Redirect', 'Category:Redirects from moves'], 'out-links': ['ASCII', 'Amiga', 'Acorn Electron', 'Apple III', 'Atari ST']}, {'title': 'AMD', 'in-links': ['Advanced Micro Devices', 'Stock exchange', '

In [32]:
len(edges)

10

In [33]:
edges[0]

[('16-bit', 'ASCII'),
 ('16-bit', 'Amiga'),
 ('16-bit', 'Apple II series'),
 ('16-bit', 'Atari ST'),
 ('16-bit', 'Advanced Micro Devices'),
 ('16-bit computing', '16-bit'),
 ('Wikipedia:Protection policy', '16-bit'),
 ('Wikipedia:Redirect', '16-bit'),
 ('Category:Redirects from moves', '16-bit')]

In [41]:
[print("edge {}:\n {}\n".format(i, edges[i])) for  i in range(len(edges))]

edge 0:
 [('16-bit', 'ASCII'), ('16-bit', 'Amiga'), ('16-bit', 'Apple II series'), ('16-bit', 'Atari ST'), ('16-bit', 'Advanced Micro Devices'), ('16-bit computing', '16-bit'), ('Wikipedia:Protection policy', '16-bit'), ('Wikipedia:Redirect', '16-bit'), ('Category:Redirects from moves', '16-bit')]

edge 1:
 [('4-bit', 'Advanced Micro Devices'), ('4-bit', 'Intel'), ('4-bit', 'Intel 80286'), ('4-bit', 'Intel 8080'), ('4-bit', 'Intel 8086'), ('4-bit computing', '4-bit'), ('Wikipedia:Protection policy', '4-bit'), ('Wikipedia:Redirect', '4-bit'), ('Category:Redirects from moves', '4-bit')]

edge 2:
 [('64-bit', 'ASCII'), ('64-bit', 'Accelerated Graphics Port'), ('64-bit', 'Advanced Micro Devices'), ('64-bit', 'Athlon'), ('64-bit', 'Buffer overflow'), ('64-bit computing', '64-bit'), ('Category:Redirects from moves', '64-bit')]

edge 3:
 [('8-bit', 'ASCII'), ('8-bit', 'Amiga'), ('8-bit', 'Acorn Electron'), ('8-bit', 'Apple III'), ('8-bit', 'Atari ST'), ('8-bit computing', '8-bit'), ('Wikipedi

[None, None, None, None, None, None, None, None, None, None]