# Assignment 2

### Import

In [63]:
import urllib2
import json
import re
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import collections
import numpy as np
from collections import Counter
from __future__ import division
import io

# Part I: Comunity Structure

** Explain the concept of modularity in your own words. ** 

**Modularity** is a measurement that allows us to quantify the goodness of a partition of a network into communities, where partition is a division of a network into an arbitrary number of groups such that each node belongs to one and only one group. More specifically, Modularity is a concept that measures systematic deviations from a random configuration of a network. This helps us indentifying groups that are embedded in a network, and finding nodes that interact more frequantly with each other than in a random network. Therefore modularity is simply a measurment of the systematic deviations from a random configuration. :Networks with high modularity have dense connections between the nodes within modules but sparse connections between nodes in different modules

The modularity of a network can be calculated with the following equation

Equation 9.12

$$ M_c = \sum_{c=1}^{n_c}[ \dfrac{L_c}{L} - (\dfrac{k_c}{2L}) ] $$

where:

    * L : total number of links in the network
    * lc = number of links in each community 
    * kc = total degree of the nodes



In [64]:
philosophers = {
    "aestheticians":{ "title":"title=List_of_aestheticians", "names":""},
    "epistemologists":{ "title":"title=List_of_epistemologists", "names":""},
    "ethicists":{ "title":"title=List_of_ethicists", "names":"" },
    "logicians":{ "title":"title=List_of_logicians", "names":"" },
    "metaphysicians":{ "title":"title=List_of_metaphysicians", "names":"" },
    "social and political philosophers":{ "title":"title=Index_of_sociopolitical_thinkers", "names":""}
}

In [65]:
# set the parameters (explained in detail here https://www.mediawiki.org/wiki/API:Tutorial)
baseurl = "https://en.wikipedia.org/w/index.php?"
title = ""
action = "action=edit"

for i in philosophers:
    # construct the query
    query = "{}{}&{}".format(
        baseurl,
        philosophers[i]["title"],
        action
    )
    
    # use urllib and regex to get the list of philosophers
    wikiresponse = urllib2.urlopen(query)
    wikisource = wikiresponse.read()
    wikisource.decode('utf-8','ignore')
    philosophers[i]["names"] = re.findall(r'\*.*?\[\[(.*?)[\]\|]', wikisource)

# Because the last 4 elements of the ethicists list and social and political philosophers are not philosophers
# they are left out
philosophers["ethicists"]["names"] = philosophers["ethicists"]["names"][:-4]
philosophers["social and political philosophers"]["names"] = philosophers["social and political philosophers"]["names"][:-4] 

In [66]:
G = nx.DiGraph()

# First of all get all the philosophers
all_philophers = []
for i in philosophers:
    all_philophers = all_philophers + philosophers[i]["names"]

all_philosophers_uniq = set(all_philophers)
    
counter = 0
# Iterate over all the philosophers, We use set to erase duplicates
for philo in all_philosophers_uniq:
    
    # Add the philosopher to the graph
    G.add_node(philo)
    
    # Create the uri for the file in the os file system
    fileUri = "/Users/GretarAtli/Dropbox/Dtu/Social_graphs_and_interactions/Philosophers/{}.txt".format(philo.replace(" ","_"))

    # Open the file and get the content 
    file = open(fileUri) 
    theArticle = file.read()

    # use regex and the set.intersection function to find all philosophers that are linked from the 
    # wikipage of each philosopher
    linkedPhilosophers = set(re.findall(r'.*?\[\[(.*?)[\]\|]', theArticle)).intersection(all_philosophers_uniq)

    # Go through all the linked philosophers and add directed edge for each one
    for linkedPhilosopher in linkedPhilosophers:
        G.add_edge(philo,linkedPhilosopher)

In [55]:
philosophers

{'aestheticians': {'names': ['Virgil Aldrich',
   'Anandavardhana',
   'Sri Aurobindo',
   'John Anderson (philosopher)',
   'Yves Marie Andr\xc3\xa9',
   'Thomas Aquinas',
   'Aristotle',
   'Rudolf Arnheim',
   'Mazen Asfour',
   'Georg Anton Friedrich Ast',
   'Augustine of Hippo',
   'Jody Azzouni',
   'Abhinavagupta',
   'Victor Basch',
   'Yusuf Balasagun',
   'Roland Barthes',
   'Georges Bataille',
   'Alexander Gottlieb Baumgarten',
   'Monroe Beardsley',
   'Vissarion Belinsky',
   'Clive Bell',
   'Walter Benjamin',
   'Arnold Berleant',
   'George Birkhoff',
   'Max Black',
   'Maurice Blanchot',
   'Harold Bloom',
   'Georg Brandes',
   'Jean Anthelme Brillat-Savarin',
   'Ferruccio Busoni',
   'John Cage',
   'Cesare Brandi',
   'Stanley Cavell',
   'R. G. Collingwood',
   'Victor Cousin',
   'Benedetto Croce',
   'Gregory Currie',
   'Arthur Danto',
   'William C. Dowling',
   'John Dewey',
   'Jacques Derrida',
   'Umberto Eco',
   'Jonathan Edwards (theologian)',
   'R

In [67]:
# Create a undirected graph for the network
ud_G =  G.to_undirected()

# Find all philosophers that appear in more than one list
philo_counted = collections.Counter(all_philophers)
overlapping_philo =[philo for philo,value in philo_counted.items() if value > 1]

# create the communities for the philosophers
philosophers_communities = {
    "aestheticians" : [],
    "epistemologists" : [],
    "ethicists" : [],
    "logicians" : [],
    "metaphysicians" : [],
    "social and political philosophers" : []
}

# Now we create a list of tuple pairs (name,community) that contain all philosophers that only appear in one list
philo_community_list = []
for community in philosophers.items():
    for name in community[1]["names"]:
        # add the philosopher if he is not overlapping 
        if name not in overlapping_philo:
            philo_community_list.append((name,community[0]))

# know we go through the list of overlapping philosophers and add them to the list with a correct community
for philo in overlapping_philo:
    neighbors = [neighbor for neighbor in philo_community_list if neighbor[0] in ud_G.neighbors(philo)]
    community = ""
    if neighbors: # check if neighbors is not an empty list
        # find the most common community
        community = max(Counter(neighbor_tuple[1] for neighbor_tuple in neighbors))
    else:
        # in case of no neighbor we choose the community by random
        community = np.random.choice(["aestheticians","epistemologists","ethicists","logicians","metaphysicians","social and political philosophers"])
    # add the phylosopher to the philo_community_list
    philo_community_list.append((philo,community))
    
# Now the fun starts, finally we can calculate the modularity of these communities 
# We need the following informations:
# L : total number of links in the network
# lc = number of links in each community (6 numbers in total)
# kc = total degree of the nodes

# We start by adding the philosophers into their communities in a dictionary

for philo in philo_community_list:
    philosophers_communities[philo[1]].append(philo[0])

# Then we loop throught the communities and calculate the modularity (Mc) for each community
modularity = []
L = len(ud_G.edges()) # L : total number of links in the network
for community in philosophers_communities.values():
    Lc = sum([len(ud_G.edges(philo)) for philo in community]) # total number of edges in the community
    kc = sum([ud_G.degree(philo) for philo in community]) # total degree of the community
    M = (Lc/L)-((kc/(2*L))**2) # calculate the modularity for the community with equation 9.12
    modularity.append(M)

"The modularity is {}".format(sum(modularity))

NetworkXError: The node ('Christian Wolff (philosopher)', 2) is not in the graph.

In [58]:
community

('ethicists',
 {'names': ['Pierre Ab\xc3\xa9lard',
   'B. R. Ambedkar',
   'John Stevens Cabot Abbott',
   'Mortimer Adler',
   'Nayef Al-Rodhan',
   'Thomas Aquinas',
   'Nomy Arpaly',
   'Ambrose',
   'Andronicus of Rhodes',
   'Julia Annas',
   'G. E. M. Anscombe',
   'Karl-Otto Apel',
   'Jacob M. Appel',
   'Aristotle',
   'Aristoxenus',
   'John Arthur (philosopher)',
   'Ashoka',
   'Augustine of Hippo',
   'Avicenna',
   'Joxe Azurmendi',
   "Bah\xc3\xa1'u'll\xc3\xa1h",
   'Franz Xaver von Baader',
   'Francis Bacon',
   'Alain Badiou',
   'Samuel Bailey',
   'Tom Beauchamp',
   'Friedrich Eduard Beneke',
   'Jeremy Bentham',
   'Thomas Berry',
   'Maurice Blanchot',
   'Dietrich Bonhoeffer',
   'Murray Bookchin',
   'George Boole',
   'Nick Bostrom',
   'Daniel Brock',
   'Martin Buber',
   'Gautama Buddha',
   'Mario Bunge',
   'Judith Butler',
   'Albert Camus',
   'Arthur Caplan',
   'Roger Chao',
   'James Childress',
   'Randy Cohen',
   'Confucius',
   'John M. Cooper (p