In [3]:
import csv

In [4]:
from operator import itemgetter

In [5]:
import networkx as nx

In [6]:
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.

In [7]:
# Load the node table. The csv module expects files to be opened with
# newline='' so that line breaks embedded in quoted fields are parsed correctly.
with open('quakers_nodelist.csv', 'r', newline='') as nodecsv:
    nodereader = csv.reader(nodecsv)
    next(nodereader, None)  # skip the header row (no error on an empty file)
    nodes = list(nodereader)  # one list of column values per data row

node_names = [n[0] for n in nodes]  # first column holds the node name

# Load the edge table the same way; every data row is kept as a tuple.
with open('quakers_edgelist.csv', 'r', newline='') as edgecsv:
    edgereader = csv.reader(edgecsv)
    next(edgereader, None)  # skip the header row
    edges = [tuple(e) for e in edgereader]

In [8]:
# Sanity check: how many node names were read from the node list CSV.
print(len(node_names))

119


In [9]:
# Sanity check: how many edge rows were read from the edge list CSV.
print(len(edges))

174


In [10]:
# Start from an empty undirected graph.
G = nx.Graph()

In [11]:
# Register every name from the node list first, so a node is present even if
# no edge row mentions it,
G.add_nodes_from(node_names)
# then add the connections from the edge list rows.
G.add_edges_from(edges)

In [12]:
# Print the graph's one-line summary (graph type, node count, edge count).
print(G)

Graph with 119 nodes and 174 edges


In [13]:
# One independent lookup table per node attribute; they are filled from the
# node rows and later attached to the graph.
hist_sig_dict = dict()
gender_dict = dict()
birth_dict = dict()
death_dict = dict()
id_dict = dict()

In [14]:
# Column index in each node row that feeds each lookup table; column 0 is the
# name used as the key in every table.
column_targets = (
    (hist_sig_dict, 1),
    (gender_dict, 2),
    (birth_dict, 3),
    (death_dict, 4),
    (id_dict, 5),
)
for row in nodes:
    for lookup, column in column_targets:
        lookup[row[0]] = row[column]

In [15]:
# Attach each lookup table to the graph under its attribute name.
for values_by_name, attribute in (
    (hist_sig_dict, 'historical_significance'),
    (gender_dict, 'gender'),
    (birth_dict, 'birth_year'),
    (death_dict, 'death_year'),
    (id_dict, 'sdfb_id'),
):
    nx.set_node_attributes(G, values_by_name, attribute)

In [16]:
# Show every node together with the birth year stored on it.
for person in G:
    print(person, G.nodes[person]['birth_year'])

Joseph Wyeth 1663
Alexander Skene of Newtyle 1621
James Logan 1674
Dorcas Erbery 1656
Lilias Skene 1626
William Mucklow 1630
Thomas Salthouse 1630
William Dewsbury 1621
John Audland 1630
Richard Claridge 1649
William Bradford 1663
Fettiplace Bellers 1687
John Bellers 1654
Isabel Yeamans 1637
George Fox the younger 1551
George Fox 1624
John Stubbs 1618
Anne Camm 1627
John Camm 1605
Thomas Camm 1640
Katharine Evans 1618
Lydia Lancaster 1683
Samuel Clarridge 1631
Thomas Lower 1633
Gervase Benson 1569
Stephen Crisp 1628
James Claypoole 1634
Thomas Holme 1626
John Freame 1665
John Swinton 1620
William Mead 1627
Henry Pickworth 1673
John Crook 1616
Gilbert Latey 1626
Ellis Hookes 1635
Joseph Besse 1683
James Nayler 1618
Elizabeth Hooten 1562
George Whitehead 1637
John Whitehead 1630
William Crouch 1628
Benjamin Furly 1636
Silvanus Bevan 1691
Robert Rich 1607
John Whiting 1656
Christopher Taylor 1614
Thomas Lawson 1630
Richard Farnworth 1630
William Coddington 1601
Thomas Taylor 1617
Richard 

In [17]:
# Density: the fraction of all possible edges that are actually present
# (0 = no edges, 1 = every pair of nodes connected).
density = nx.density(G)
print('Network density:', density)

Network density: 0.02478279447372169


In [18]:
# One shortest route between the two named nodes, returned as the list of
# nodes visited with both endpoints included.
fell_whitehead_path = nx.shortest_path(
    G,
    source='Margaret Fell',
    target='George Whitehead',
)
print('Shortest path between Fell and Whitehead:', fell_whitehead_path)

Shortest path between Fell and Whitehead: ['Margaret Fell', 'George Fox', 'George Whitehead']


In [19]:
# The path list contains nodes, so the number of edges walked is one less.
print('Length of that path:', len(fell_whitehead_path)-1)

Length of that path: 2


In [20]:
# Check connectivity before computing the diameter below.
# If the graph has more than one component, this prints False:
print(nx.is_connected(G))

False


In [21]:
# nx.connected_components yields one set of nodes per component; max() with
# key=len picks the set with the most nodes.
# NOTE: `components` is a generator, so it is exhausted once max() has run;
# call nx.connected_components(G) again if the components are needed later.
components = nx.connected_components(G)
largest_component = max(components, key=len)

In [22]:
# Build a subgraph containing only the largest component, then measure the
# diameter of that subgraph (the whole graph is not connected, as checked
# earlier).

subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print("Network diameter of largest component:", diameter)

Network diameter of largest component: 8


In [23]:
# Transitivity: the share of connected triples that close into triangles,
# used here as the measure of triadic closure.
triadic_closure = nx.transitivity(G)
print('Triadic closure:', triadic_closure)  # label was misspelled "Traidic"

Traidic closure: 0.16937799043062202


In [24]:
# Map every node to its degree and also store that value on the node itself.
degree_dict = dict(G.degree())
nx.set_node_attributes(G, degree_dict, 'degree')

In [25]:
# Spot check: all attributes now stored on a single node, including 'degree'.
print(G.nodes['William Penn'])

{'historical_significance': 'Quaker leader and founder of Pennsylvania', 'gender': 'male', 'birth_year': '1644', 'death_year': '1718', 'sdfb_id': '10009531', 'degree': 18}


In [26]:
# (name, degree) pairs ordered from highest to lowest degree.
sorted_degree = sorted(
    degree_dict.items(),
    key=lambda name_and_degree: name_and_degree[1],
    reverse=True,
)

In [27]:
print("Top 20 nodes by degree:")
# Each entry is printed as its raw (name, degree) tuple.
for name_and_degree in sorted_degree[:20]:
    print(name_and_degree)

Top 20 nodes by degree:
('George Fox', 22)
('William Penn', 18)
('James Nayler', 16)
('George Whitehead', 13)
('Margaret Fell', 13)
('Benjamin Furly', 10)
('Edward Burrough', 9)
('George Keith', 8)
('Thomas Ellwood', 8)
('Francis Howgill', 7)
('John Perrot', 7)
('John Audland', 6)
('Richard Farnworth', 6)
('Alexander Parker', 6)
('John Story', 6)
('John Stubbs', 5)
('Thomas Curtis', 5)
('John Wilkinson', 5)
('William Caton', 5)
('Anthony Pearson', 5)


In [31]:
import numpy

In [35]:
# NOTE: betweenness centrality is not computed in this notebook because the
# call raised an error in this environment; the traceback was not recorded.
# TODO(review): capture the error — possibly related to the NumPy patching
# done in the "fixing errors" cells of this notebook; confirm.
# Eigenvector centrality is computed as the centrality measure instead.
eigenvector_dict = nx.eigenvector_centrality(G)

In [37]:
# Store each node's eigenvector centrality on the node as 'eigenvector'.
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

In [38]:
# (name, eigenvector centrality) pairs ordered from highest to lowest score.
sorted_eigenvector = sorted(
    eigenvector_dict.items(),
    key=lambda name_and_score: name_and_score[1],
    reverse=True,
)

print("Top 20 nodes by eigenvector centrality:")
for name_and_score in sorted_eigenvector[:20]:
    print(name_and_score)

Top 20 nodes by eigenvector centrality:
('George Fox', 0.4491750710859924)
('James Nayler', 0.3352974100447867)
('William Penn', 0.27032201153998675)
('Margaret Fell', 0.253170949905681)
('George Whitehead', 0.2497455334914196)
('Edward Burrough', 0.23147427604862303)
('Francis Howgill', 0.19095393782681047)
('Benjamin Furly', 0.1878520634691651)
('John Perrot', 0.18496928077956107)
('George Keith', 0.1838469086791535)
('Thomas Ellwood', 0.17608142535843857)
('Richard Farnworth', 0.15368535029296415)
('John Crook', 0.1327158126880779)
('Rebecca Travers', 0.1184804064465093)
('Alexander Parker', 0.11587808682088323)
('Anthony Pearson', 0.11120476725256785)
('William Dewsbury', 0.11057869321157118)
('John Stubbs', 0.10693500692141822)
('John Audland', 0.09830889719333748)
('Thomas Salthouse', 0.0954862854413877)


In [39]:
# Keep the 20 highest-ranked (name, eigenvector centrality) pairs
top_eigenvector = sorted_eigenvector[:20]

# and print each one next to that node's degree for comparison.
for person, centrality in top_eigenvector:
    degree = degree_dict[person]  # degree_dict maps node name -> degree
    print("Name:", person, "| Eigenvector Centrality:", centrality, "| Degree:", degree)

Name: George Fox | Eigenvector Centrality: 0.4491750710859924 | Degree: 22
Name: James Nayler | Eigenvector Centrality: 0.3352974100447867 | Degree: 16
Name: William Penn | Eigenvector Centrality: 0.27032201153998675 | Degree: 18
Name: Margaret Fell | Eigenvector Centrality: 0.253170949905681 | Degree: 13
Name: George Whitehead | Eigenvector Centrality: 0.2497455334914196 | Degree: 13
Name: Edward Burrough | Eigenvector Centrality: 0.23147427604862303 | Degree: 9
Name: Francis Howgill | Eigenvector Centrality: 0.19095393782681047 | Degree: 7
Name: Benjamin Furly | Eigenvector Centrality: 0.1878520634691651 | Degree: 10
Name: John Perrot | Eigenvector Centrality: 0.18496928077956107 | Degree: 7
Name: George Keith | Eigenvector Centrality: 0.1838469086791535 | Degree: 8
Name: Thomas Ellwood | Eigenvector Centrality: 0.17608142535843857 | Degree: 8
Name: Richard Farnworth | Eigenvector Centrality: 0.15368535029296415 | Degree: 6
Name: John Crook | Eigenvector Centrality: 0.132715812688077

In [40]:
# Partition the graph into communities by greedy modularity maximisation.
# The result is indexed below: position i is treated as modularity class i.
communities = community.greedy_modularity_communities(G)

In [41]:
# Map each member name to the index of the community it belongs to.
modularity_dict = {
    member: class_id
    for class_id, group in enumerate(communities)
    for member in group
}

In [43]:
# Store each node's community index on the node as 'modularity'.
nx.set_node_attributes(G, modularity_dict, 'modularity')

In [44]:
# Members of modularity class 0, in graph node order.
class0 = [person for person in G if G.nodes[person]['modularity'] == 0]

# Their eigenvector centralities, read back from the node attributes.
class0_eigenvector = dict(
    (person, G.nodes[person]['eigenvector']) for person in class0
)

# Rank the members from most to least central and show the top five.
class0_sorted_by_eigenvector = sorted(
    class0_eigenvector.items(),
    key=lambda name_and_score: name_and_score[1],
    reverse=True,
)

print("Modularity Class 0 Sorted by Eigenvector Centrality:")
for person, score in class0_sorted_by_eigenvector[:5]:
    print("Name:", person, "| Eigenvector Centrality:", score)

Modularity Class 0 Sorted by Eigenvector Centrality:
Name: James Nayler | Eigenvector Centrality: 0.3352974100447867
Name: Margaret Fell | Eigenvector Centrality: 0.253170949905681
Name: Francis Howgill | Eigenvector Centrality: 0.19095393782681047
Name: Richard Farnworth | Eigenvector Centrality: 0.15368535029296415
Name: Anthony Pearson | Eigenvector Centrality: 0.11120476725256785


In [45]:
# List the members of every community that has more than two nodes.
for class_id, group in enumerate(communities):
    if len(group) <= 2:
        continue  # skip modularity classes with 2 or fewer nodes
    print(f'Class {class_id}:', list(group))

Class 0: ['Thomas Lower', 'Thomas Aldam', 'Francis Howgill', 'George Fox the younger', 'Margaret Fell', 'William Tomlinson', 'Dorcas Erbery', 'Anthony Pearson', 'Elizabeth Leavens', 'Gervase Benson', 'Robert Rich', 'James Nayler', 'William Gibson', 'Thomas Holme', 'Richard Farnworth', 'Martha Simmonds', 'Hannah Stranger']
Class 1: ['Isaac Norris', 'George Keith', 'Richard Claridge', 'James Logan', 'Joseph Besse', 'William Penn', 'Jane Sowle', 'Edward Haistwell', 'David Lloyd', 'Tace Sowle', 'Isabel Yeamans', 'Anne Conway Viscountess Conway and Killultagh', 'John Bartram', 'Samuel Bownas', 'William Bradford', 'Peter Collinson', 'Thomas Story']
Class 2: ['Ellis Hookes', 'John Perrot', 'Thomas Salthouse', 'William Coddington', 'William Crouch', 'Mary Prince', 'Mary Fisher', 'John Crook', 'William Mucklow', 'William Dewsbury', 'Elizabeth Hooten', 'George Fox', 'Edward Burrough', 'Leonard Fell', 'William Mead']
Class 3: ['George Whitehead', 'Daniel Quare', 'Silvanus Bevan', 'Henry Pickworth

In [55]:
# Export the graph, including all node attributes set above, as a GEXF file.
#
# This cell was originally only runnable after the NumPy patch cells that
# appear further down, so a top-to-bottom run failed here. The minimal shim it
# needs now lives in this cell.
# Presumably the failure comes from a networkx release that predates NumPy 2
# support: its GEXF writer reads np.float_, an alias NumPy 2.0 removed.
# Upgrading networkx is the real fix; the guard makes this a no-op when the
# alias already exists.
import numpy as np

if not hasattr(np, 'float_'):
    np.float_ = np.float64

nx.write_gexf(G, 'quaker_network.gexf')

In [48]:
import numpy as np

In [52]:
# Compatibility shim, presumably for nx.write_gexf: networkx releases that
# predate NumPy 2 support read np.float_, an alias that NumPy 2.0 removed.
# Restore only that missing alias and point it at the real NumPy type.
# np.float64, np.float32 and np.float16 must NOT be rebound to the builtin
# float: they still exist in NumPy, and overwriting them breaks dtype handling
# for everything else that runs in this kernel.
if not hasattr(np, 'float_'):
    np.float_ = np.float64

In [54]:
# These integer type names are all still provided by NumPy, so none of them
# needs patching. Rebinding them to the builtin int would strip dtype
# information from every later NumPy call in this kernel. The guard keeps the
# cell as a harmless fallback: a name is only filled in when NumPy does not
# define it, and existing NumPy types are never overwritten.
for _int_alias in (
    'int_', 'int8', 'int16', 'int32', 'int64',
    'uint8', 'uint16', 'uint32', 'uint64',
    'intc', 'intp',
):
    if not hasattr(np, _int_alias):
        setattr(np, _int_alias, int)