# Using NetworkX for Network Analysis of Macbeth

We already know several ways to import the text of Macbeth into a Python variable, as well as how to find occurrences of characters' names in a string.

In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
# Download the plain-text script of the play.
# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be analysed as if it were the play.
response = requests.get(
    'https://raw.githubusercontent.com/benjum/NetworksInPython/main/data/Macbeth.txt',
    timeout=30,
)
response.raise_for_status()

In [None]:
# Display the downloaded body as one string to see what the raw text looks like.
response.text

In [None]:
# Keep the full text in x; the cells below split and index this string.
x = response.text

It's a little bit of work to figure out how to split the play into its acts and scenes, but once we settle on a method, we can use nested indexing and splitting to give us what we want.

In [None]:
# Exploratory: take the text after the first mixed-case 'Act', then the piece
# that follows the first 'SCENE' inside it.
x.split('Act')[1].split('SCENE')[1]

In [None]:
# Split on upper-case 'ACT' and discard the first six pieces.
# NOTE(review): the offset 6 presumably skips front matter (e.g. a table of
# contents that also contains 'ACT') — confirm against the downloaded text.
acts = x.split('ACT')[6:]

In [None]:
# Look at the last piece produced by the split.
acts[-1]

In [None]:
# Everything after the last 'SCENE' marker in the second entry of acts.
acts[1].split('SCENE')[-1]

In [None]:
# str.find gives the index of the first occurrence of 'OLD MAN' in that
# scene, or -1 when the name does not occur.
acts[1].split('SCENE')[-1].find('OLD MAN')

We'll use the characters in the play as our nodes. For quantification, we will calculate the number of scenes that each character appears in, as well as the number of scenes in which each pair of characters occurs together.

In [None]:
# Upper-case character names that become the nodes of the network.
# The order is significant: character pairs elsewhere in the notebook are
# keyed as (earlier name, later name) following this list.
characters = [
    'DUNCAN', 'MALCOLM', 'DONALBAIN',
    'MACBETH', 'BANQUO', 'MACDUFF',
    'LENNOX', 'ROSS', 'MENTEITH', 'ANGUS', 'CAITHNESS',
    'FLEANCE', 'SIWARD', 'YOUNG SIWARD', 'SEYTON',
    'SON', 'DOCTOR', 'SOLDIER', 'PORTER', 'OLD MAN',
    'LADY MACBETH', 'LADY MACDUFF', 'GENTLEWOMAN',
    'HECATE', 'FIRST WITCH', 'SECOND WITCH', 'THIRD WITCH',
]

In [None]:
# Number of pieces kept in acts after the split.
len(acts)

In [None]:
import re
from itertools import combinations

# charnum[name]            -> number of scenes in which the character occurs
# connections[(a, b)]      -> number of scenes in which both a and b occur,
#                             with a listed before b in `characters`
charnum = {name: 0 for name in characters}
connections = {pair: 0 for pair in combinations(characters, 2)}

# A plain substring search counts 'MACBETH' in every 'LADY MACBETH' scene
# (likewise MACDUFF / LADY MACDUFF and SIWARD / YOUNG SIWARD) and can match
# short names inside longer words. So: match whole names only, try the
# longest names first, and blank out each match before looking for the
# shorter names it contains.
name_patterns = [
    (name, re.compile(r'\b' + re.escape(name) + r'\b'))
    for name in sorted(characters, key=len, reverse=True)
]

# For every act and scene,
# increment the count for a character if they occur in the scene
# and increment the count for character pairs if they co-occur
for act in acts:
    # The piece before the first 'SCENE' marker is the act heading, not a scene.
    for scene in act.split('SCENE')[1:]:
        remaining = scene
        present = set()
        for name, pattern in name_patterns:
            # Replace matches with a space so neighbouring words cannot fuse
            # into a new, spurious match.
            remaining, hits = pattern.subn(' ', remaining)
            if hits:
                present.add(name)

        # Restore the order of `characters` so every pair is an existing key.
        scenechars = [name for name in characters if name in present]
        for name in scenechars:
            charnum[name] += 1
        for pair in combinations(scenechars, 2):
            connections[pair] += 1

In [None]:
# Scene count per character.
charnum

In [None]:
# Shared-scene count per pair of characters (pairs that never meet stay at 0).
connections

Now we have the info to make some networks.

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Every pair of characters sharing at least one scene becomes an edge.
# The shared-scene count is only used as a filter here; the edges themselves
# carry no weight.
nxgraphitems = [
    (first, second)
    for (first, second), shared_scenes in connections.items()
    if shared_scenes != 0
]

nxgraphitems

In [None]:
# Start from an empty undirected graph.
G = nx.Graph()

In [None]:
# Nodes are created implicitly from the edge endpoints, so a character that
# has no edge in nxgraphitems does not appear in G at all.
G.add_edges_from(nxgraphitems)

In [None]:
# First look at the network: default layout and styling, nodes labelled
# with the character names.
nx.draw(G, with_labels=True)
plt.show()

In [None]:
# Nodes actually present in the graph.
G.nodes

In [None]:
# Character names, in the order of the characters list.
charnum.keys()

In [None]:
# Scene counts, in the same order as charnum.keys().
charnum.values()

In [None]:
# Scale each node by the number of scenes its character appears in.
# The node list is taken from G itself rather than from charnum: G only
# contains characters that have at least one edge, and asking nx.draw for a
# node that is not in the graph fails.
drawn_nodes = list(G.nodes)
nx.draw(G,
        with_labels=True,
        nodelist=drawn_nodes,
        node_size=[charnum[node]*50 for node in drawn_nodes])
plt.show()

In [None]:
plt.figure(figsize=(8,8))

# Compute the node positions once so they can be passed to nx.draw explicitly.
pos = nx.spring_layout(G)
# pos = nx.spring_layout(G, k=2) # adjust the spacing
# pos = nx.circular_layout(G)      # use a different layout

# Node sizes follow the scene counts. The node list comes from G (not from
# charnum) so that a character without any edge, which is absent from G and
# from pos, cannot make the drawing fail.
drawn_nodes = list(G.nodes)
nx.draw(G,
        with_labels=True,
        nodelist=drawn_nodes,
        node_size=[charnum[node]*50 for node in drawn_nodes],
        pos=pos
        )
plt.show()

In [None]:
# Degree centrality for every node, as a mapping from node to score.
nx.degree_centrality(G)

In [None]:
# Print the degree centrality of each character, highest first, with the
# name title-cased and padded to a 15-character column.
dc = nx.degree_centrality(G)
ranked = sorted(dc.items(), key=lambda entry: entry[1], reverse=True)
for name, score in ranked:
    print(f'{name.title():15s}: {score:.3f}')

In [None]:
# Characters directly connected to MACBETH in the graph.
list(G.neighbors('MACBETH'))

In [None]:
# Characters directly connected to OLD MAN in the graph.
list(G.neighbors('OLD MAN'))

In [None]:
# Chain of characters linking OLD MAN to YOUNG SIWARD through the fewest edges.
nx.shortest_path(G,'OLD MAN','YOUNG SIWARD')

In [None]:
# Betweenness centrality for every node, as a mapping from node to score.
nx.betweenness_centrality(G)

In [None]:
# Print the betweenness centrality of each character, highest first.
# NOTE: this rebinds dc, so the degree-centrality scores stored under that
# name by the earlier cell are replaced.
dc = nx.betweenness_centrality(G)
ranked = sorted(dc.items(), key=lambda entry: entry[1], reverse=True)
for name, score in ranked:
    print(f'{name.title():15s}: {score:.3f}')

In [None]:
from networkx.algorithms.community.centrality import girvan_newman

In [None]:
# NetworkX documents girvan_newman as returning an iterator over successive
# partitions of the graph; the next cell pulls the first one with next().
communities = girvan_newman(G)

In [None]:
# Take the next partition from the Girvan-Newman iterator and turn each
# community into a plain list of names. Re-running this cell advances the
# iterator to the following partition.
node_groups = [list(members) for members in next(communities)]

print(node_groups)

# One colour per node, in G's iteration order: blue for the first community,
# green for everything else.
color_map = ['blue' if node in node_groups[0] else 'green' for node in G]

nx.draw(G, node_color=color_map, with_labels=True)
plt.show()

In [None]:
# Density of the graph as reported by NetworkX.
nx.density(G)

In [None]:
from networkx.algorithms.community import greedy_modularity_communities

In [None]:
# Communities found by greedy modularity maximisation, shown as returned.
greedy_modularity_communities(G)

In [None]:
communities = greedy_modularity_communities(G)

# Map every character to the index of the community that contains it.
modularity_class = {
    name: community_number
    for community_number, community in enumerate(communities)
    for name in community
}

In [None]:
# Community index assigned to each character.
modularity_class

In [None]:
communities = greedy_modularity_communities(G)

# Each community as a plain list of character names.
node_groups = [list(members) for members in communities]

print(node_groups)

# First community -> blue, second -> yellow, anything else -> green.
# zip() stops at the shorter sequence, so a graph with a single community
# never touches a second group.
color_by_node = {}
for color, members in zip(('blue', 'yellow'), node_groups):
    for member in members:
        color_by_node.setdefault(member, color)

# One colour per node, in G's iteration order.
color_map = [color_by_node.get(node, 'green') for node in G]

nx.draw(G, node_color=color_map, with_labels=True)
plt.show()

In [None]:
plt.figure(figsize=(8,8))

# Compute the node positions once so they can be passed to nx.draw explicitly.
pos = nx.spring_layout(G)
# pos = nx.spring_layout(G, k=2) # adjust the spacing
# pos = nx.circular_layout(G)      # use a different layout

# node_size and node_color are applied to the nodes in nodelist order.
# color_map was built by iterating over G, so the node list must follow G's
# own order too (not the order of charnum); otherwise characters are drawn
# with another character's community colour. Using G's nodes also avoids
# asking for characters that have no edge and are therefore not in the graph.
drawn_nodes = list(G)
nx.draw(G,
        with_labels=True,
        nodelist=drawn_nodes,
        node_size=[charnum[node]*50 for node in drawn_nodes],
        pos=pos,
        node_color=color_map
        )
plt.show()

We could do a lot of customization to make a better visualization.

On the other hand, NetworkX is intended primarily for analysis, not for visualization. Bokeh and Dash Cytoscape are two other (better) options for network visualizations with Python.