The purpose of this exercise is to analyze [the political books network](http://www-personal.umich.edu/~mejn/netdata)


In [1]:
import networkx as nx
from networkx.algorithms import community
import pandas as pd

In [2]:
# Read the network data
net = nx.read_gml('data/polbooks.gml')

In [3]:
len(net.nodes)

105

In [4]:
len(net.edges)

441

# What books are purchased with `Worse Than Watergate`?

In [5]:
list(net.neighbors("Worse Than Watergate"))

['The Price of Loyalty',
 'House of Bush, House of Saud',
 'The Sorrows of Empire',
 'Against All Enemies',
 'American Dynasty',
 'Big Lies',
 'The Lies of George W. Bush',
 'Plan of Attack',
 'Bush at War',
 'The New Pearl Harbor',
 'Bushwomen',
 'The Politics of Truth',
 'Fanatics and Fools',
 'Bushwhacked',
 'The Exception to the Rulers',
 'Freethinkers']

# What is the shortest path between `MoveOn's 50 Ways to Love Your Country` and `Empire`?

In [6]:
nx.shortest_path(net)["MoveOn's 50 Ways to Love Your Country"]["Empire"]

["MoveOn's 50 Ways to Love Your Country",
 'American Dynasty',
 'The Great Unraveling',
 'Rogue Nation',
 'Empire']

# What are the top 5 central books? Use different centrality measures and compare

In [7]:
degree = pd.Series(nx.degree_centrality(net))
betweenness = pd.Series(nx.betweenness_centrality(net))

In [8]:
degree.head()

1000 Years for Revenge     0.057692
Bush vs. the Beltway       0.038462
Charlie Wilson's War       0.038462
Losing Bin Laden           0.221154
Sleeping With the Devil    0.076923
dtype: float64

In [9]:
betweenness.head()

1000 Years for Revenge     0.007433
Bush vs. the Beltway       0.000049
Charlie Wilson's War       0.000521
Losing Bin Laden           0.076093
Sleeping With the Devil    0.062928
dtype: float64

# Using community detection, uncover communities from the network, what do they mean?

In [10]:
communities = community.greedy_modularity_communities(net)

In [11]:
communities

[frozenset({'1000 Years for Revenge',
            'A National Party No More',
            'Arrogance',
            'Betrayal',
            'Bias',
            'Breakdown',
            'Bush Country',
            'Bush vs. the Beltway',
            "Charlie Wilson's War",
            'Dangerous Dimplomacy',
            'Deliver Us from Evil',
            'Dereliction of Duty',
            'Endgame',
            'Fighting Back',
            'Ghost Wars',
            'Give Me a Break',
            'Hating America',
            "Hillary's Scheme",
            'Hollywood Interrupted',
            'Legacy',
            'Let Freedom Ring',
            'Losing Bin Laden',
            'Meant To Be',
            'Off with Their Heads',
            'Persecution',
            'Power Plays',
            "Rumsfeld's War",
            'Shut Up and Sing',
            'Slander',
            'Sleeping With the Devil',
            'Spin Sisters',
            'Tales from the Left Coast',
            'Ten 