In [17]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline 

In [18]:
node1 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book1-nodes.csv')
node2 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book2-nodes.csv')
node3 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book3-nodes.csv')
node4 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book4-nodes.csv')
node5 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book5-nodes.csv')

In [19]:
edges1 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book1-edges.csv')
edges2 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book2-edges.csv')
edges3 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book3-edges.csv')
edges4 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book4-edges.csv')
edges5 = pd.read_csv('./datasets/game_of_thrones_network/asoiaf-book5-edges.csv')

In [20]:
node1.head()

Unnamed: 0,Id,Label
0,Addam-Marbrand,Addam Marbrand
1,Aegon-I-Targaryen,Aegon I Targaryen
2,Aemon-Targaryen-(Maester-Aemon),Aemon Targaryen (Maester Aemon)
3,Aerys-II-Targaryen,Aerys II Targaryen
4,Aggo,Aggo


In [21]:
edges1.head()

Unnamed: 0,Source,Target,Type,weight,book
0,Addam-Marbrand,Jaime-Lannister,Undirected,3,1
1,Addam-Marbrand,Tywin-Lannister,Undirected,6,1
2,Aegon-I-Targaryen,Daenerys-Targaryen,Undirected,5,1
3,Aegon-I-Targaryen,Eddard-Stark,Undirected,4,1
4,Aemon-Targaryen-(Maester-Aemon),Alliser-Thorne,Undirected,4,1


In [22]:
nodes = pd.concat([node1,node2,node3,node4,node5], axis=0)
edges = pd.concat([edges1,edges2,edges3,edges4,edges5], axis=0)

In [23]:
# node1.shape
# edges1.shape
# nodes.shape
# edges.shape

In [32]:
# nodes.to_dict('records')

In [29]:
nodesDict = nodes.to_dict('records')

In [39]:
G = nx.Graph()
for row in nodesDict:
    G.add_node(row['Id'], label = row['Label'])

In [40]:
list(G.node(data=True))[0:10]

[('Addam-Marbrand', {'label': 'Addam Marbrand'}),
 ('Aegon-I-Targaryen', {'label': 'Aegon I Targaryen'}),
 ('Aemon-Targaryen-(Maester-Aemon)',
  {'label': 'Aemon Targaryen (Maester Aemon)'}),
 ('Aerys-II-Targaryen', {'label': 'Aerys II Targaryen'}),
 ('Aggo', {'label': 'Aggo'}),
 ('Albett', {'label': 'Albett'}),
 ('Alliser-Thorne', {'label': 'Alliser Thorne'}),
 ('Alyn', {'label': 'Alyn'}),
 ('Arthur-Dayne', {'label': 'Arthur Dayne'}),
 ('Arya-Stark', {'label': 'Arya Stark'})]

In [44]:
edgesDict = edges.to_dict('records')
# edgesDict[0:3]

In [45]:
for row in edgesDict:
    G.add_edge(row['Source'], row['Target'],weight = row['weight'], book = row['book'])

In [47]:
# list(G.edges(data=True))[0:5]

## Find the most important charachter 
### One with the most number of mentions / HUBS

In [50]:
hubs = {}
for node in G.node:
    hubs[node] = len(list(G.neighbors(node)))
# hubs

In [53]:
import pandas as pd
hubs1 = pd.DataFrame.from_dict(hubs, orient = 'index').reset_index()
hubs1.columns = ['Node','NeighborCounts']
hubs1.head()
hubs1.sort_values('NeighborCounts', ascending= False).head(10)

Unnamed: 0,Node,NeighborCounts
170,Tyrion-Lannister,122
88,Jon-Snow,114
77,Jaime-Lannister,101
24,Cersei-Lannister,97
157,Stannis-Baratheon,89
9,Arya-Stark,84
154,Sansa-Stark,75
22,Catelyn-Stark,75
147,Robb-Stark,74
45,Eddard-Stark,74


In [55]:
hubs1['Centrality'] = hubs1['NeighborCounts']/hubs1.shape[0]
hubs1.head()

Unnamed: 0,Node,NeighborCounts,Centrality
0,Addam-Marbrand,12,0.015075
1,Aegon-I-Targaryen,9,0.011307
2,Aemon-Targaryen-(Maester-Aemon),26,0.032663
3,Aerys-II-Targaryen,19,0.023869
4,Aggo,10,0.012563


In [59]:
centralities = nx.degree_centrality(G).items()
sorted(centralities, key=lambda x:x[1], reverse=True)[0:10]

[('Tyrion-Lannister', 0.15345911949685534),
 ('Jon-Snow', 0.14339622641509434),
 ('Jaime-Lannister', 0.1270440251572327),
 ('Cersei-Lannister', 0.1220125786163522),
 ('Stannis-Baratheon', 0.11194968553459118),
 ('Arya-Stark', 0.10566037735849056),
 ('Catelyn-Stark', 0.09433962264150943),
 ('Sansa-Stark', 0.09433962264150943),
 ('Eddard-Stark', 0.0930817610062893),
 ('Robb-Stark', 0.0930817610062893)]

### PageRank
PageRank works by counting the number and quality of links to a page to determine a rough estimate of how important the website is. The underlying assumption is that more important websites are likely to receive more links from other websites.

In [62]:
sorted(nx.pagerank_numpy(G, weight='weight').items(), key=lambda x:x[1], reverse=True)[0:10]

[('Jon-Snow', 0.027992146813780025),
 ('Tyrion-Lannister', 0.025277842384417746),
 ('Daenerys-Targaryen', 0.01885814202097024),
 ('Arya-Stark', 0.01749804128882223),
 ('Jaime-Lannister', 0.016650941386770285),
 ('Stannis-Baratheon', 0.01584458228302253),
 ('Cersei-Lannister', 0.013640631109425561),
 ('Theon-Greyjoy', 0.013299171649120731),
 ('Robb-Stark', 0.012163019013952977),
 ('Catelyn-Stark', 0.01159150700426701)]

### Check the evolution of characters as new books are released

In [None]:
# Make a graph for each book 
# evol = [nx.degree_centrality(book) for book in books]
# evol_df = pd.DataFrame.from_records(evol).fillna(0)
# evol_df[['Eddard-Stark', 'Tyrion-Lannister', 'Jon-Snow']].plot()