In [1]:
import pandas as pd
import numpy as np
import json
from collections import defaultdict

import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

from fa2 import ForceAtlas2

In [2]:
with open("starwars.json") as f:
    starwars_json = json.load(f)

In [3]:
DG = nx.DiGraph()
# Create and add nodes
nodes = list(set(starwars_json["data"]["characters"].keys()).difference(set(starwars_json["failed"]["characters"].keys())))
for n in nodes:
    movies = [x for x in starwars_json["data"]["movies"].keys() if n in starwars_json["data"]["movies"][x]["characters"]]
    series = defaultdict(list)
    for s in starwars_json["data"]["series"].keys():
        for e in starwars_json["data"]["series"][s]:
            if n in starwars_json["data"]["series"][s][e]["characters"]:
                series[s].append(e)
    attribute = {"movies": movies, "series": series}
    DG.add_node(n, data=attribute)
    
# Create links between nodes
for n in nodes:
    links = starwars_json["data"]["characters"][n]["connections"]
    if len(links) > 0:
        for l in links:
            DG.add_edge(n, l)
    

In [9]:
G = DG.to_undirected()
[len(c) for c in sorted(nx.connected_components(G), key=len, reverse=True)]

[1692,
 4,
 3,
 3,
 3,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [None]:
# Setup for the force atlas algorithm.
forceatlas2 = ForceAtlas2(
                          # Behavior alternatives
                          outboundAttractionDistribution=False,  # Dissuade hubs
                          linLogMode=False,  # NOT IMPLEMENTED
                          adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
                          edgeWeightInfluence=1.0,

                          # Performance
                          jitterTolerance=0.01,  # Tolerance
                          barnesHutOptimize=True,
                          barnesHutTheta=0.12,
                          multiThreaded=False,  # NOT IMPLEMENTED

                          # Tuning
                          scalingRatio=0.02,
                          strongGravityMode=False,
                          gravity=1.0,

                          # Log
                          verbose=True)

# Create an undirected version of the graph
G = DG.to_undirected()

# Calculate node positions with the force atlas algorithm using the undirected graph
positions = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=2000)

 12%|█████████▌                                                                     | 242/2000 [01:48<13:35,  2.16it/s]

In [None]:
nodes = DG.nodes(data=True)
nodes_2, degrees = zip(*DG.degree())

# node size proportional to the degree
node_sizes_deg = [7*x for x in degrees]

# # Color according to the party
# node_colors = ['r' if x[1]['party'] == "Republican" else "b" for x in nodes]
# nx.draw(DG, positions, node_color=node_colors, node_size=node_sizes_deg, with_labels=False, edgecolors="white", edge_color='k', width=0.1)
nx.draw(DG, positions, node_color='red', node_size=50, with_labels=False, edgecolors="white", edge_color='k', width=0.1)

In [5]:
nodes = DG.nodes(data=True)
nodes_2, degrees = zip(*DG.degree())

df = pd.DataFrame(data=dict(name=nodes_2, degree=degrees))

df = df.sort_values(by="degree", ascending=False)
print df[:100]

      degree                   name
752      633       Anakin Skywalker
885      435         Obi-Wan Kenobi
1761     324          Darth Sidious
372      315            Ahsoka Tano
370      299               Han Solo
1460     277           Ezra Bridger
709      255         Luke Skywalker
1725     251                  R2-D2
613      248            Leia Organa
28       246          Padmé Amidala
1461     240                  Dooku
508      219                    Rex
1852     210            Sabine Wren
824      209                  canon
625      208           Kanan Jarrus
30       195                  C-3PO
1130     195              Chewbacca
735      193                 C1-10P
1568     191          Hera Syndulla
527      180  Jabba Desilijic Tiure
1306     178       Garazeb Orrelios
31       162             Mace Windu
670      154                   Yoda
29       154               Grievous
1873     151                   Maul
1337     145         Asajj Ventress
1403     143           Hondo