## Getting Started with NetworkX

NetworkX documentation: https://networkx.github.io/documentation/latest/

In [None]:
# Install NetworkX first if it is missing, e.g.: pip install networkx

import networkx as nx

# Start from an empty undirected graph; the bare name on the last line makes
# the notebook display the graph object as the cell output.
g = nx.Graph()
g

In [None]:
# A freshly created graph has no nodes and no edges yet.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(g.nodes())
print(g.edges())

In [None]:
import numpy as np

# Add one node; any hashable object (here the integer 1) can be a node.
g.add_node(1)
print(g.nodes())

In [None]:
# Add several nodes at once from any iterable of node ids.
g.add_nodes_from([2, 3])
print(g.nodes())

In [None]:
# Preview the ids 4..10 that the next cell adds as nodes. Wrapping the range
# in list() shows the actual values on Python 3, where range() is lazy.
new_node_ids = list(range(4, 11))
new_node_ids

In [None]:
# Add nodes 4 through 10 (the upper bound of range is exclusive).
g.add_nodes_from(range(4, 11))
print(g.nodes())

Now let's add edges

In [None]:
# Connect nodes 1 and 2 with a single edge.
g.add_edge(1,2)

In [None]:
# All edges in the graph, then the adjacency of node 1 (neighbor -> edge
# attributes). `g.edge` only existed in NetworkX 1.x as an alias of `g.adj`;
# `g.adj` is available in every version.
print(g.edges())
print(g.adj[1])

In [None]:
# Extend the chain: first two edges from an explicit list of (u, v) pairs ...
g.add_edges_from([(2, 3), (3, 4)])
# ... then 4-5, 5-6, ..., 9-10 from generated consecutive pairs.
g.add_edges_from((i, i + 1) for i in range(4, 10))
g.edges()

In [None]:
# Summary counts. str.format keeps the output identical on Python 2 and 3.
print('nodes: {}'.format(g.number_of_nodes()))
print('edges: {}'.format(g.number_of_edges()))

We can annotate nodes and edges with attributes

In [None]:
# g[u][v] is the attribute dict of edge (u, v); assigning a key annotates the edge.
g[1][2]['weight'] = 1.0
# Display the attributes now stored on edge (1, 2).
g[1][2]

In [None]:
# Node attributes live in a per-node dict. `g.node` was removed in
# NetworkX 2.4; `g.nodes[n]` is the supported accessor.
g.nodes[1]['name'] = 'Gilad'
g.nodes[1]['color'] = 'Blue'
g.nodes[1]['weight'] = 20

In [None]:
# Attributes of node 1, followed by its adjacency (neighbor -> edge attributes).
print(g.nodes[1])
print(g[1])

In [None]:
# Weight every chain edge (i, i+1) with the float value of its lower endpoint.
for i in range(1, 10):
    g[i][i + 1]['weight'] = i * 1.0

# shows all edges of node 1 + their attributes (same as running g[1]);
# g.adj / g.nodes replace the removed g.edge / g.node accessors
print('edge info: {}'.format(g.adj[1]))
print('node info: {}'.format(g.nodes[1]))

And it is easy to iterate over the nodes and edges of a graph

In [None]:
# naming every node in the graph
# Nodes are the integers 1..10, so node n maps to names[n - 1].
names = ['one','two','three','four','five','six','seven','eight','nine','ten']
for node in g.nodes():
    g.nodes[node]['name'] = names[node - 1]

In [None]:
# print all edge weights as "<name of u> u -> v weight"
# edges(data='weight') yields (u, v, weight) triples directly.
for u, v, weight in g.edges(data='weight'):
    print('{} {} -> {} {}'.format(g.nodes[u]['name'], u, v, weight))

In [None]:
# find neighbors of a particular node
# neighbors() returns an iterator in NetworkX 2.x, so materialise it as a
# list; otherwise only the iterator object's repr is printed.
for node in g.nodes():
    print('{} {}'.format(node, list(g.neighbors(node))))

## Class Graph

In [None]:
# handy library that helps us get all combinations of pairs, given an array of items
from itertools import combinations

arr = [1, 2, 3, 4, 5]
# combinations(arr, 2) yields every unordered pair exactly once, in input order:
# (1, 2), (1, 3), ..., (4, 5).
for i, j in combinations(arr, 2):
    print('{} {}'.format(i, j))

In [None]:
# Build the class graph: students are nodes, and two students are connected
# when they appear together in at least one class.
g = nx.Graph()

floc = 'data/student_classes.txt'

# Each row is read as "<class name>,<student>,<student>,...". The context
# manager closes the file even if a row fails to parse.
with open(floc) as f:
    for row in f:
        r = row.strip().split(',')
        class_name = r[0]
        # Drop empty fields (e.g. from trailing commas) once, up front, so an
        # empty string can become neither a node nor an edge endpoint.
        class_students = [student for student in r[1:] if student != '']
        print('{} {}'.format(class_name, class_students))

        # add nodes
        g.add_nodes_from(class_students)

        # add edges: connect every pair of students sharing this class
        g.add_edges_from(combinations(class_students, 2))

In [None]:
# all our nodes, followed by how many there are
print('{} {}'.format(list(g.nodes()), g.number_of_nodes()))

In [None]:
# all our edges, as (student, student) pairs
print(g.edges())

In [None]:
# IPython magic: loads the numpy/matplotlib names into the notebook namespace
# and renders figures inline, below the cell.
%pylab inline
# Draw the graph with light-blue nodes, each labelled with its node id.
nx.draw(g, node_color='#A0CBE2', with_labels=True)

### Centrality Measures

Documentation of the various centrality measures: https://networkx.org/documentation/latest/reference/algorithms/centrality.html

In [None]:
# Degree (number of incident edges) of every node in the graph.
nx.degree(g)

In [None]:
# list of frequency of each degree value:
# y[d] is the number of nodes whose degree is exactly d
y = nx.degree_histogram(g)
y

In [None]:
# needed to plot the histogram -> one "bucket" per degree value 0..max degree
# list() makes the cell display the bucket values on Python 3, where range()
# is lazy; plotting works the same either way.
x = list(range(len(nx.degree_histogram(g))))
x

In [None]:
# Line plot of the degree distribution: x is the degree, y the number of
# students with that degree. plot/title/ylabel/xlabel are the matplotlib
# names made available by `%pylab inline`.
plot(x,y)
title('degree histogram')
ylabel('number of students')
xlabel('degree')

In [None]:
# degree centrality for node v is the fraction of nodes it is connected to
# (returned as a dict mapping each node to its centrality value)
deg = nx.degree_centrality(g)
deg

In [None]:
# top 10 most central students: (student, centrality) pairs, highest first
sorted(deg.items(), key=lambda pair: pair[1], reverse=True)[:10]

In [None]:
# betweenness centrality of a node is the sum, over all pairs of other nodes,
# of the fraction of shortest paths between the pair that pass through it
# (NetworkX normalises the result by default). A node with high betweenness
# centrality has a large influence on the transfer of items through the
# network, under the assumption that item transfer follows the shortest paths.

betweenness = nx.betweenness_centrality(g)
print(betweenness)

In [None]:
# ten students with the highest betweenness centrality, highest first
sorted(betweenness.items(), key=lambda pair: pair[1], reverse=True)[:10]

In [None]:
# Calculating positioning of nodes once, so that we can plot the next graphs
# in exactly the same place by passing the same `pos` to every draw call.

pos = nx.spring_layout(g)
nx.draw(g, pos, with_labels=True)
print(g.number_of_edges())

In [None]:
# Edge Betweenness Centrality -> what is this calculating?

eb = nx.edge_betweenness_centrality(g)
# Rank (edge, score) pairs from highest to lowest score. sorted() is used
# because dict.items() is a view without a .sort() method on Python 3.
eb_il = sorted(eb.items(), key=lambda x: x[1], reverse=True)
# The edge (pair of students) with the highest edge betweenness.
print(eb_il[0][0])

In [None]:
# One subgraph per connected component, largest first.
# nx.connected_component_subgraphs was removed in NetworkX 2.4; the documented
# replacement builds each subgraph from nx.connected_components. .copy() makes
# each component an independent graph, as the old function did by default.
components = sorted(
    (g.subgraph(nodes).copy() for nodes in nx.connected_components(g)),
    key=len,
    reverse=True,
)
for c in components:
    print(c.number_of_nodes())

In [None]:
# Export the graph as a GEXF file so it can be loaded into Gephi (see the assignment below).
nx.write_gexf(g, 'class_graph.gexf')

## Assignment

1. Download Gephi (https://gephi.org/) and load our class graph into it. Explore various statistics and layouts. Find anything interesting? Can you identify clusters based on Modularity (screenshot below)? Why did people cluster this way? 

2. Pull Twitter data that can help with the portrait of your target. Where are graphs helpful? What are you able to do, and what would you like to do? Where are there limitations?

<img src="itp_class_graph.jpg" />