In [1]:
# IPython line magic: render matplotlib figures inline, in the output area of
# the cell that draws them.
%matplotlib inline

In [2]:
__author__ = 'michael'

import sys
sys.path.insert(0, '../python_code')

import helper_functions
import pandas as pd
import viz_functions_michael as viz
import networkx as nx
import matplotlib.pyplot as plt
import os
import datetime
from operator import itemgetter
import numpy as np
import calendar
from __future__ import division
import time
from collections import OrderedDict
import math
import json
import webbrowser

# The project root is taken to be the parent of the current working directory.
proj_cwd = os.path.dirname(os.getcwd())

# <root>/data/scotus : edgelist and nodelist data are read from here.
data_dir = os.path.join(proj_cwd, 'data')
data_scotus_dir = os.path.join(data_dir, 'scotus')

# <root>/docs/scotus : plots are saved here later.
doc_dir = os.path.join(proj_cwd, 'docs')
doc_scotus_dir = os.path.join(doc_dir, 'scotus')

# Create whichever of the two 'scotus' folders is missing (parents included).
for _scotus_dir in (data_scotus_dir, doc_scotus_dir):
    if not os.path.exists(_scotus_dir):
        os.makedirs(_scotus_dir)

###############
# BUILD A GRAPH
###############
# Load data from the CSVs.
# NOTE(review): the meaning of the trailing (1, 0) arguments is defined by
# helper_functions.csv_to_list, which is not visible here -- confirm there.
edgelist_data = helper_functions.csv_to_list(data_scotus_dir,
                                             'citations_sublist.csv',
                                             1, 0)
node_data = helper_functions.csv_to_list(data_scotus_dir,
                                         'consolidation.csv',
                                         1, 0)

# Instantiate a directed graph object, D
D = nx.DiGraph()

# Add our nodes to D. Each node carries a date, judges, citation_id and year
# attribute; columns used: row[0] case id, row[3] filing date as
# month/day/year, row[4] judges, row[5] citation id.
for row in node_data:
    case_id = int(row[0])
    # Empty fields are detected with ==, not `is`: identity of two equal
    # strings is an interpreter implementation detail, not a guarantee.
    if row[3] == '':
        # Undated case: every date-derived attribute is stored as ''.
        month, day, year = '', '', ''
        file_date = ''
    else:
        month, day, year = [int(element) for element in row[3].split('/')]
        file_date = datetime.date(year=year, month=month, day=day)
    judges = row[4]
    citation_id = '' if row[5] == '' else int(row[5])
    D.add_node(case_id,
               date=file_date,
               judges=judges,
               citation_id=citation_id,
               year=year)


for row in edgelist_data:
    citer = row[0]
    cited = row[1]
    # Edges point from citer to cited -- so the node with the highest in degree represents the most cited decision
    D.add_edge(int(citer), int(cited), random_attribute='random_string')

In [27]:
#'''
G = D.copy()

nodes_to_delete = []
for each_node in G.nodes():
    if not (1980 <= G.node[each_node]['year'] <= 1989):
        nodes_to_delete.append(each_node)

G.remove_nodes_from(nodes_to_delete)

years = []
cases_without_years = []
for each_node in G.nodes():
    if G.node[each_node]['year'] == '':
        cases_without_years.append(each_node)
    else:
        years.append(G.node[each_node]['year'])

print 'getting 1980s SCOTUS done'
print 'number of nodes (cases) in 1980s SCOTUS: ', len(G)
print 'number of edges in 1980s SCOTUS: ', G.number_of_edges()
print ''

print 'FOR DIRECTED GRAPH (G)'
time1 = time.time()
close_cent_dict_G = nx.closeness_centrality(G)
time2 = time.time()
print 'finished computing closeness centrality dictionary for G: took --- %s seconds ---' % (time2-time1)
eigen_cent_dict_G = nx.eigenvector_centrality_numpy(G) ## eigenvector_centrality(G) runs into convergence error
time3 = time.time()
print '(NUMPY) finished computing eigenvector centrality dictionary for G: took --- %s seconds ---' % (time3-time2)
between_cent_dict_G = nx.betweenness_centrality(G)
time4 = time.time()
print 'finished computing betweenness centrality dictionary for G: took --- %s seconds ---' % (time4-time3)
harmonic_cent_dict_G = nx.harmonic_centrality(G)
time5 = time.time()
print 'finished computing harmonic centrality dictionary for G: took --- %s seconds ---' % (time5-time4)

print ''
G2 = G.to_undirected()
print 'FOR UNDIRECTED GRAPH (G2)'
time1 = time.time()
close_cent_dict_G2 = nx.closeness_centrality(G2)
time2 = time.time()
print 'finished computing closeness centrality dictionary for G2: took --- %s seconds ---' % (time2-time1)
eigen_cent_dict_G2 = nx.eigenvector_centrality_numpy(G2) ## to be parallel with above
time3 = time.time()
print '(NUMPY) finished computing eigenvector centrality dictionary for G2: took --- %s seconds ---' % (time3-time2)
between_cent_dict_G2 = nx.betweenness_centrality(G2)
time4 = time.time()
print 'finished computing betweenness centrality dictionary for G2: took --- %s seconds ---' % (time4-time3)
harmonic_cent_dict_G2 = nx.harmonic_centrality(G2)
time5 = time.time()
print 'finished computing harmonic centrality dictionary for G2: took --- %s seconds ---' % (time5-time4)
#'''
print ''
print 'testing closeness_centrality on G (directed) and G2 (undirected)'
# Check if values are the same between directed and undirected
close_cent_list_G = [close_cent_dict_G[n] for n in G.nodes()]
close_cent_list_G2 = [close_cent_dict_G2[n] for n in G2.nodes()]
print close_cent_list_G == close_cent_list_G2
print 'mininum closenses centrality in G: ', min(close_cent_list_G)
print 'minimum closeness centrality on G2: ', min(close_cent_list_G2)
print 'maximum closeness centrality in G: ', max(close_cent_list_G)
print 'maximum closeness centrality on G2: ', max(close_cent_list_G2)

print ''
print 'testing eigenvector_centrality on G (directed) and G2 (undirected)'
eigen_cent_list_G = [eigen_cent_dict_G[n] for n in G.nodes()]
eigen_cent_list_G2 = [eigen_cent_dict_G2[n] for n in G2.nodes()]
print eigen_cent_list_G == eigen_cent_list_G2
print '(NUMPY) minimum eigenvector centrality on G: ', min(eigen_cent_list_G)
print '(NUMPY) minimum eigenvector centrality on G2: ', min(eigen_cent_list_G2)
print '(NUMPY) maximum eigenvector centrality on G: ', max(eigen_cent_list_G)
print '(NUMPY) maximum eigenvector centrality on G2: ', max(eigen_cent_list_G2)

print ''
print 'testing betweenness_centrality on G (directed) and G2 (undirected)'
between_cent_list_G = [between_cent_dict_G[n] for n in G.nodes()]
between_cent_list_G2 = [between_cent_dict_G2[n] for n in G2.nodes()]
print between_cent_list_G == between_cent_list_G2
print 'minimum betweenness centrality on G: ', min(between_cent_list_G)
print 'minimum betweenness centrality on G2: ', min(between_cent_list_G2)
print 'maximum betweenness centrality on G: ', max(between_cent_list_G)
print 'maximum betweenness centrality on G2: ', max(between_cent_list_G2)

print ''
print 'testing harmonic_centrality on G (directed) and G2 (undirected)'
harmonic_cent_list_G = [harmonic_cent_dict_G[n] for n in G.nodes()]
harmonic_cent_list_G2 = [harmonic_cent_dict_G2[n] for n in G2.nodes()]
print harmonic_cent_list_G == harmonic_cent_list_G2
print 'minimum harmonic centrality on G: ', min(harmonic_cent_list_G)
print 'minimum harmonic centrality on G2: ', min(harmonic_cent_list_G2)
print 'maximum harmonic centrality on G: ', max(harmonic_cent_list_G)
print 'maximum harmonic centrality on G2: ', max(harmonic_cent_list_G2)

getting 1980s SCOTUS done
number of nodes (cases) in 1980s SCOTUS:  2186
number of edges in 1980s SCOTUS:  7425

FOR DIRECTED GRAPH (G)
finished computing closeness centrality dictionary for G: took --- 0.382999897003 seconds ---
(NUMPY) finished computing eigenvector centrality dictionary for G: took --- 0.0620000362396 seconds ---
finished computing betweenness centrality dictionary for G: took --- 7.20399999619 seconds ---
finished computing harmonic centrality dictionary for G: took --- 0.451999902725 seconds ---

FOR UNDIRECTED GRAPH (G2)
finished computing closeness centrality dictionary for G2: took --- 7.38499999046 seconds ---
(NUMPY) finished computing eigenvector centrality dictionary for G2: took --- 0.0379998683929 seconds ---
finished computing betweenness centrality dictionary for G2: took --- 25.5370001793 seconds ---
finished computing harmonic centrality dictionary for G2: took --- 8.61999988556 seconds ---

testing closeness_centrality on G (directed) and G2 (undirec

In [47]:
## Testing eigenvector_centrality(graph_object) vs. eigenvector_centrality_numpy(graph_object)

print '..................COMPUTE DICT:..................'

try:
    test_eigen_cent_dict_G = nx.eigenvector_centrality(G)
except Exception:
    print 'COULD NOT compute eigenvector centrality dictionary for G'
    pass
else:
    print 'FINISHED computing eigenvector centrality dictionary for G'

try:
    test_eigen_cent_dict_G2 = nx.eigenvector_centrality(G2)
except Exception:
    print 'COULD NOT compute eigenvector centrality dictionary for G2'
    pass
else:
    print 'FINISHED computing eigenvector centrality dictionary for G2'    

print ''
print '..................TEST:..................'
try:
    test_eigen_cent_list_G = [test_eigen_cent_dict_G[n] for n in G.nodes()]
except Exception:
    print 'COULD NOT compute the list for eigenvector centrality of G from the respective dictionary'
    pass
else:
    print 'FINISHED computing the list for eigenvector centrality of G from the respective dictionary'
    
try:
    test_eigen_cent_list_G2 = [test_eigen_cent_dict_G2[n] for n in G2.nodes()]
except Exception:
    print 'COULD NOT compute the list for eigenvector centrality of G2 from the respective dictionary'
    pass
else:
    print 'FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary'

print ''

try:
    test_eigen_cent_list_G == test_eigen_cent_list_G2
except Exception:
    print 'the list for eigenvector centrality of G, the list for eigenvector centrality of G2, or both UNDEFINED => EXIT TEST'
    pass
else:
    if test_eigen_cent_list_G == test_eigen_cent_list_G2:
        print 'True: centrality values between G and G2 are the same'
    else:
        print 'False: centrality values between G and G2 are NOT the same'
    print 'minimum eigenvector centrality on G: ', min(test_eigen_cent_list_G)
    print 'minimum eigenvector centrality on G2: ', min(test_eigen_cent_list_G2)
    print 'maximum eigenvector centrality on G: ', max(test_eigen_cent_list_G)
    print 'maximum eigenvector centrality on G2: ', max(test_eigen_cent_list_G2)    
    
print ''
print ''
print ''
print '..................COMPUTE DICT (using NUMPY):..................'
try:
    test_eigen_cent_numpy_dict_G = nx.eigenvector_centrality_numpy(G)
except Exception:
    print 'COULD NOT compute eigenvector centrality dictionary for G (using NUMPY)'
    pass
else:
    print 'FINISHED computing eigenvector centrality dictionary for G (using NUMPY)'

try:
    test_eigen_cent_numpy_dict_G2 = nx.eigenvector_centrality_numpy(G2)
except Exception:
    print 'COULD NOT compute eigenvector centrality dictionary for G2 (using NUMPY)'
    pass
else:
    print 'FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)'

print ''
print '..................TEST (using NUMPY):..................'

try:
    test_eigen_cent_numpy_list_G = [test_eigen_cent_numpy_dict_G[n] for n in G.nodes()]
except Exception:
    print 'COULD NOT compute the list for eigenvector centrality of G from the respective dictionary (using NUMPY)'
    pass
else:
    print 'FINISHED computing the list for eigenvector centrality of G from the respective dictionary (using NUMPY)'

try:
    test_eigen_cent_numpy_list_G2 = [test_eigen_cent_numpy_dict_G2[n] for n in G.nodes()]
except Exception:
    print 'COULD NOT compute the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)'
    pass
else:
    print 'FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)'

print ''

try:
    test_eigen_cent_numpy_list_G == test_eigen_cent_numpy_list_G2
except Exception:
    print 'the list for eigenvector centrality of G, the list for eigenvector centrality of G2, or both UNDEFINED => EXIT TEST (using NUMPY)'
    pass
else:
    if test_eigen_cent_numpy_list_G == test_eigen_cent_numpy_list_G2:
        print 'True: centrality values between G and G2 are the same (using NUMPY)'
    else:
        print 'False: centrality values between G and G2 are NOT the same (using NUMPY)'
    print 'minimum eigenvector centrality on G (using NUMPY):', min(test_eigen_cent_numpy_list_G)
    print 'minimum eigenvector centrality on G2 (using NUMPY): ', min(test_eigen_cent_numpy_list_G2)
    print 'maximum eigenvector centrality on G (using NUMPY): ', max(test_eigen_cent_numpy_list_G)
    print 'maximum eigenvector centrality on G2 (using NUMPY): ', max(test_eigen_cent_numpy_list_G2)

..................COMPUTE DICT:..................
COULD NOT compute eigenvector centrality dictionary for G
FINISHED computing eigenvector centrality dictionary for G2

..................TEST:..................
COULD NOT compute the list for eigenvector centrality of G from the respective dictionary
FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary

the list for eigenvector centrality of G, the list for eigenvector centrality of G2, or both UNDEFINED => EXIT TEST



..................COMPUTE DICT (using NUMPY):..................
FINISHED computing eigenvector centrality dictionary for G (using NUMPY)
FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)

..................TEST (using NUMPY):..................
FINISHED computing the list for eigenvector centrality of G from the respective dictionary (using NUMPY)
FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)

False: 

In [49]:
def eigenvector_centrality_numpy_TEST(G, G2):

    print '..................COMPUTE DICT (using NUMPY):..................'
    try:
        test_eigen_cent_numpy_dict_G = nx.eigenvector_centrality_numpy(G)
    except Exception:
        print 'COULD NOT compute eigenvector centrality dictionary for G (using NUMPY)'
        pass
    else:
        print 'FINISHED computing eigenvector centrality dictionary for G (using NUMPY)'

    try:
        test_eigen_cent_numpy_dict_G2 = nx.eigenvector_centrality_numpy(G2)
    except Exception:
        print 'COULD NOT compute eigenvector centrality dictionary for G2 (using NUMPY)'
        pass
    else:
        print 'FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)'

    print ''
    print '..................TEST (using NUMPY):..................'

    try:
        test_eigen_cent_numpy_list_G = [test_eigen_cent_numpy_dict_G[n] for n in G.nodes()]
    except Exception:
        print 'COULD NOT compute the list for eigenvector centrality of G from the respective dictionary (using NUMPY)'
        pass
    else:
        print 'FINISHED computing the list for eigenvector centrality of G from the respective dictionary (using NUMPY)'

    try:
        test_eigen_cent_numpy_list_G2 = [test_eigen_cent_numpy_dict_G2[n] for n in G.nodes()]
    except Exception:
        print 'COULD NOT compute the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)'
        pass
    else:
        print 'FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)'

    print ''

    try:
        test_eigen_cent_numpy_list_G == test_eigen_cent_numpy_list_G2
    except Exception:
        print 'the list for eigenvector centrality of G, the list for eigenvector centrality of G2, or both UNDEFINED => EXIT TEST (using NUMPY)'
        pass
    else:
        if test_eigen_cent_numpy_list_G == test_eigen_cent_numpy_list_G2:
            print 'True: centrality values between G and G2 are the same (using NUMPY)'
        else:
            print 'False: centrality values between G and G2 are NOT the same (using NUMPY)'
        print 'minimum eigenvector centrality on G (using NUMPY):', min(test_eigen_cent_numpy_list_G)
        print 'minimum eigenvector centrality on G2 (using NUMPY): ', min(test_eigen_cent_numpy_list_G2)
        print 'maximum eigenvector centrality on G (using NUMPY): ', max(test_eigen_cent_numpy_list_G)
        print 'maximum eigenvector centrality on G2 (using NUMPY): ', max(test_eigen_cent_numpy_list_G2)

In [50]:
# First of three cells that make this same call; the recorded outputs for G
# differ from run to run (compare the printed minimum/maximum values).
eigenvector_centrality_numpy_TEST(G, G2)

..................COMPUTE DICT (using NUMPY):..................
FINISHED computing eigenvector centrality dictionary for G (using NUMPY)
FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)

..................TEST (using NUMPY):..................
FINISHED computing the list for eigenvector centrality of G from the respective dictionary (using NUMPY)
FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)

False: centrality values between G and G2 are NOT the same (using NUMPY)
minimum eigenvector centrality on G (using NUMPY): -2.84585043941e-05
minimum eigenvector centrality on G2 (using NUMPY):  -1.82506322817e-18
maximum eigenvector centrality on G (using NUMPY):  0.477661791089
maximum eigenvector centrality on G2 (using NUMPY):  0.251937643107


In [51]:
# Same call as the previous cell -- presumably repeated to check whether the
# numpy result for the directed graph G is reproducible.
eigenvector_centrality_numpy_TEST(G, G2)

..................COMPUTE DICT (using NUMPY):..................
FINISHED computing eigenvector centrality dictionary for G (using NUMPY)
FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)

..................TEST (using NUMPY):..................
FINISHED computing the list for eigenvector centrality of G from the respective dictionary (using NUMPY)
FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)

False: centrality values between G and G2 are NOT the same (using NUMPY)
minimum eigenvector centrality on G (using NUMPY): -6.71215959909e-06
minimum eigenvector centrality on G2 (using NUMPY):  -2.08182504123e-18
maximum eigenvector centrality on G (using NUMPY):  0.47981266511
maximum eigenvector centrality on G2 (using NUMPY):  0.251937643107


In [54]:
# Third identical call; in the recorded output the computation for G fails.
# NOTE(review): the execution count jumps from 51 to 54, so other cells ran in
# between -- confirm that G and G2 were unchanged before reading this as
# non-determinism of the solver.
eigenvector_centrality_numpy_TEST(G, G2)

..................COMPUTE DICT (using NUMPY):..................
COULD NOT compute eigenvector centrality dictionary for G (using NUMPY)
FINISHED computing eigenvector centrality dictionary for G2 (using NUMPY)

..................TEST (using NUMPY):..................
COULD NOT compute the list for eigenvector centrality of G from the respective dictionary (using NUMPY)
FINISHED computing the list for eigenvector centrality of G2 from the respective dictionary (using NUMPY)

the list for eigenvector centrality of G, the list for eigenvector centrality of G2, or both UNDEFINED => EXIT TEST (using NUMPY)
