In [1]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [2]:
__author__ = 'michael'

import sys
sys.path.insert(0, '../python_code')

import helper_functions
import pandas as pd
import viz_functions_michael as viz
import networkx as nx
import matplotlib.pyplot as plt
import os
import datetime
from operator import itemgetter
import numpy as np
import calendar
from __future__ import division
import time
from collections import OrderedDict
import math
import json
import webbrowser

# The project root is the parent of the current working directory.
proj_cwd = os.path.dirname(os.getcwd())

# <root>/data/scotus -- the edgelist and nodelist CSVs are read from here.
data_dir = os.path.join(proj_cwd, 'data')
data_scotus_dir = os.path.join(data_dir, 'scotus')

# <root>/docs/scotus -- plots are saved here later.
doc_dir = os.path.join(proj_cwd, 'docs')
doc_scotus_dir = os.path.join(doc_dir, 'scotus')

# Create whichever of the two working directories does not exist yet.
for required_dir in (data_scotus_dir, doc_scotus_dir):
    if not os.path.exists(required_dir):
        os.makedirs(required_dir)

###############
# BUILD A GRAPH
###############
# Load the edge list and node list from the CSVs in data_scotus_dir.
# (The meaning of the trailing `1, 0` arguments is defined by
# helper_functions.csv_to_list -- see that module.)
edgelist_data = helper_functions.csv_to_list(data_scotus_dir,
                                             'citations_sublist.csv',
                                             1, 0)
node_data = helper_functions.csv_to_list(data_scotus_dir,
                                         'consolidation.csv',
                                         1, 0)

# Instantiate a directed graph object, D
D = nx.DiGraph()

# Add one node per row of the node list. Each node is loaded with a date,
# judges, citation_id and year attribute.
for row in node_data:
    case_id = int(row[0])

    # row[3] is the date as 'month/day/year', or an empty string when missing.
    # Emptiness is tested with `==`, not `is`: whether two equal strings are
    # the same object is an interpreter detail and must not drive the logic.
    raw_date = row[3]
    if raw_date == '':
        # Missing date: every date-derived field is stored as ''.
        month, day, year = '', '', ''
        file_date = ''
    else:
        month, day, year = [int(element) for element in raw_date.split('/')]
        file_date = datetime.date(year=year, month=month, day=day)

    judges = row[4]
    # row[5] is the citation id; kept as '' when the column is empty.
    citation_id = '' if row[5] == '' else int(row[5])

    D.add_node(case_id,
               date=file_date,
               judges=judges,
               citation_id=citation_id,
               year=year)


# Add one directed edge per row of the edge list.
# Edges point from citer to cited -- so the node with the highest in degree
# represents the most cited decision.
for row in edgelist_data:
    citer = row[0]
    cited = row[1]
    D.add_edge(int(citer), int(cited), random_attribute='random_string')

In [3]:
# Work on a copy so the full graph D is left untouched.
G = D.copy()

# Keep only the cases whose year lies in 1980..1989 (inclusive).
# A missing year is stored as ''. It is tested for explicitly instead of being
# fed into the numeric range check: comparing '' with an int only "works"
# through Python 2's arbitrary cross-type ordering and raises TypeError on
# Python 3. Nodes that carry no 'year' attribute at all (e.g. nodes created
# only by an edge) are treated as undated and removed as well.
nodes_to_delete = []
for each_node in G.nodes():
    node_year = G.node[each_node].get('year', '')
    if node_year == '' or not (1980 <= node_year <= 1989):
        nodes_to_delete.append(each_node)

G.remove_nodes_from(nodes_to_delete)

years = []
cases_without_years = []
for each_node in G.nodes():
    if G.node[each_node]['year'] == '':
        cases_without_years.append(each_node)
    else:
        years.append(G.node[each_node]['year'])

print 'getting 1980s SCOTUS done'
print 'number of nodes (cases) in 1980s SCOTUS: ', len(G)
print 'number of edges in 1980s SCOTUS: ', G.number_of_edges()
print ''

print 'FOR DIRECTED GRAPHS'
time1 = time.time()
close_cent_dict_G = nx.closeness_centrality(G)
time2 = time.time()
print 'finished computing closeness centrality dictionary: took --- %s seconds ---' % (time2-time1)
eigen_cent_dict_G = nx.eigenvector_centrality_numpy(G) ## eigenvector_centrality(G) runs into convergence error
time3 = time.time()
print 'finished computing eigenvector centrality dictionary: took --- %s seconds ---' % (time3-time2)
between_cent_dict_G = nx.betweenness_centrality(G)
time4 = time.time()
print 'finished computing betweenness centrality dictionary: took --- %s seconds ---' % (time4-time3)
harmonic_cent_dict_G = nx.harmonic_centrality(G)
time5 = time.time()
print 'finished computing harmonic centrality dictionary: took --- %s seconds ---' % (time5-time4)

print ''
G2 = G.to_undirected()
print 'FOR UNDIRECTED GRAPHS'
time1 = time.time()
close_cent_dict_G2 = nx.closeness_centrality(G2)
time2 = time.time()
print 'finished computing closeness centrality dictionary: took --- %s seconds ---' % (time2-time1)
eigen_cent_dict_G2 = nx.eigenvector_centrality_numpy(G2) ## to be parallel with above
time3 = time.time()
print 'finished computing eigenvector centrality dictionary: took --- %s seconds ---' % (time3-time2)
between_cent_dict_G2 = nx.betweenness_centrality(G2)
time4 = time.time()
print 'finished computing betweenness centrality dictionary: took --- %s seconds ---' % (time4-time3)
harmonic_cent_dict_G2 = nx.harmonic_centrality(G2)
time5 = time.time()
print 'finished computing harmonic centrality dictionary: took --- %s seconds ---' % (time5-time4)

# Check if values are the same between directed and undirected
close_cent_list_G = [close_cent_dict_G[n] for n in G.nodes()]
close_cent_list_G2 = [close_cent_dict_G2[n] for n in G.nodes()]
print close_cent_list_G == close_cent_list_G2

eigen_cent_list_G = [eigen_cent_dict_G[n] for n in G.nodes()]
eigen_cent_list_G2 = [eigen_cent_dict_G2[n] for n in G.nodes()]
print eigen_cent_list_G == eigen_cent_list_G2

between_cent_list_G = [between_cent_dict_G[n] for n in G.nodes()]
between_cent_list_G2 = [between_cent_dict_G2[n] for n in G.nodes()]
print between_cent_list_G == between_cent_list_G2

harmonic_cent_list_G = [harmonic_cent_dict_G[n] for n in G.nodes()]
harmonic_cent_list_G2 = [harmonic_cent_dict_G2[n] for n in G.nodes()]
print harmonic_cent_list_G == harmonic_cent_list_G2


getting 1980s SCOTUS done
number of nodes (cases) in 1980s SCOTUS:  2186
number of edges in 1980s SCOTUS:  7425

FOR DIRECTED GRAPHS
finished computing closeness centrality dictionary: took --- 0.292999982834 seconds ---
finished computing eigenvector centrality dictionary: took --- 0.249000072479 seconds ---
finished computing betweenness centrality dictionary: took --- 8.63199996948 seconds ---
finished computing harmonic centrality dictionary: took --- 0.371999979019 seconds ---

FOR UNDIRECTED GRAPHS
finished computing closeness centrality dictionary: took --- 8.01999998093 seconds ---
finished computing eigenvector centrality dictionary: took --- 0.0279998779297 seconds ---
finished computing betweenness centrality dictionary: took --- 24.1700000763 seconds ---
finished computing harmonic centrality dictionary: took --- 7.71000003815 seconds ---
False
False
False
False
