In [16]:
import sys
sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats

import networkx as nx

from load_data import load_citation_network, case_info

import operator

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Directory holding the data files; passed to load_citation_network below.
data_dir = '../../data/'
# Name of the court whose citation network is loaded (also echoed in the
# status message printed after loading).
court_name = 'scotus'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load SCOTUS Network (Directed Graph)

In [10]:
# Build the directed citation graph for the selected court.
# This will be a little slow the first time you run it.
G = load_citation_network(data_dir, court_name)

# Ask the graph for its sizes directly instead of materialising the full
# node and edge lists just to count them.  The parenthesised single-argument
# print produces the same text on Python 2 and Python 3.
print('loaded %s network with %d cases and %d edges'
      % (court_name, G.number_of_nodes(), G.number_of_edges()))

loaded scotus network with 33248 cases and 250465 edges


### Acquire Dictionaries for Centrality Measures

In [13]:
# Compute every centrality measure once and keep the results in module-level
# dictionaries that the "Top 10" cells below read from.  Each step is timed
# with wall-clock timestamps; in the recorded run closeness (~570 s) and
# betweenness (~5140 s) dominate, so expect this cell to take well over an hour.
# The single-argument print(...) form emits identical text on Python 2 and 3.
time1 = time.time()

close_cent_dict = nx.closeness_centrality(G)
time2 = time.time()
print('finished computing closeness centrality dictionary: took --- %s seconds ---' % (time2-time1))

eigen_cent_dict = nx.eigenvector_centrality(G)
time3 = time.time()
print('finished computing eigenvector centrality dictionary: took --- %s seconds ---' % (time3-time2))

between_cent_dict = nx.betweenness_centrality(G)
time4 = time.time()
print('finished computing betweenness centrality dictionary: took --- %s seconds ---' % (time4-time3))

page_rank_dict = nx.pagerank(G)
time5 = time.time()
print('finished computing page rank dictionary: took --- %s seconds ---' % (time5-time4))

# nx.hits returns a pair; the cells below use element 0 as the hubs
# dictionary and element 1 as the authorities dictionary.
hubs_auths_dict = nx.hits(G)
time6 = time.time()
print('finished computing hubs dictionary and authorities dictionary: took --- %s seconds ---' % (time6-time5))

finished computing closeness centrality dictionary: took --- 568.20600009 seconds ---
finished computing eigenvector centrality dictionary: took --- 18.8650000095 seconds ---
finished computing betweenness centrality dictionary: took --- 5141.90199995 seconds ---
finished computing page rank dictionary: took --- 4.35199999809 seconds ---
finished computing hubs dictionary and authorities dictionary: took --- 34.6640000343 seconds ---


### Top 10 In-Degree for SCOTUS (Directed)

In [44]:
# Rank all cases by in-degree (how often they are cited) and keep the ten
# highest.  `.items()` replaces the Python-2-only `.iteritems()` and yields
# the same (case id, in-degree) pairs.
ind = sorted(G.in_degree().items(), key=operator.itemgetter(1), reverse=True)[:10]
ind_cases = [x[0] for x in ind]
ind_values = [x[1] for x in ind]
# Decision date stored as the 'date' attribute on each node.
ind_dates = [G.node[each_case]['date'] for each_case in ind_cases]

data = {'Case ID': ind_cases,
        'In-Degree': ind_values,
        'Date': ind_dates
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when the graph holds fewer than ten cases.
df = pd.DataFrame(data, columns = ['In-Degree', 'Case ID', 'Date'],
                 index=range(1, len(ind) + 1))
df

Unnamed: 0,In-Degree,Case ID,Date
1,608,96405,1906-02-19
2,294,109532,1976-07-22
3,267,107252,1966-06-13
4,234,91573,1886-02-01
5,229,106545,1963-03-18
6,220,111221,1984-06-25
7,214,102605,1936-02-17
8,200,103012,1938-04-25
9,198,103355,1940-05-20
10,186,106761,1964-03-09


### Top 10 Out-Degree for SCOTUS (Directed)

In [46]:
# Rank all cases by out-degree (how many cases they cite) and keep the ten
# highest.  `.items()` replaces the Python-2-only `.iteritems()` and yields
# the same (case id, out-degree) pairs.
outd = sorted(G.out_degree().items(), key=operator.itemgetter(1), reverse=True)[:10]
outd_cases = [x[0] for x in outd]
outd_values = [x[1] for x in outd]
# Decision date stored as the 'date' attribute on each node.
outd_dates = [G.node[each_case]['date'] for each_case in outd_cases]

data = {'Case ID': outd_cases,
        'Out-Degree': outd_values,
        'Date': outd_dates
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when the graph holds fewer than ten cases.
df = pd.DataFrame(data, columns = ['Out-Degree', 'Case ID', 'Date'],
                 index=range(1, len(outd) + 1))
df

Unnamed: 0,Out-Degree,Case ID,Date
1,193,105210,1954-05-03
2,182,104616,1949-02-14
3,178,106366,1962-03-26
4,163,102224,1934-03-05
5,163,108329,1971-05-03
6,158,101864,1932-02-23
7,154,106267,1961-10-09
8,153,106548,1963-03-18
9,150,97966,1913-06-09
10,143,108221,1970-12-21


### Top 10 Closeness Centrality for SCOTUS (Directed)

In [37]:
# Ten cases with the highest closeness centrality, taken from the dictionary
# computed in the centrality cell.  `.items()` replaces the Python-2-only
# `.iteritems()` and yields the same (case id, score) pairs.
ccd = sorted(close_cent_dict.items(), key=operator.itemgetter(1), reverse=True)[:10]
ccd_cases = [x[0] for x in ccd]
ccd_values = [x[1] for x in ccd]
# Per-case context shown next to the score: decision date and both degrees.
ccd_dates = [G.node[each_case]['date'] for each_case in ccd_cases]
ccd_indegs = [G.in_degree(each_case) for each_case in ccd_cases]
ccd_outdegs = [G.out_degree(each_case) for each_case in ccd_cases]

data = {'Case ID': ccd_cases,
        'Closeness Centrality': ccd_values,
        'Date': ccd_dates,
        'In-Degree': ccd_indegs,
        'Out-Degree': ccd_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Closeness Centrality', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(ccd) + 1))
df

Unnamed: 0,Closeness Centrality,Case ID,Date,In-Degree,Out-Degree
1,0.186302,1741,2010-01-21,4,135
2,0.186093,803267,2012-06-28,0,118
3,0.186093,809122,2012-06-28,0,118
4,0.183343,118011,1996-03-27,42,134
5,0.182886,149702,2010-06-28,8,122
6,0.182269,145795,2008-06-12,7,99
7,0.179324,149008,2010-06-21,4,68
8,0.178671,131149,2003-12-10,15,102
9,0.177511,145706,2007-06-25,9,67
10,0.176388,1269289,2001-06-28,12,50


### Top 10 Eigenvector Centrality for SCOTUS (Directed)

In [36]:
# Ten cases with the highest eigenvector centrality, taken from the dictionary
# computed in the centrality cell.  `.items()` replaces the Python-2-only
# `.iteritems()` and yields the same (case id, score) pairs.
ecd = sorted(eigen_cent_dict.items(), key=operator.itemgetter(1), reverse=True)[:10]
ecd_cases = [x[0] for x in ecd]
ecd_values = [x[1] for x in ecd]
# Per-case context shown next to the score: decision date and both degrees.
ecd_dates = [G.node[each_case]['date'] for each_case in ecd_cases]
ecd_indegs = [G.in_degree(each_case) for each_case in ecd_cases]
ecd_outdegs = [G.out_degree(each_case) for each_case in ecd_cases]

data = {'Case ID': ecd_cases,
        'Eigenvector Centrality': ecd_values,
        'Date': ecd_dates,
        'In-Degree': ecd_indegs,
        'Out-Degree': ecd_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Eigenvector Centrality', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(ecd) + 1))
df

Unnamed: 0,Eigenvector Centrality,Case ID,Date,In-Degree,Out-Degree
1,0.371655,85131,1816-02-16,181,0
2,0.353776,85534,1827-03-12,124,0
3,0.279975,86696,1852-03-18,133,3
4,0.239934,85160,1816-03-20,94,0
5,0.239103,85283,1820-02-16,27,1
6,0.233147,87748,1866-01-29,65,1
7,0.230106,89173,1876-01-17,78,2
8,0.189831,86480,1849-02-18,66,10
9,0.186612,88605,1873-03-18,60,0
10,0.153327,88075,1869-11-18,49,0


### Top 10 Betweenness Centrality for SCOTUS (Directed)

In [35]:
# Ten cases with the highest betweenness centrality, taken from the dictionary
# computed in the centrality cell.  `.items()` replaces the Python-2-only
# `.iteritems()` and yields the same (case id, score) pairs.
bcd = sorted(between_cent_dict.items(), key=operator.itemgetter(1), reverse=True)[:10]
bcd_cases = [x[0] for x in bcd]
bcd_values = [x[1] for x in bcd]
# Per-case context shown next to the score: decision date and both degrees.
bcd_dates = [G.node[each_case]['date'] for each_case in bcd_cases]
bcd_indegs = [G.in_degree(each_case) for each_case in bcd_cases]
bcd_outdegs = [G.out_degree(each_case) for each_case in bcd_cases]

data = {'Case ID': bcd_cases,
        'Betweenness Centrality': bcd_values,
        'Date': bcd_dates,
        'In-Degree': bcd_indegs,
        'Out-Degree': bcd_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Betweenness Centrality', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(bcd) + 1))
df

Unnamed: 0,Betweenness Centrality,Case ID,Date,In-Degree,Out-Degree
1,0.00318,102605,1936-02-17,214,103
2,0.00308,101864,1932-02-23,120,158
3,0.002482,106366,1962-03-26,161,178
4,0.00237,103012,1938-04-25,200,49
5,0.002271,97966,1913-06-09,140,150
6,0.002236,101894,1932-04-11,78,133
7,0.002167,106545,1963-03-18,229,50
8,0.002022,106170,1961-02-20,133,85
9,0.001794,104616,1949-02-14,15,182
10,0.001769,104894,1951-04-30,86,138


### Top 10 Page Rank for SCOTUS (Directed)

In [34]:
# Ten cases with the highest PageRank score, taken from the dictionary
# computed in the centrality cell.  `.items()` replaces the Python-2-only
# `.iteritems()` and yields the same (case id, score) pairs.
prd = sorted(page_rank_dict.items(), key=operator.itemgetter(1), reverse=True)[:10]
prd_cases = [x[0] for x in prd]
prd_values = [x[1] for x in prd]
# Per-case context shown next to the score: decision date and both degrees.
prd_dates = [G.node[each_case]['date'] for each_case in prd_cases]
prd_indegs = [G.in_degree(each_case) for each_case in prd_cases]
prd_outdegs = [G.out_degree(each_case) for each_case in prd_cases]

data = {'Case ID': prd_cases,
        'Page Rank': prd_values,
        'Date': prd_dates,
        'In-Degree': prd_indegs,
        'Out-Degree': prd_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Page Rank', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(prd) + 1))
df

Unnamed: 0,Page Rank,Case ID,Date,In-Degree,Out-Degree
1,0.001539,85131,1816-02-16,181,0
2,0.001348,91573,1886-02-01,234,0
3,0.001194,85534,1827-03-12,124,0
4,0.000977,88661,1873-04-14,124,0
5,0.000895,85160,1816-03-20,94,0
6,0.000883,89675,1878-01-18,107,3
7,0.000859,98094,1914-02-24,138,11
8,0.000854,88804,1874-01-30,93,0
9,0.000847,85330,1821-03-18,157,0
10,0.000752,87010,1856-02-19,91,7


### Top 10 Hubs for SCOTUS (Directed)

In [39]:
# Ten cases with the highest hub score.  Element 0 of the pair returned by
# nx.hits holds the hub scores.  `.items()` replaces the Python-2-only
# `.iteritems()` and yields the same (case id, score) pairs.
hd = sorted(hubs_auths_dict[0].items(), key=operator.itemgetter(1), reverse=True)[:10]
hd_cases = [x[0] for x in hd]
hd_values = [x[1] for x in hd]
# Per-case context shown next to the score: decision date and both degrees.
hd_dates = [G.node[each_case]['date'] for each_case in hd_cases]
hd_indegs = [G.in_degree(each_case) for each_case in hd_cases]
hd_outdegs = [G.out_degree(each_case) for each_case in hd_cases]

data = {'Case ID': hd_cases,
        'Hubs': hd_values,
        'Date': hd_dates,
        'In-Degree': hd_indegs,
        'Out-Degree': hd_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Hubs', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(hd) + 1))
df

Unnamed: 0,Hubs,Case ID,Date,In-Degree,Out-Degree
1,0.002173,108611,1972-06-29,52,133
2,0.002,107082,1965-06-07,104,105
3,0.0019,109380,1976-02-27,159,127
4,0.001889,106267,1961-10-09,25,154
5,0.001831,108839,1973-10-09,81,125
6,0.001769,108798,1973-05-29,39,86
7,0.001766,109505,1976-10-04,49,80
8,0.001761,109836,1978-06-26,70,90
9,0.00176,149702,2010-06-28,8,122
10,0.001679,108329,1971-05-03,52,163


### Top 10 Authorities for SCOTUS (Directed)

In [41]:
# Ten cases with the highest authority score.  Element 1 of the pair returned
# by nx.hits holds the authority scores.  `.items()` replaces the
# Python-2-only `.iteritems()` and yields the same (case id, score) pairs.
ad = sorted(hubs_auths_dict[1].items(), key=operator.itemgetter(1), reverse=True)[:10]
ad_cases = [x[0] for x in ad]
ad_values = [x[1] for x in ad]
# Per-case context shown next to the score: decision date and both degrees.
ad_dates = [G.node[each_case]['date'] for each_case in ad_cases]
ad_indegs = [G.in_degree(each_case) for each_case in ad_cases]
ad_outdegs = [G.out_degree(each_case) for each_case in ad_cases]

data = {'Case ID': ad_cases,
        'Authorities': ad_values,
        'Date': ad_dates,
        'In-Degree': ad_indegs,
        'Out-Degree': ad_outdegs
       }
# Number the rows 1..N from the actual result length: a fixed ten-entry index
# makes pandas raise when fewer than ten cases are available.
df = pd.DataFrame(data, columns = ['Authorities', 'Case ID', 'Date', 'In-Degree', 'Out-Degree'],
                 index=range(1, len(ad) + 1))
df

Unnamed: 0,Authorities,Case ID,Date,In-Degree,Out-Degree
1,0.00373,103355,1940-05-20,198,9
2,0.003242,106514,1963-01-14,175,63
3,0.003122,103243,1939-11-22,150,10
4,0.003095,106761,1964-03-09,186,43
5,0.003023,103347,1940-04-22,151,26
6,0.002707,105746,1958-06-30,169,25
7,0.002447,105751,1958-06-30,151,56
8,0.002431,102991,1938-03-28,112,18
9,0.002395,103870,1943-06-14,143,17
10,0.002288,101097,1927-05-16,135,59
