In [75]:
import sys

sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time
from math import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats

import igraph as ig

from collections import *
import random

from load_data import load_citation_network_igraph, case_info

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Directory handed to load_citation_network_igraph as the data location
# (relative path, so it resolves against the notebook's working directory).
data_dir = '../../data/'
# Court identifier handed to load_citation_network_igraph and echoed in the
# "loaded ... network" status line.
court_name = 'scotus'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [76]:
# Load the citation graph for `court_name` via the project helper imported
# from load_data.
# this will be a little slow the first time you run it
G = load_citation_network_igraph(data_dir, court_name)

# A parenthesised single-argument print behaves identically on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print('loaded %s network with %d cases and %d edges' % (court_name, len(G.vs), len(G.es)))

1 seconds for 250465 edges
loaded scotus network with 33248 cases and 250465 edges


In [77]:
def make_subgraph_dict(G, min_year=1754, max_year=2016):
    """Build one cumulative subgraph of ``G`` per year.

    Parameters
    ----------
    G : graph object
        Must expose ``G.vs.select(year_lt=...)`` and ``G.subgraph(...)``
        (the igraph Graph interface used throughout this notebook).
    min_year : int, optional
        First key of the returned dict. Defaults to 1754.
    max_year : int, optional
        Last year considered. The returned dict also contains the key
        ``max_year + 1``. Defaults to 2016.

    Returns
    -------
    dict
        Maps every integer ``year`` in ``[min_year, max_year + 1]`` to the
        subgraph built from the vertices selected with ``year_lt=year``,
        i.e. the cases strictly before that year.

    Side effects
    ------------
    Prints the elapsed wall-clock time.
    """
    start = time.time()

    subgraph_dict = {}
    # range() is exclusive at the top, so max_year + 2 makes the last key
    # max_year + 1: the subgraph selected with year_lt = max_year + 1.
    for year in range(min_year, max_year + 2):
        earlier_cases = G.vs.select(year_lt=year)
        # each year in this dict is the subgraph of cases strictly before that year
        subgraph_dict[year] = G.subgraph(earlier_cases)

    elapsed = time.time() - start
    # Parenthesised single-argument print works on both Python 2 and Python 3.
    print("Making sub-graph dict took " + str(elapsed) + " seconds")
    return subgraph_dict

In [78]:
def time_decay_indegree(graph, vertex, threshold=10):
    """Count the in-neighbors of ``vertex`` that are at most ``threshold`` years newer.

    Parameters
    ----------
    graph : graph object
        Must expose ``neighbors(index, mode='IN')`` and ``vs[index]["year"]``.
    vertex : vertex object
        Must expose ``.index`` and ``["year"]``.
    threshold : int, optional
        Largest allowed value of ``neighbor year - vertex year`` for an
        in-neighbor to be counted. Defaults to 10.

    Returns
    -------
    int
        Number of in-neighbors whose year minus the vertex's year is
        ``<= threshold``.
    """
    cited_year = vertex["year"]
    citing_ids = graph.neighbors(vertex.index, mode='IN')
    # Keep only the citing vertices that fall inside the year window.
    within_window = [
        citing_id
        for citing_id in citing_ids
        if graph.vs[citing_id]["year"] - cited_year <= threshold
    ]
    return len(within_window)

In [79]:
def make_case_dict(subgraph_dict, metric="indegree"):
    """Rank the cases of every yearly subgraph by a centrality metric.

    Parameters
    ----------
    subgraph_dict : dict
        Maps a year to a subgraph, as produced by ``make_subgraph_dict``.
        Every key present in the dict is processed.
    metric : str, optional
        One of ``"indegree"`` (``vertex.indegree()``), ``"pagerank"``
        (``sub_G.pagerank()``) or ``"timedecay"``
        (``time_decay_indegree(sub_G, vertex, 10)``). Defaults to
        ``"indegree"``.

    Returns
    -------
    dict
        Maps each year to a list of
        ``(index_in_subgraph, name, year, metric_value)`` tuples sorted by
        ``metric_value`` in descending order, so a case's rank is its list
        position + 1. Ties keep the subgraph's vertex order (stable sort).

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names. Previously an
        unknown metric silently produced an empty ranking for every year.

    Side effects
    ------------
    Prints the elapsed wall-clock time.
    """
    supported_metrics = ("indegree", "pagerank", "timedecay")
    if metric not in supported_metrics:
        raise ValueError(
            "unknown metric %r; expected one of %s" % (metric, ", ".join(supported_metrics))
        )

    start = time.time()

    case_tuple_dict = {}
    # Iterate over the years actually present instead of repeating the
    # year range that make_subgraph_dict used to build the dict.
    for year, sub_G in subgraph_dict.items():
        vertices = sub_G.vs

        if metric == "pagerank":
            # pagerank() is computed once for the whole subgraph and is
            # paired with the vertices positionally below.
            metric_values = sub_G.pagerank()
        elif metric == "indegree":
            metric_values = [vertex.indegree() for vertex in vertices]
        else:  # "timedecay"
            metric_values = [time_decay_indegree(sub_G, vertex, 10) for vertex in vertices]

        rows = [
            (vertex.index, vertex['name'], vertex['year'], value)
            for vertex, value in zip(vertices, metric_values)
        ]

        # sorts the tuples by their metric so each case's rank is now its index + 1
        rows.sort(key=lambda row: row[3], reverse=True)
        case_tuple_dict[year] = rows

    elapsed = time.time() - start
    # Parenthesised single-argument print works on both Python 2 and Python 3.
    print("Making sorted case tuples for " + metric + " took " + str(elapsed) + " seconds")
    return case_tuple_dict

In [80]:
def calculate_score_for_case(case_index, past_cases_dict, graph=None):
    """Score how highly a ranking placed the cases that ``case_index`` cites.

    Parameters
    ----------
    case_index : int
        Vertex index of the citing case in ``graph``.
    past_cases_dict : dict
        Maps a year to a list of tuples sorted best-first, where element
        ``[1]`` of each tuple is the case name (the layout produced by
        ``make_case_dict``). The entry for the citing case's own year is used.
    graph : graph object, optional
        Graph in which ``case_index`` and its out-neighbors are looked up.
        Defaults to the module-level ``G`` so existing two-argument calls
        keep working.

    Returns
    -------
    float or int
        Sum over the ranked past cases that are cited by ``case_index`` of
        ``1 - rank / len(ranking)``, with ``rank`` starting at 1.
        ``0`` when none of the cited cases appear in the ranking.
    """
    if graph is None:
        # Fall back to the notebook-global network (original behaviour).
        graph = G

    all_past_cases = past_cases_dict[graph.vs[case_index]['year']]

    cited_indices = graph.neighbors(case_index, mode='OUT')
    # A set gives constant-time membership tests in the scan below; the
    # original list made every lookup linear in the number of citations.
    # Assumes vertex names are hashable (e.g. strings) -- TODO confirm.
    cited_names = set(graph.vs[i]['name'] for i in cited_indices)

    n_past = len(all_past_cases)
    # position + 1.0 is the 1-based rank; the float literal keeps the
    # division below a true division on Python 2 as well.
    scores = [
        1 - (position + 1.0) / n_past
        for position, past_case in enumerate(all_past_cases)
        if past_case[1] in cited_names
    ]

    return sum(scores)

In [81]:
def calculate_score_from_case_dict(G, case_tuple_dict):
    """Sum ``calculate_score_for_case`` over every vertex of ``G``.

    Parameters
    ----------
    G : graph object
        Its vertices (``G.vs()``) are iterated and each vertex's ``.index``
        is scored.
    case_tuple_dict : dict
        Per-year rankings, passed straight through to
        ``calculate_score_for_case``.

    Returns
    -------
    float or int
        The accumulated score over all vertices.

    Side effects
    ------------
    Prints the total score and the elapsed wall-clock time.

    Notes
    -----
    NOTE(review): ``calculate_score_for_case`` is called without a graph, so
    it looks vertices up in the module-level ``G``. The ``G`` passed here only
    determines which vertex indices are iterated; passing any graph other
    than the notebook-global one would mix indices from two graphs.
    """
    start = time.time()

    score_M = 0
    for vertex in G.vs():
        score_M += calculate_score_for_case(vertex.index, case_tuple_dict)

    elapsed = time.time() - start
    # Parenthesised single-argument prints work on both Python 2 and Python 3.
    print("Total score was: " + str(score_M))
    print("This took " + str(elapsed) + " seconds")
    return score_M

In [84]:
# Build the per-year cumulative subgraphs once, then rank the cases of each
# year under the three metrics being compared.
subgraph_dict = make_subgraph_dict(G)
case_dict_indegree = make_case_dict(subgraph_dict, metric="indegree")
case_dict_pagerank = make_case_dict(subgraph_dict, metric="pagerank")
case_dict_timedecay = make_case_dict(subgraph_dict, metric="timedecay")

Making sub-graph dict took 19.236000061 seconds
Making sorted case tuples for indegree took 43.4519999027 seconds
Making sorted case tuples for pagerank took 86.5289998055 seconds
Making sorted case tuples for timedecay took 81.6449999809 seconds


In [85]:
# Total score of the in-degree ranking over every case in G.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("indegree")
indegree_score = calculate_score_from_case_dict(G, case_dict_indegree)

indegree
Total score was: 174094.901772
This took 233.620000124 seconds


In [86]:
# Total score of the PageRank ranking over every case in G.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("pagerank")
pagerank_score = calculate_score_from_case_dict(G, case_dict_pagerank)

pagerank
Total score was: 149192.224771
This took 148.082000017 seconds


In [87]:
# Total score of the time-decayed in-degree ranking over every case in G.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("indegree with time decay")
timedecay_score = calculate_score_from_case_dict(G, case_dict_timedecay)

indegree with time decay
Total score was: 180027.49242
This took 143.429000139 seconds
