In [1]:
# __future__ imports must precede every other statement in a Python module;
# keeping this first lets the cell also run when the notebook is exported as
# a plain .py script (otherwise it is a SyntaxError there).
from __future__ import division

# Root of the local law-net checkout. All other paths in this notebook are
# built by string concatenation onto this value, so it must end with '/'.
# NOTE: machine-specific absolute path -- adjust before running elsewhere.
top_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'

import os
import sys
import time
from math import *
import copy
import cPickle as pickle

# data
import numpy as np
import pandas as pd

# viz
import matplotlib.pyplot as plt


# graph
import igraph as ig

# our code
sys.path.append(top_directory + 'code/')
from load_data import load_and_clean_graph, case_info
from pipeline.download_data import download_bulk_resource
from pipeline.make_clean_data import *
from viz import print_describe


sys.path.append(top_directory + 'explore/vertex_metrics_experiment/code/')
from make_snapshots import *
from make_edge_df import *
from attachment_model_inference import *
from compute_ranking_metrics import *
from pipeline_helper_functions import *
from make_case_text_files import *
from bag_of_words import *
from similarity_matrix import *
from rankscore_experiment import *

# Directory layout, derived from top_directory (which ends with '/').
data_dir = '{0}data/'.format(top_directory)
experiment_data_dir = '{0}vertex_metrics_experiment/'.format(data_dir)

# Court identifier handed to load_and_clean_graph below.
court_name = 'scotus'

# jupyter notebook settings
# autoreload in mode 2 reloads all imported modules before each cell runs,
# so edits to the project code on disk are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Load the graph for `court_name` from `data_dir` using the project's
# load_data helper. Presumably this is the case citation network as an
# igraph Graph -- TODO confirm against load_data.load_and_clean_graph.
G = load_and_clean_graph(data_dir, court_name)

# with similarity

In [8]:

# Alternative metric list kept for reference (not used in this run):
# metrics = ['indegree', 'outdegree', 'degree',
#             'd_pagerank', 'authorities', 'hubs']

# Metric names handed to get_individual_rankscores_LR for this experiment.
metrics = ['indegree', 'outdegree', 'similarity', 'all']

# Flag forwarded to get_individual_rankscores_LR; on for this experiment.
include_similarity = True

# Experiment settings: the years 1900 through 2015 inclusive, a fixed seed
# (presumably for selecting the test cases -- confirm in
# rankscore_experiment) and the number of test cases to score.
test_params = dict(active_years=range(1900, 2016),
                   seed=4332,
                   num_test_cases=1000)

In [9]:
# Run the rank-score experiment for every entry of `metrics` with
# include_similarity switched on. With print_progress=True the helper prints
# each metric name followed by timestamped progress counters; the recorded
# log below spans roughly 12:05-12:18 for the four metrics.
# The result is later reduced with .mean(axis=0) to one value per metric,
# so it is presumably a DataFrame with one column per metric -- TODO confirm.
scores_sim = get_individual_rankscores_LR(G, test_params,
                                          metrics,
                                          include_similarity,
                                          experiment_data_dir,
                                          print_progress=True)

indegree
(1/1000) at 12:05:24
(2/1000) at 12:05:25
(4/1000) at 12:05:25
(8/1000) at 12:05:26
(16/1000) at 12:05:28
(32/1000) at 12:05:31
(64/1000) at 12:05:38
(128/1000) at 12:05:54
(256/1000) at 12:06:23
(512/1000) at 12:07:15
outdegree
(1/1000) at 12:09:03
(2/1000) at 12:09:03
(4/1000) at 12:09:04
(8/1000) at 12:09:05
(16/1000) at 12:09:07
(32/1000) at 12:09:12
(64/1000) at 12:09:20
(128/1000) at 12:09:34
(256/1000) at 12:09:58
(512/1000) at 12:10:47
similarity
(1/1000) at 12:12:25
(2/1000) at 12:12:25
(4/1000) at 12:12:26
(8/1000) at 12:12:26
(16/1000) at 12:12:28
(32/1000) at 12:12:32
(64/1000) at 12:12:38
(128/1000) at 12:12:51
(256/1000) at 12:13:15
(512/1000) at 12:14:03
all
(1/1000) at 12:15:37
(2/1000) at 12:15:37
(4/1000) at 12:15:38
(8/1000) at 12:15:39
(16/1000) at 12:15:41
(32/1000) at 12:15:45
(64/1000) at 12:15:53
(128/1000) at 12:16:20
(256/1000) at 12:16:57
(512/1000) at 12:17:56


In [15]:
# Column-wise mean: one average rank score per metric.
# NOTE(review): all four means are within ~0.003 of each other here, while
# the run without similarity further down separates the metrics clearly.
# Presumably include_similarity=True adds the similarity feature to every
# single-metric model -- confirm in rankscore_experiment.
scores_sim.mean(axis=0)

indegree      0.932342
outdegree     0.932782
similarity    0.930509
all           0.933654
dtype: float64

# no similarity

In [16]:

# Alternative metric list kept for reference (not used in this run):
# metrics = ['indegree', 'outdegree', 'degree',
#             'd_pagerank', 'authorities', 'hubs']

# Metric names handed to get_individual_rankscores_LR for this experiment.
# NOTE: this rebinds `metrics`, `include_similarity` and `test_params` from
# the earlier similarity experiment, so cell execution order matters.
metrics = ['indegree', 'outdegree', 'all']

# Flag forwarded to get_individual_rankscores_LR; off for this experiment.
include_similarity = False

# Same experiment settings as the similarity run: the years 1900 through
# 2015 inclusive, the same seed, and the same number of test cases.
test_params = dict(active_years=range(1900, 2016),
                   seed=4332,
                   num_test_cases=1000)

In [17]:
# Same experiment as for `scores_sim`, but with include_similarity switched
# off and the 'similarity' entry dropped from `metrics`. With
# print_progress=True the helper prints each metric name followed by
# timestamped progress counters (see the log below).
# The result is later reduced with .mean(axis=0) to one value per metric,
# so it is presumably a DataFrame with one column per metric -- TODO confirm.
scores = get_individual_rankscores_LR(G, test_params,
                                      metrics,
                                      include_similarity,
                                      experiment_data_dir,
                                      print_progress=True)

indegree
(1/1000) at 12:31:14
(2/1000) at 12:31:14
(4/1000) at 12:31:14
(8/1000) at 12:31:15
(16/1000) at 12:31:17
(32/1000) at 12:31:20
(64/1000) at 12:31:26
(128/1000) at 12:31:37
(256/1000) at 12:31:59
(512/1000) at 12:32:41
outdegree
(1/1000) at 12:34:04
(2/1000) at 12:34:04
(4/1000) at 12:34:05
(8/1000) at 12:34:05
(16/1000) at 12:34:07
(32/1000) at 12:34:10
(64/1000) at 12:34:15
(128/1000) at 12:34:27
(256/1000) at 12:34:48
(512/1000) at 12:35:30
all
(1/1000) at 12:36:54
(2/1000) at 12:36:54
(4/1000) at 12:36:54
(8/1000) at 12:36:55
(16/1000) at 12:36:57
(32/1000) at 12:37:00
(64/1000) at 12:37:06
(128/1000) at 12:37:17
(256/1000) at 12:37:39
(512/1000) at 12:38:22


In [18]:
# Column-wise mean: one average rank score per metric for the run without
# similarity. Compare with the scores_sim means printed earlier in the
# notebook (about 0.93 for every metric).
scores.mean(axis=0)

indegree     0.671354
outdegree    0.776584
all          0.793527
dtype: float64