In [1]:
import sys

sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time
from math import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats

import igraph as ig

import random as random

from collections import *

from load_data import load_citation_network_igraph, case_info

%load_ext autoreload
%autoreload 2

# directory and court identifier that are passed to load_citation_network_igraph
# when the citation network is loaded
data_dir = '../../data/'
court_name = 'scotus'

from __future__ import division

pd.set_option('display.notebook_repr_html', False)

In [2]:
def get_snapshot_vertex_metrics(G, years, vertex_metrics):
    """
    Writes one csv of vertex metrics for every requested snapshot year

    For each year T in years the network is truncated at T, every requested
    metric is computed on the truncated network, and the resulting table is
    saved as 'vertex_metrics_<T>.csv' (a relative path, so it lands in the
    current working directory).

    Parameters
    -----------
    G: igraph network object whose vertices carry 'name' and 'year' attributes

    years: sequence of years at which a snapshot is taken

    vertex_metrics: names of the vertex metrics to compute (pagerank, indegree, etc)

    Output
    --------
    returns nothing; writes a csv file of the vertex metric data frame for
    each year in years
    """
    for snapshot_year in years:
        # the network as it looked at snapshot_year
        snapshot = get_network_at_time(G, snapshot_year)

        # rows are indexed by the 'name' attribute because it stays the same
        # across all truncations of the network
        table = pd.DataFrame(index=snapshot.vs['name'])
        table['year'] = snapshot.vs['year']

        # one column per requested metric
        for metric_name in vertex_metrics:
            table[metric_name] = create_metric_column(snapshot, metric_name)

        csv_name = 'vertex_metrics_' + str(snapshot_year) + '.csv'
        table.to_csv(csv_name)

In [3]:
def get_network_at_time(G, T):
    """
    Truncates the network G at year T

    The result contains every case dated up to and including T, together
    with the citations among those cases.

    Parameters
    ------------
    G: igraph network object where each node has a 'year' attribute

    T: last year to keep

    Output
    -------
    an igraph object of what the network looked like at time T
    """
    # keep the vertices with year <= T and build the subgraph on them
    return G.subgraph(G.vs.select(year_le=T))


In [4]:
def create_metric_column(G_T, metric):
    """
    Computes the requested vertex metric for every vertex of the network G_T

    Parameters
    ------------
    G_T: igraph network object

    metric: name of the vertex metric to compute; 'pagerank' and 'indegree'
            are the recognised values

    Output
    -------
    the values returned by the matching igraph method (G_T.pagerank() or
    G_T.indegree()), or None when metric is not one of the recognised names
    """
    if metric == 'pagerank':
        return G_T.pagerank()

    if metric == 'indegree':
        return G_T.indegree()

    # unrecognised metric name: nothing to compute
    return None

Testing the functions defined above

In [5]:
#This function is not strictly required; it just keeps the executed code below concise
def load_scotus_graph():
    """
    Loads the citation network and removes citations that point forward in time

    The network for court_name is read from data_dir with
    load_citation_network_igraph. An edge whose source (citing) case has an
    earlier year than its target (cited) case is treated as bad and deleted.

    Output
    -------
    G: the loaded igraph network with the bad edges removed
    """
    G = load_citation_network_igraph(data_dir, court_name)

    # read the year attribute once instead of building a vertex sequence for
    # both endpoints of every edge
    vertex_years = G.vs['year']

    # position i of the edge list is the edge with ID i, so collect edge IDs
    # rather than (source, target) pairs: deleting by pair removes only one
    # edge when the same pair is joined by several parallel edges
    bad_edge_ids = [edge_id
                    for edge_id, (citing, cited) in enumerate(G.get_edgelist())
                    if vertex_years[citing] < vertex_years[cited]]

    G.delete_edges(bad_edge_ids)
    return G

In [6]:
# load the citation network
G = load_scotus_graph()

# one snapshot per decade, 1760 through 2020 inclusive
years = list(range(1760, 2021, 10))
metrics = ['indegree', 'pagerank']

get_snapshot_vertex_metrics(G, years, metrics)

0 seconds for 250465 edges
