In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
sns.set(color_codes=True)

In [6]:
# Load only the six columns (selected by position) that the rest of the notebook
# uses: citing_case, citing_case_year, citing_opinion_type, cited_case,
# cited_case_year and cited_usid.
# NOTE(review): the positional indices are tied to this CSV's column layout —
# confirm they still match if the file is regenerated.
data = pd.read_csv('./RawDatafromJim_Complete.csv', usecols=[0,1,3,6,7,16])


def _numeric_if_possible(column):
    """Return `column` converted to a numeric dtype, or unchanged if it cannot be parsed.

    Equivalent to ``pd.to_numeric(column, errors='ignore')``, which is deprecated
    in recent pandas releases; catching the parse error explicitly works on both
    old and new versions.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


data = data.apply(_numeric_if_possible)

# Every citation row becomes one edge of weight 1 in the citation graph.
data["weight"] = 1

In [55]:
# Build per-case lookup tables from the citation rows:
#   citing_data -> citing_case, citing_case_year, citing_opinion_type
#   cited_data  -> cited_case, cited_case_year, cited_usid
# Rows are de-duplicated on the full column combination (first occurrence kept).
# NOTE(review): a case that appears with more than one distinct year / opinion
# type / usid keeps several rows here — confirm each case maps to one row
# before relying on these tables as one-to-one lookups.
citing_data = (
    data
    .filter(['citing_case', 'citing_case_year', 'citing_opinion_type'], axis=1)
    .drop_duplicates(keep='first')
)
cited_data = (
    data
    .filter(['cited_case', 'cited_case_year', 'cited_usid'], axis=1)
    .drop_duplicates(keep='first')
)
# A combined `case_info` table (left merge of cited_data with citing_data on
# cited_case == citing_case, then de-duplicated) was tried earlier and is
# deliberately not built.

In [56]:
def case_data_for_year(calc_year):
    """Compute HITS hub/authority scores for the citation network up to `calc_year`.

    Reads the notebook-level ``data`` frame, keeps the citations whose
    ``citing_case_year`` is less than or equal to ``calc_year``, builds a
    directed graph with an edge citing_case -> cited_case for each of them
    (carrying the ``weight`` column as an edge attribute) and runs
    ``nx.hits`` on it with un-normalized output.

    Parameters
    ----------
    calc_year : number
        Inclusive cutoff year for the citing case.

    Returns
    -------
    pandas.DataFrame or None
        One row per case in the graph with columns ``calc_year``, ``case``,
        ``hub_raw``, ``auth_raw``, ``hub_percentile``, ``hub_rank``,
        ``auth_percentile`` and ``auth_rank``; ``None`` when HITS returns no
        scores (e.g. no citation falls within the cutoff).
    """
    # Limit the citations to those made in calc_year or before.
    year_data = data.loc[data['citing_case_year'] <= calc_year]

    # networkx 2.0 renamed from_pandas_dataframe to from_pandas_edgelist; use
    # whichever one the installed version provides so the cell runs on both.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    G = build_graph(
        year_data,
        "citing_case",
        "cited_case",
        edge_attr=["weight"],
        create_using=nx.DiGraph(),
    )

    # Hub and authority scores, allowing up to 1000 iterations.
    h, a = nx.hits(G, max_iter=1000, normalized=False)
    if not h or not a:
        return None

    year_case_scores = pd.DataFrame(
        [
            {'calc_year': calc_year, 'case': key, 'hub_raw': h[key], 'auth_raw': a[key]}
            for key in h
        ]
    )

    # Rank each raw score column on its own; ranking the whole frame just to
    # read one column would also rank the non-score columns for nothing.
    # *_percentile is the ascending fractional rank, *_rank puts the largest
    # raw score at rank 1.
    for kind in ('hub', 'auth'):
        raw_scores = year_case_scores[kind + '_raw']
        year_case_scores[kind + '_percentile'] = raw_scores.rank(pct=True)
        year_case_scores[kind + '_rank'] = raw_scores.rank(ascending=False)

    return year_case_scores


In [57]:
# Years for which hub/authority scores are computed; currently only 1800.
# To analyse more years, derive the list from the data instead, e.g. the first
# ten distinct cited_case_year values in ascending order:
#   years_to_analyize = data.cited_case_year.unique()
#   years_to_analyize.sort()
#   years_to_analyize = years_to_analyize[0:10]
years_to_analyize = [1800]

In [58]:
# Score each requested year and collect the per-year tables, skipping years
# for which case_data_for_year returns None.
yearly_frames = []
for year in years_to_analyize:
    cdy = case_data_for_year(year)
    if cdy is not None:
        yearly_frames.append(cdy)

# Stack everything once: DataFrame.append was removed in pandas 2.0 and copies
# the accumulated frame on every iteration. pd.concat raises on an empty list,
# so fall back to an empty frame when no year produced scores.
case_data = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [59]:
# Attach the lookup columns to every scored case. Both joins are left joins on
# the scored `case`, so every scored case is kept; the lookup columns are NaN
# for a case that has no matching citing_case / cited_case row.
# NOTE: this cell rebinds case_data, so re-running it without re-running the
# scoring loop merges the lookup columns a second time.
case_data = (
    case_data
    .merge(citing_data, how='left', left_on='case', right_on='citing_case')
    .merge(cited_data, how='left', left_on='case', right_on='cited_case')
)

In [61]:
# Keep only the reporting columns, in the order they should be displayed.
case_data = case_data.filter(items=[
    'calc_year',
    'case',
    'cited_case_year',
    'citing_opinion_type',
    'cited_usid',
    'hub_rank',
    'hub_percentile',
    'auth_rank',
    'auth_percentile',
    'hub_raw',
    'auth_raw',
])

In [62]:
# Order the cases by hub rank (rank 1 = largest raw hub score) and display them.
case_data = case_data.sort_values(by="hub_rank")
case_data

Unnamed: 0,calc_year,case,cited_case_year,citing_opinion_type,cited_usid,hub_rank,hub_percentile,auth_rank,auth_percentile,hub_raw,auth_raw
34,1800,1800 U.S. LEXIS 304,1800.0,0.0,4 U.S. 22,1.0,1.0,24.5,0.328947,1.0,3.743738e-37
14,1800,1796 U.S. LEXIS 403,,0.0,,2.0,0.973684,33.0,0.328947,0.420599,0.0
20,1800,1797 U.S. LEXIS 202,,0.0,,3.0,0.947368,33.0,0.328947,0.3842752,0.0
18,1800,1797 U.S. LEXIS 201,1797.0,0.0,3 U.S. 339,4.0,0.921053,33.0,0.328947,0.3377041,0.0
31,1800,1800 U.S. LEXIS 300,1800.0,0.0,4 U.S. 20,5.0,0.894737,6.0,0.868421,0.2758104,0.4162778
15,1800,1797 U.S. LEXIS 200,1797.0,0.0,3 U.S. 336,6.0,0.868421,10.5,0.75,0.2596637,0.1963051
22,1800,1797 U.S. LEXIS 206,1797.0,0.0,3 U.S. 369,7.0,0.842105,33.0,0.328947,0.1572463,0.0
28,1800,1799 U.S. LEXIS 245,1799.0,0.0,4 U.S. 7,8.0,0.815789,3.5,0.934211,0.1402492,0.5310916
11,1800,1796 U.S. LEXIS 401,1796.0,0.0,3 U.S. 285,9.0,0.789474,24.5,0.328947,0.06613763,3.743738e-37
17,1800,1796 U.S. LEXIS 411,,0.0,,10.0,0.763158,33.0,0.328947,0.05898867,0.0
