In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns
sns.set(color_codes=True)

In [None]:
# Load the six selected CSV columns and preview only the first rows rather
# than rendering the whole frame in the notebook output.
data = pd.read_csv('./RawDatafromJim_Complete.csv', usecols=[0,1,3,6,7,16])
data.head()

In [None]:
def _to_numeric_if_possible(column):
    """Return ``column`` converted by ``pd.to_numeric``, or unchanged if parsing fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# NOTE(review): the six positions in `usecols` presumably map to the columns
# used in this notebook (citing_case, citing_opinion_type, cited_case,
# cited_usid, citing_case_year, cited_case_year) -- confirm against the CSV header.
data = pd.read_csv('./RawDatafromJim_Complete.csv', usecols=[0,1,3,6,7,16])

# Graph node ids have the form "<case>-<opinion type>", so the opinion type
# has to be text before it can be concatenated.
data["citing_opinion_type"] = data["citing_opinion_type"].astype(str)
data["citing_case-opinion_type"] = data["citing_case"] + "-" + data["citing_opinion_type"]
# Every cited node is given opinion type "0".
data["cited_case-opinion_type"] = data["cited_case"] + "-0"

# Convert each column to a numeric dtype where possible; columns that cannot be
# parsed are kept as they are (explicit replacement for the deprecated
# errors='ignore' option of pd.to_numeric).
data = data.apply(_to_numeric_if_possible)

# Unit weight for every citation edge.
data["weight"] = 1

In [None]:
# Lookup table of the distinct (cited_case, cited_usid) pairs found in `data`;
# the first occurrence of each pair is the one that is kept.
cited_data = (
    data
    .filter(items=['cited_case','cited_usid'], axis=1)
    .drop_duplicates(keep='first')
)

In [None]:
def case_data_for_year(calc_year):
    """Compute HITS hub and authority scores for the citation network up to a year.

    The network is built from the module-level ``data`` frame, using only rows
    whose ``citing_case_year`` is less than or equal to ``calc_year``. Edges run
    from ``citing_case-opinion_type`` to ``cited_case-opinion_type`` and carry
    the ``weight`` column as an edge attribute.

    Parameters
    ----------
    calc_year : number
        Inclusive upper bound on ``citing_case_year``.

    Returns
    -------
    pandas.DataFrame or None
        One row per graph node with the columns ``calc_year``, ``case``,
        ``hub_raw``, ``auth_raw``, ``hub_percentile``, ``hub_rank``,
        ``auth_percentile`` and ``auth_rank``. ``None`` is returned when HITS
        yields no scores.
    """
    # Keep only citations made in `calc_year` or earlier.
    year_data = data.loc[data['citing_case_year'] <= calc_year]

    # networkx 2.0 renamed from_pandas_dataframe to from_pandas_edgelist;
    # resolve whichever one the installed version provides.
    build_graph = getattr(nx, 'from_pandas_edgelist', None) or nx.from_pandas_dataframe
    G = build_graph(
        year_data,
        source="citing_case-opinion_type",
        target="cited_case-opinion_type",
        edge_attr=["weight"],
        create_using=nx.DiGraph(),
    )

    # Raw (unnormalized) hub and authority scores, up to 1000 iterations.
    h, a = nx.hits(G, max_iter=1000, normalized=False)
    if not h or not a:
        return None

    year_case_scores = pd.DataFrame([
        {'calc_year': calc_year, 'case': key, 'hub_raw': h[key], 'auth_raw': a[key]}
        for key in h
    ])

    # Rank only the score column that is needed instead of ranking every
    # column of the frame and then selecting one of them.
    # *_percentile: ascending percentile rank; *_rank: 1 is the highest raw score.
    year_case_scores['hub_percentile'] = year_case_scores['hub_raw'].rank(pct=True)
    year_case_scores['hub_rank'] = year_case_scores['hub_raw'].rank(ascending=False)
    year_case_scores['auth_percentile'] = year_case_scores['auth_raw'].rank(pct=True)
    year_case_scores['auth_rank'] = year_case_scores['auth_raw'].rank(ascending=False)
    return year_case_scores


In [None]:
# Candidate years: the ten largest distinct values of cited_case_year, newest first.
recent_years = np.sort(data.cited_case_year.unique())[::-1][:10]

# The analysis is currently pinned to a single year. Set YEARS_OVERRIDE to None
# to analyse every year in `recent_years` instead.
YEARS_OVERRIDE = [2000]
years_to_analyize = YEARS_OVERRIDE if YEARS_OVERRIDE is not None else list(recent_years)

In [None]:
# Display the years that the scoring loop will iterate over.
years_to_analyize

In [None]:
# Score every requested year. case_data_for_year returns None when a year has
# no scores, so those results are dropped before concatenating.
yearly_scores = [case_data_for_year(year) for year in years_to_analyize]
yearly_scores = [scores for scores in yearly_scores if scores is not None]

# Concatenate once instead of growing the frame with DataFrame.append (removed
# in pandas 2.0). pd.concat raises on an empty list, so fall back to an empty frame.
case_data = pd.concat(yearly_scores) if yearly_scores else pd.DataFrame()

In [None]:
# Split the "<case>-<opinion type>" node id back into its two parts at the
# first hyphen.
# NOTE(review): assumes the case identifier itself contains no '-' -- confirm
# against the data; otherwise the split lands inside the case identifier.
case_parts = case_data['case'].str.split('-', n=1, expand=True)
case_data['case'] = case_parts[0]
case_data['opinion_type'] = case_parts[1]

# The text before the first space of the case identifier is stored as case_year
# (kept as a string).
case_data['case_year'] = case_data['case'].str.split(' ', n=1).str[0]

# Attach cited_usid (and cited_case) from the lookup table; cases without a
# match keep their row with missing values (left join).
case_data = pd.merge(case_data, cited_data, left_on='case', right_on='cited_case', how='left')

In [None]:
# Keep only the reporting columns listed below, in this order.
report_columns = [
    'calc_year',
    'case',
    'case_year',
    'opinion_type',
    'cited_usid',
    'hub_rank',
    'hub_percentile',
    'auth_rank',
    'auth_percentile',
    'hub_raw',
    'auth_raw',
]
case_data = case_data.filter(items=report_columns)

In [None]:
# Order the rows by ascending hub_rank, then display the table.
case_data = case_data.sort_values(by="hub_rank")
case_data

In [None]:
# Show only the rows whose opinion_type is the string "2".
is_type_two = case_data['opinion_type'].eq("2")
case_data[is_type_two]