In [1]:
import pandas as pd
import numpy as np

from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity

# Seed NumPy's legacy global random state so any NumPy-based randomness is repeatable across runs.
np.random.seed(42)

In [2]:
# Load the 9th Circuit appeals court opinions.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
cir = pd.read_pickle('./circuit')

In [3]:
# Load the district court opinions (all district courts).
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files from a trusted source.
all_dis = pd.read_pickle('./alldistrict')

In [4]:
# Size check: (rows, columns) of the district court table.
all_dis.shape

(260, 39)

In [5]:
# Opinion IDs of the appellate (circuit) court cases, as a plain Python list.
cir_ids = cir['id'].tolist()

In [6]:
# Opinion IDs of the trial (district) court cases, which span a broader set of
# jurisdictions than the circuit data, as a plain Python list.
all_dis_ids = all_dis['id'].tolist()

In [7]:
# Load the connections table: one row per citation link between court cases
# in U.S. federal jurisprudence (citing opinion -> cited opinion).

connections = pd.read_csv('./all.csv')

In [8]:
# We're interested in how 9th Circuit cases are cited by other 9th Circuit cases on the
# same topic, and by district courts throughout the U.S. that are dealing with this topic.
# Despite its name, this list holds the district court IDs followed by the circuit court IDs.
appel_and_cir = [*all_dis_ids, *cir_ids]

In [9]:
# Keep only the citation links whose cited opinion is one of the 9th Circuit cases we pulled
# and whose citing opinion is one of the circuit or district court cases we pulled.
all_connections_list = connections.loc[
    connections['cited_opinion_id'].isin(cir_ids)
    & connections['citing_opinion_id'].isin(appel_and_cir)
]


In [10]:
# Size check: number of citation links that survived the filter, and the column count.
all_connections_list.shape

(2173, 2)

In [11]:
# Treat every citation like a user "recommending" the cited case with a rating of 1.
# assign() returns a new frame, so the filtered slice of `connections` is not modified.
all_connections_list = all_connections_list.assign(link=1)

In [12]:
# Attach the 9th Circuit case data to every citation link by matching the
# cited opinion's ID against the circuit table's 'id' column (inner join).
all_connections_list = all_connections_list.merge(
    cir,
    left_on='cited_opinion_id',
    right_on='id',
)

In [13]:
# Unwrap each 'citation' value from its list to the string stored as the first element.
# Values that are already strings are left untouched, so re-running this cell does not
# truncate an unwrapped citation to its first character.
all_connections_list['citation'] = all_connections_list['citation'].map(
    lambda x: x if isinstance(x, str) else x[0]
)

In [14]:
# Pivot the citation links into a citing-case x cited-case matrix.
# Rows are citing opinions, columns are cited citations, and each cell aggregates
# the 'link' values for that pair (missing where the pair has no citation link).
all_pivot = all_connections_list.pivot_table(
    values='link',
    index='citing_opinion_id',
    columns='citation',
)


In [15]:
# Size check: (citing cases, cited circuit court cases).
all_pivot.shape

(464, 141)

In [16]:
# Rotate so that the cited circuit court decisions are the rows and the citing
# cases are the features (141 x 464 in the recorded run).
# The guard keeps this cell idempotent: without it, re-running the cell would flip the
# matrix back and later cells would compare citing cases instead of circuit court cases.
if all_pivot.index.name == 'citing_opinion_id':
    all_pivot = all_pivot.T

In [17]:
# Size check after the rotation: (cited circuit court cases, citing cases).
all_pivot.shape

(141, 464)

In [18]:
# Replace the missing entries (no citation link) with 0, then store the
# mostly-empty matrix in compressed sparse row form.
all_sparse_pivot = sparse.csr_matrix(all_pivot.fillna(value=0))

In [19]:
# Size check: the sparse matrix should have the same shape as the rotated pivot table.
all_sparse_pivot.shape

(141, 464)

In [20]:
# Pairwise cosine distance between the citation profiles (rows) of the circuit court cases.
all_distances = pairwise_distances(X=all_sparse_pivot, metric='cosine')

In [21]:
# Size check: one row and one column per circuit court case.
all_distances.shape

(141, 141)

In [22]:
# "Recommender table" showing how different the citation patterns are between one circuit court case and another 

# Label both axes with the case citations so a distance can be looked up by citation.
broader_cir_distance = pd.DataFrame(
    data=all_distances,
    index=all_pivot.index,
    columns=all_pivot.index,
)

In [23]:
# Preview the first rows of the case-by-case distance table.
broader_cir_distance.head()

citation,100 F.3d 1443,109 F.3d 1484,109 F.3d 521,111 F.3d 1447,117 F.3d 1520,123 F.3d 1142,126 F.3d 1118,14 F.3d 1324,142 F.3d 1170,143 F.3d 515,...,952 F.2d 297,954 F.2d 1441,958 F.2d 290,96 F.3d 434,97 F.3d 1161,982 F.2d 1342,985 F.2d 1397,986 F.2d 1568,99 F.3d 334,998 F.2d 699
citation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100 F.3d 1443,0.0,1.0,0.857143,1.0,0.92364,0.840833,0.812956,0.838376,1.0,0.776895,...,1.0,0.857143,0.840281,0.878171,0.857143,1.0,1.0,0.859087,0.917521,0.96182
109 F.3d 1484,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.835601,1.0,1.0
109 F.3d 521,0.857143,1.0,0.0,1.0,0.933185,0.767881,0.890891,0.858579,1.0,0.960957,...,1.0,0.9375,1.0,0.9467,1.0,1.0,1.0,0.917801,1.0,1.0
111 F.3d 1447,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
117 F.3d 1520,0.92364,1.0,0.933185,1.0,0.0,0.900742,1.0,0.735425,1.0,0.916522,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.912125,1.0,1.0


In [24]:
# Confirm the case we want to query is present in the distance table.
'848 F.2d 1441' in broader_cir_distance.columns

True

In [None]:
# Rank every case by its distance to 848 F.2d 1441 (Conner v. Burford), most similar first.
# The query case itself is included, at distance 0.
# Case available at https://elr.info/sites/default/files/litigation/15.20608.htm
broader_cir_distance['848 F.2d 1441'].sort_values()

In [26]:
# Persist the case-by-case distance table to disk as a pickle file.
broader_cir_distance.to_pickle('./case_recommender')