In [1]:
# import dependencies
import pandas as pd
import numpy as np
import pickle
import warnings
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

warnings.filterwarnings("ignore")

### Initial data setup

In [2]:
# Load the cases to predict; the CSV's first column becomes the row index.
cases_csv = "../Resources/cases_for_prediction.csv"
df_predict = pd.read_csv(cases_csv, index_col=0)
df_predict

Unnamed: 0,caseId,caseName,term,precedentAlteration,issueArea,lawType,caseOriginState,decisionDirection
0,1953-069,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1953,1.0,2.0,2.0,60.0,2.0
1,1960-133,MAPP v. OHIO,1960,1.0,1.0,2.0,41.0,2.0
2,1962-058,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1962,1.0,1.0,2.0,12.0,2.0
3,1965-122,MIRANDA v. ARIZONA,1965,1.0,1.0,2.0,4.0,2.0
4,1968-043,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1968,0.0,3.0,2.0,60.0,2.0
5,1972-048,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1972,0.0,5.0,2.0,60.0,2.0
6,1973-172,"UNITED STATES v. NIXON, PRESIDENT OF THE UNITE...",1973,0.0,1.0,9.0,60.0,1.0
7,1977-147,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1977,0.0,2.0,2.0,6.0,1.0
8,1984-022,NEW JERSEY v. T. L. O.,1984,0.0,1.0,2.0,35.0,1.0
9,1987-019,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1987,0.0,3.0,2.0,60.0,1.0


### Pre-Processing

In [3]:
# Every level each categorical feature may take. The one-hot encoding is padded
# out to these levels so that each case produces the same number of input columns.

# nine individually listed terms, then every term from 1987 through 2018
unique_term = [1953, 1960, 1962, 1965, 1968, 1972, 1973, 1977, 1984, *range(1987, 2019)]
# codes 1-10 plus 12, 13 and 14 (11 is omitted)
unique_issueArea = [*range(1, 11), 12, 13, 14]
# codes 1-6 plus 8, 9 and 10 (7 is omitted)
unique_lawType = [*range(1, 7), 8, 9, 10]
# codes 0-60 except the six codes omitted from the list
_omitted_state_codes = {3, 11, 24, 40, 44, 54}
unique_caseOriginState = [code for code in range(61) if code not in _omitted_state_codes]

In [4]:
# Convert each column to "dummies" over every possible value, so the number and
# order of columns/features is the same no matter which cases are being predicted.

def one_hot_all_levels(column, possible_values, prefix):
    """One-hot encode ``column`` with a dummy column for every possible level.

    ``pd.get_dummies`` only creates columns for values that actually occur, so
    the levels from ``possible_values`` that are absent from ``column`` are
    temporarily appended as extra rows, the padded series is encoded, and the
    extra rows are then discarded again.

    Parameters
    ----------
    column : pandas.Series
        Values to encode (one entry per case).
    possible_values : iterable
        Every level that must receive its own dummy column.
    prefix : str
        Prefix passed to ``pd.get_dummies`` for the generated column names.

    Returns
    -------
    pandas.DataFrame
        ``len(column)`` rows on a fresh 0..n-1 index, one dummy column per
        distinct value found in ``column`` or ``possible_values``.
    """
    observed = set(column.tolist())
    missing = [value for value in possible_values if value not in observed]

    padded = column.reset_index(drop=True)
    if missing:
        # pd.concat replaces Series.append, which no longer exists in pandas 2.x.
        padded = pd.concat([padded, pd.Series(missing)], ignore_index=True)

    dummies = pd.get_dummies(padded, prefix=prefix)
    # The padding rows sit at the end; keep only the rows that came from `column`.
    return dummies.iloc[:len(column)]


dummies_term = one_hot_all_levels(df_predict["term"], unique_term, "term")
dummies_issueArea = one_hot_all_levels(df_predict["issueArea"], unique_issueArea, "issueArea")
dummies_lawType = one_hot_all_levels(df_predict["lawType"], unique_lawType, "lawType")
dummies_caseOriginState = one_hot_all_levels(
    df_predict["caseOriginState"], unique_caseOriginState, "caseOriginState"
)
# precedentAlteration is padded to both of its levels (0 and 1) as well, so the
# feature count stays fixed even if every case in the file shares one value.
dummies_precAlt = one_hot_all_levels(
    df_predict["precedentAlteration"], [0, 1], "precedentAlteration"
)

In [5]:
# Put the encoded feature groups back side by side, with caseId as the first column.
caseId_df = df_predict[["caseId"]].reset_index(drop=True)

# column order: caseId, term, issueArea, caseOriginState, lawType, precedentAlteration
encoded_parts = [
    caseId_df,
    dummies_term,
    dummies_issueArea,
    dummies_caseOriginState,
    dummies_lawType,
    dummies_precAlt,
]
OH_scotus = pd.concat(encoded_parts, axis=1)
OH_scotus.head()

Unnamed: 0,caseId,term_1953,term_1960,term_1962,term_1965,term_1968,term_1972,term_1973,term_1977,term_1984,...,lawType_2.0,lawType_3.0,lawType_4.0,lawType_5.0,lawType_6.0,lawType_8.0,lawType_9.0,lawType_10.0,precedentAlteration_0.0,precedentAlteration_1.0
0,1953-069,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,1960-133,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
2,1962-058,0,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
3,1965-122,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
4,1968-043,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0


### Functions for use in loop

In [6]:
# pre-processing function
def PreProc(c):
    """Return the encoded features and the recorded court decision for one case.

    Reads the notebook-level frames ``df_predict`` (raw cases) and ``OH_scotus``
    (one-hot encoded cases).

    Parameters
    ----------
    c
        caseId value to look up.

    Returns
    -------
    X : pandas.DataFrame
        Rows of ``OH_scotus`` whose caseId equals ``c``, without the caseId column.
    y : pandas.Series
        ``decisionDirection`` values from ``df_predict`` for that caseId.
    feature_names : pandas.Index
        Column labels of ``X``.
    """
    # Each frame is filtered by its own caseId column. Applying a mask built
    # from df_predict to OH_scotus only works while both frames happen to share
    # exactly the same index.
    target = df_predict.loc[df_predict["caseId"] == c, "decisionDirection"]
    data = OH_scotus.loc[OH_scotus["caseId"] == c].drop(columns=["caseId"])

    # Set variables to return
    X = data
    y = target
    feature_names = data.columns

    return X, y, feature_names

In [7]:
# use corresponding model to predict a justice's decision direction
def JusticeDirection(j, features=None):
    """Predict decision direction(s) with the saved model for justice ``j``.

    Parameters
    ----------
    j
        Justice identifier; the model is read from ``./{j}_Model.sav``.
    features : optional
        Input passed to the model's ``predict``. When omitted, the
        notebook-level variable ``X`` is used, which is how the function
        behaved before this parameter existed.

    Returns
    -------
    Whatever the loaded model's ``predict`` returns for ``features``.

    Raises
    ------
    OSError
        If the model file cannot be opened.
    """
    if features is None:
        # Backward-compatible fallback to the global set by the calling cell.
        features = X

    filename = f"./{j}_Model.sav"
    # NOTE: pickle.load can execute arbitrary code contained in the file; only
    # load model files that come from a trusted source.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)

    prediction = model.predict(features)

    return prediction

### Iterate through cases and justices and apply functions

In [8]:
# Run every justice's saved model against every case and collect the predicted
# votes as one dict per case (converted to a DataFrame afterwards).
cases = np.array(df_predict["caseId"])
justices = [106, *range(108, 116)]
all_cases = []

for case_id in cases:
    print(f"CaseId: {case_id}\n")
    # JusticeDirection picks up the notebook-level X, so rebind it for each case.
    X, y, feature_names = PreProc(case_id)

    votes = {"caseId": case_id}

    for justice in justices:
        predicted = JusticeDirection(justice)
        print(f"Justice #: {justice}, Predicted Direction: {predicted}\n")
        # predict returns one value per input row; the first one is stored
        votes[f"justice_{justice}"] = predicted[0]

    all_cases.append(votes)
    print("--------------------------------")

CaseId: 1953-069

Justice #: 106, Predicted Direction: [1]

Justice #: 108, Predicted Direction: [1]

Justice #: 109, Predicted Direction: [2]

Justice #: 110, Predicted Direction: [2]

Justice #: 111, Predicted Direction: [1]

Justice #: 112, Predicted Direction: [1]

Justice #: 113, Predicted Direction: [2]

Justice #: 114, Predicted Direction: [2]

Justice #: 115, Predicted Direction: [1]

--------------------------------
CaseId: 1960-133

Justice #: 106, Predicted Direction: [1]

Justice #: 108, Predicted Direction: [2]

Justice #: 109, Predicted Direction: [2]

Justice #: 110, Predicted Direction: [1]

Justice #: 111, Predicted Direction: [1]

Justice #: 112, Predicted Direction: [1]

Justice #: 113, Predicted Direction: [1]

Justice #: 114, Predicted Direction: [2]

Justice #: 115, Predicted Direction: [1]

--------------------------------
CaseId: 1962-058

Justice #: 106, Predicted Direction: [1]

Justice #: 108, Predicted Direction: [1]

Justice #: 109, Predicted Direction: [2]

### Convert results to a DataFrame and save to csv

In [33]:
# One row per case: the case name followed by each justice's predicted vote.
# The comparison target is the overall court decision only, because the actual
# per-justice votes are not known for these cases.
results = pd.DataFrame(all_cases)
caseName = df_predict["caseName"]
prediction_df = pd.concat(objs=[caseName, results], axis="columns")
prediction_df

Unnamed: 0,caseName,caseId,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115
0,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1953-069,1,1,2,2,1,1,2,2,1
1,MAPP v. OHIO,1960-133,1,2,2,1,1,1,1,2,1
2,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1962-058,1,1,2,1,1,1,2,2,1
3,MIRANDA v. ARIZONA,1965-122,1,1,2,2,1,1,2,2,1
4,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1968-043,1,1,2,2,1,1,2,2,1
5,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1972-048,1,1,2,1,1,1,1,2,1
6,"UNITED STATES v. NIXON, PRESIDENT OF THE UNITE...",1973-172,1,1,2,1,1,1,2,1,1
7,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1977-147,1,1,2,2,1,1,2,1,1
8,NEW JERSEY v. T. L. O.,1984-022,1,2,2,2,2,2,2,1,1
9,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1987-019,1,1,2,2,1,1,2,2,1


In [34]:
# add in predicted and actual decision directions as columns

# Majority vote across the justice_* columns. mode(axis=1) returns a DataFrame
# that grows an extra column whenever a row has tied modes, and assigning a
# multi-column frame to a single column raises. Selecting column 0 always gives
# one Series (for a tie, the smallest tied value); astype(int) keeps the column
# integer-typed even when the tie padding has made the frame float.
justice_votes = prediction_df.filter(like="justice")
prediction_df["predictedDirection"] = justice_votes.mode(axis=1)[0].astype(int)

# actual court decision, aligned to prediction_df by row index
prediction_df["decisionDirection"] = df_predict["decisionDirection"].astype(int)
prediction_df

Unnamed: 0,caseName,caseId,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
0,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1953-069,1,1,2,2,1,1,2,2,1,1,2
1,MAPP v. OHIO,1960-133,1,2,2,1,1,1,1,2,1,1,2
2,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1962-058,1,1,2,1,1,1,2,2,1,1,2
3,MIRANDA v. ARIZONA,1965-122,1,1,2,2,1,1,2,2,1,1,2
4,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1968-043,1,1,2,2,1,1,2,2,1,1,2
5,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1972-048,1,1,2,1,1,1,1,2,1,1,2
6,"UNITED STATES v. NIXON, PRESIDENT OF THE UNITE...",1973-172,1,1,2,1,1,1,2,1,1,1,1
7,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1977-147,1,1,2,2,1,1,2,1,1,1,1
8,NEW JERSEY v. T. L. O.,1984-022,1,2,2,2,2,2,2,1,1,2,1
9,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1987-019,1,1,2,2,1,1,2,2,1,1,1


In [13]:
# Export the predictions for plotting in Tableau; the row index is not written.
prediction_df.to_csv(path_or_buf="../Resources/case_predictions.csv", index=False)

In [36]:
# create version of df to boolean (for tableau viz) where conservative is True, liberal is False
cols = ["justice_106", "justice_108", "justice_109", 
        "justice_110", "justice_111", "justice_112", 
        "justice_113", "justice_114", "justice_115",
        "predictedDirection","decisionDirection"]
# Work on a copy: a plain assignment would only alias prediction_df, and the
# conversion below would then overwrite its numeric codes as well.
bool_predictions = prediction_df.copy()
# True where the code equals 1, False for every other value
bool_predictions[cols] = bool_predictions[cols] == 1
bool_predictions

Unnamed: 0,caseName,caseId,justice_106,justice_108,justice_109,justice_110,justice_111,justice_112,justice_113,justice_114,justice_115,predictedDirection,decisionDirection
0,BROWN et al. v. BOARD OF EDUCATION OF TOPEKA e...,1953-069,True,True,False,False,True,True,False,False,True,True,False
1,MAPP v. OHIO,1960-133,True,False,False,True,True,True,True,False,True,True,False
2,"GIDEON v. WAINWRIGHT, CORRECTIONS DIRECTOR",1962-058,True,True,False,True,True,True,False,False,True,True,False
3,MIRANDA v. ARIZONA,1965-122,True,True,False,False,True,True,False,False,True,True,False
4,TINKER et al. v. DES MOINES INDEPENDENT COMMUN...,1968-043,True,True,False,False,True,True,False,False,True,True,False
5,"ROE et al. v. WADE, DISTRICT ATTORNEY OF DALLA...",1972-048,True,True,False,True,True,True,True,False,True,True,False
6,"UNITED STATES v. NIXON, PRESIDENT OF THE UNITE...",1973-172,True,True,False,True,True,True,False,True,True,True,True
7,REGENTS OF THE UNIVERSITY OF CALIFORNIA v. BAKKE,1977-147,True,True,False,False,True,True,False,True,True,True,True
8,NEW JERSEY v. T. L. O.,1984-022,True,False,False,False,False,False,False,True,True,False,True
9,HAZELWOOD SCHOOL DISTRICT et al. v. KUHLMEIER ...,1987-019,True,True,False,False,True,True,False,False,True,True,True


In [37]:
# Export the boolean version for Tableau. Unlike case_predictions.csv, this file
# includes the row index as its first column (index=True is the to_csv default).
bool_predictions.to_csv("../Resources/bool_predictions.csv", index=True)