In [1]:
# import dependencies
import pickle
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

### Initial data setup

In [2]:
# Case ids: five landmark ("LM") cases followed by five current-term ("CT") cases.
cases = [f"{kind}{num}" for kind in ("LM", "CT") for num in range(1, 6)]
# Justice ids 106 through 115 inclusive.
justices = list(range(106, 116))

# Load the prediction data.
# TODO: fill in the actual filename once the CSV is prepared -- with an empty
# path, read_csv raises FileNotFoundError.
df_predict = pd.read_csv("")
# df_predict.head()

# target = df_predict["direction"]
# target_names = ["conservative", "liberal"]

# data = df_predict.drop(["direction", "caseId"], axis=1)

FileNotFoundError: File b'' does not exist

### Pre-Processing

In [None]:
# pre-processing function
def PreProc(c):
    """Build the one-hot encoded features and the target for one case.

    Reads the module-level ``df_predict`` frame and keeps only the rows whose
    ``caseId`` equals ``c``.

    Parameters
    ----------
    c : value compared against ``df_predict["caseId"]``.

    Returns
    -------
    X : DataFrame holding the one-hot encoded columns, grouped in the order
        term, precedentAlteration, issueArea, lawType, caseOriginState.
    y : the ``direction`` column of the selected rows.
    feature_names : the column index of ``X``.
    """
    # Select the case's rows once, then split into target and raw features.
    case_rows = df_predict.loc[df_predict["caseId"] == c]
    y = case_rows["direction"]
    raw_features = case_rows.drop(["direction", "caseId"], axis=1)

    # One-hot encode each categorical column (prefix = column name) and join
    # the encoded pieces side by side in a single concat.
    encoded_columns = (
        "term",
        "precedentAlteration",
        "issueArea",
        "lawType",
        "caseOriginState",
    )
    X = pd.concat(
        [pd.get_dummies(raw_features[col], prefix=col) for col in encoded_columns],
        axis=1,
    )
    # NOTE(review): the dummy columns are derived from this case's rows only,
    # so the set of columns depends on which values are present. Presumably
    # they must line up with the columns the saved models were trained on --
    # confirm.
    return X, y, X.columns


### Justice prediction function

In [None]:
# use model to predict a justice's decision direction
def JusticeDirection(j, features=None):
    """Predict decision direction(s) using the saved model for justice ``j``.

    Parameters
    ----------
    j : justice identifier; the model is loaded from ``./{j}_Model.sav``.
    features : optional input passed to ``model.predict``. When omitted, the
        module-level ``X`` is used, which is how the function behaved before
        this parameter existed.

    Returns
    -------
    Whatever ``model.predict`` returns for the given features.

    Raises
    ------
    FileNotFoundError (from ``open``) if the model file does not exist.
    """
    filename = f"./{j}_Model.sav"
    # SECURITY: unpickling can execute arbitrary code -- only load model files
    # that come from a trusted source.
    # The context manager closes the file handle once the model is loaded.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)

    if features is None:
        # Backward-compatible fallback to the notebook-level feature matrix.
        features = X

    return model.predict(features)


### Iterate through cases and justices and apply functions

In [None]:
# For each case, predict every justice's decision direction.
# Results are collected as one {caseId: [justices, predictions]} dict per case
# so they can later be converted to a DataFrame.
all_cases = []
for c in cases:
    print(f"CaseId: {c}\n")
    # X has to be a module-level name here: JusticeDirection(j) reads it as a global.
    X, y, feature_names = PreProc(c)

    prediction_list = []

    for j in justices:
        prediction = JusticeDirection(j)
        prediction_list.append(prediction)
        print(f"Justice #: {j}, Predicted Direction: {prediction}\n")

    # Key each result by the actual case id; a plain "{c}" string (no f-prefix)
    # would give every case the same literal key.
    case_dict = {c: [justices, prediction_list]}
    all_cases.append(case_dict)
    print("--------------------------------")
    

### Convert results to a DataFrame and save to csv

In [None]:
# TODO: figure out how to turn the all_cases list of dictionaries into a DataFrame.
# TODO: decide how target and feature_names will be used.
# TODO: should we compare only against the overall court decision as the target
#       (rather than per-justice actuals, which are not known for the LM cases)?

In [None]:
# write to csv for plotting in tableau
# NOTE(review): `df` is not defined in any cell visible here -- presumably it is
# the DataFrame still to be built from all_cases; confirm before running.
# NOTE(review): the output file name is just ".csv", which looks like a
# placeholder -- TODO fill in the real name.
df.to_csv("../Resources/.csv")