In [68]:
# import dependencies
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
import pickle
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

warnings.filterwarnings("ignore")

### Initial data setup

In [69]:
# Load the cases that predictions will be generated for.
# TODO: swap in the final filename once the prediction CSV has been prepared.
df_predict = pd.read_csv("../Resources/cases_for_prediction.csv")
# df_predict.head()

# Disabled draft of the target/data split, kept for reference:
# target = df_predict["direction"]
# target_names = ["conservative", "liberal"]
# data = df_predict.drop(["direction", "caseId"], axis=1)

# Case ids to iterate over (landmark and current-term cases) and the
# justice ids 106 through 115 inclusive.
justices = list(range(106, 116))
cases = np.array(df_predict["caseId"])
cases

array(['1953-069', '1960-133', '1962-058', '1965-122', '1968-043',
       '1972-048', '1973-172', '1977-147', '1984-022', '1987-019',
       '1988-124', '2018-054', '2018-011', '2018-031', '2018-052',
       '2018-066', '2018-024'], dtype=object)

### Pre-Processing

In [83]:
# Build one-hot columns for precedentAlteration that always include both
# classes (0 and 1), even if only one of them occurs in df_predict.
possibilites = [0, 1]
exists = df_predict["precedentAlteration"].tolist()

# Classes that never occur in the data. The series is created with the
# column's own dtype so that an empty pad cannot influence the dtype of the
# concatenated result.
difference = pd.Series(
    [item for item in possibilites if item not in exists],
    dtype=df_predict["precedentAlteration"].dtype,
)

# Temporarily pad the column with the missing classes so get_dummies creates a
# column for each of them. Series.append was removed in pandas 2.0; pd.concat
# with ignore_index=True is the equivalent of append + reset_index(drop=True).
target = pd.concat([df_predict["precedentAlteration"], difference], ignore_index=True)
dummies = pd.get_dummies(target, prefix="precedentAlteration")

# The padding rows sit at the end; keep only the rows that came from df_predict.
dummies = dummies.iloc[:len(exists)]
dummies

Unnamed: 0,precedentAlteration_0.0,precedentAlteration_1.0
0,0,1
1,0,1
2,0,1
3,0,1
4,1,0
5,1,0
6,1,0
7,1,0
8,1,0
9,1,0


In [80]:
# pre-processing function
def PreProc(c, feature_columns=None):
    """One-hot encode the feature columns of a single case from ``df_predict``.

    Parameters
    ----------
    c : value compared against ``df_predict["caseId"]``
        Identifier of the case whose rows should be pre-processed.
    feature_columns : sequence of str, optional
        Column labels the encoded frame must have, in order. Encoding the rows
        of one case on their own only yields columns for the values present in
        those rows, so the columns differ from case to case. When this is
        given, the encoded frame is reindexed to exactly these labels: missing
        columns are added filled with 0 and columns not listed are dropped.
        When omitted, the columns are whatever the case's own values produce.

    Returns
    -------
    X : pandas.DataFrame
        One-hot encoded features for the matching rows.
    y : pandas.Series
        The ``decisionDirection`` values of the matching rows.
    feature_names : pandas.Index
        Column labels of ``X``.
    """
    # Select the rows for this case once and derive target and features from them.
    case_rows = df_predict.loc[df_predict["caseId"] == c]
    target = case_rows["decisionDirection"]
    df_data = case_rows.drop(["decisionDirection", "caseId"], axis=1)

    # One-hot encode each feature column, using the column name as the prefix,
    # and join the encoded pieces side by side in this fixed order.
    encoded_columns = ["term", "precedentAlteration", "issueArea", "lawType", "caseOriginState"]
    OH_features = pd.concat(
        [pd.get_dummies(df_data[col], prefix=col) for col in encoded_columns],
        axis=1,
    )

    # Optionally align to a fixed feature schema so every case has the same columns.
    if feature_columns is not None:
        OH_features = OH_features.reindex(columns=list(feature_columns), fill_value=0)

    # Set variables to return
    X = OH_features
    y = target
    feature_names = OH_features.columns

    return X, y, feature_names

### Justice prediction function

In [71]:
# use model to predict a justice's decision direction
def JusticeDirection(j, X):
    """Load the pickled model for justice ``j`` and return its prediction for ``X``.

    Parameters
    ----------
    j : justice identifier
        Interpolated into the model filename ``f"{j}_model.sav"``, which is
        opened relative to the current working directory.
    X : feature data
        Passed unchanged to the loaded model's ``predict`` method.

    Returns
    -------
    The value returned by ``model.predict(X)``.

    Raises
    ------
    FileNotFoundError
        If the model file for ``j`` does not exist.
    """
    filename = f"{j}_model.sav"
    # SECURITY: unpickling can execute arbitrary code; only load model files
    # from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)

    prediction = model.predict(X)
    print(filename)
    return prediction
    


### Iterate through cases and justices and apply functions

In [76]:
# For each case, pre-process its features and (eventually) predict each
# justice's decision direction; results are collected in all_cases so they can
# later be converted to a dataframe.
# NOTE: the per-justice prediction step below is currently commented out, so
# this loop only prints each case's encoded features and all_cases stays empty.
all_cases = []
for c in cases:
    print(f"CaseId: {c}\n")
    X, y, feature_names = PreProc(c)
    
    # Holds one prediction per justice once the disabled block below is re-enabled.
    prediction_list = []

#     for j in justices:               
#         prediction = JusticeDirection(j, X)
#         prediction_list.append(prediction)
#         print(f"Justice #: {j}, Predicted Direction: {prediction}\n")
    
#     NOTE(review): "{c}" below has no f-prefix, so every dict would get the
#     literal key "{c}" instead of the case id; use c (or f"{c}") when re-enabling.
#     case_dict = {"{c}": [justices , prediction_list]}
#     all_cases.append(case_dict)
    # Show the encoded features for this case, followed by a separator line.
    print(X)
    print("--------------------------------")
    

CaseId: 1953-069

   term_1953  precedentAlteration_1.0  issueArea_2.0  lawType_2.0  \
0          1                        1              1            1   

   caseOriginState_60.0  
0                     1  
--------------------------------
CaseId: 1960-133

   term_1960  precedentAlteration_1.0  issueArea_1.0  lawType_2.0  \
1          1                        1              1            1   

   caseOriginState_41.0  
1                     1  
--------------------------------
CaseId: 1962-058

   term_1962  precedentAlteration_1.0  issueArea_1.0  lawType_2.0  \
2          1                        1              1            1   

   caseOriginState_12.0  
2                     1  
--------------------------------
CaseId: 1965-122

   term_1965  precedentAlteration_1.0  issueArea_1.0  lawType_2.0  \
3          1                        1              1            1   

   caseOriginState_4.0  
3                    1  
--------------------------------
CaseId: 1968-043

   term_1968  pr

### Convert results to a DataFrame and save to csv

In [None]:
# TODO: convert the all_cases list of dictionaries into a DataFrame.
# TODO: decide how target and feature_names will be used.
# Open question: should predictions be compared only against the overall court decision as the target
# (rather than per-justice actuals, which are not known for the landmark cases)?

In [None]:
# write to csv for plotting in tableau
# NOTE(review): `df` is not defined in any visible cell, and "../Resources/.csv"
# has no base filename — presumably both are placeholders until the results
# DataFrame exists; confirm before running this cell.
df.to_csv("../Resources/.csv")