In [1]:
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import os

In [2]:
# Load the per-justice vote table and keep only the rows that actually
# record a vote; rows with a missing "vote" value are discarded.
df = (
    pd.read_csv("108justice.csv")
    .dropna(subset=["vote"])
)
# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,justice,caseId,term,majVotes,minVotes,decisionDirection,direction,majority,partyWinning,precedentAlteration,vote,issueArea
0,108,1991-010,1991,9,0,1,1,2,0,0,1,9
1,108,1991-013,1991,9,0,1,1,2,1,0,1,8
2,108,1991-015,1991,9,0,1,1,2,1,0,1,2
3,108,1991-019,1991,7,2,1,1,2,0,0,1,1
4,108,1991-020,1991,9,0,2,2,2,1,0,1,9


In [3]:
# Prediction target: the "direction" column of the filtered frame.
target_108 = df.loc[:, "direction"]

# Display names handed to classification_report, which pairs them with the
# class labels in sorted order.
# NOTE(review): presumably label 1 is "conservative" and label 2 is
# "liberal" -- confirm against the dataset codebook.
target_names = [
    "conservative",
    "liberal",
]

In [4]:
# Feature matrix: everything in df except the target ("direction") and the
# identifier / vote-count columns listed here.
# NOTE(review): "decisionDirection", "majority" and "vote" stay in the
# feature set; they look closely tied to "direction" and may leak the
# target -- confirm this is intended.
# NOTE(review): the previews list an "Unnamed: 0" column; if that is a
# saved row index from the CSV rather than the displayed index, it is being
# used as a feature -- verify.
excluded_columns = ["justice", "direction", "caseId", "majVotes", "minVotes"]
data_108 = df.drop(columns=excluded_columns)

# Column labels of the feature matrix, kept for later reference.
feature_names = data_108.columns
data_108.head()

Unnamed: 0,term,decisionDirection,majority,partyWinning,precedentAlteration,vote,issueArea
0,1991,1,2,0,0,1,9
1,1991,1,2,1,0,1,8
2,1991,1,2,1,0,1,2
3,1991,1,2,0,0,1,1
4,1991,2,2,1,0,1,9


In [5]:
# Hold out part of the data for evaluation. The split size is left at
# scikit-learn's default, and the fixed random_state keeps the partition
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data_108,
    target_108,
    random_state=42,
)

In [6]:
# Fit a linear-kernel support vector classifier on the training split.
# fit() returns the estimator itself, so the assignment still binds the
# trained SVC to `model`.
model = SVC(kernel='linear').fit(X_train, y_train)
# Show the fitted estimator's configuration as the cell output.
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [7]:
# Model accuracy: mean accuracy of the fitted classifier on the training
# split and on the held-out test split, reported to three decimals.
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print('Train Acc: %.3f' % train_accuracy)
print('Test Acc: %.3f' % test_accuracy)

Train Acc: 0.971
Test Acc: 0.957


In [8]:
from sklearn.metrics import confusion_matrix

# Predict labels for the held-out split, then print per-class precision,
# recall and F1 using the human-readable class names.
predictions = model.predict(X_test)
report = classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report)

              precision    recall  f1-score   support

conservative       0.94      1.00      0.97       378
     liberal       1.00      0.87      0.93       184

   micro avg       0.96      0.96      0.96       562
   macro avg       0.97      0.93      0.95       562
weighted avg       0.96      0.96      0.96       562



In [11]:
# Confusion matrix for the held-out split. scikit-learn's signature is
# confusion_matrix(y_true, y_pred): rows are true labels and columns are
# predicted labels, so the ground truth must be passed first. The result is
# stored and printed; as a bare non-final expression it was silently
# discarded.
test_confusion = confusion_matrix(y_test, predictions)
print(test_confusion)

# Spot check: the first ten predictions next to the first ten true labels.
# `.values` yields the same NumPy array as `Series.ravel()`, which is
# deprecated in recent pandas releases.
predictions[:10], y_test[:10].values

(array([2, 1, 1, 1, 1, 1, 2, 1, 1, 1], dtype=int64),
 array([2, 1, 1, 1, 1, 1, 2, 1, 1, 1], dtype=int64))

In [11]:
# Save Model: persist the fitted classifier to disk with pickle.
# https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
# NOTE(review): LogisticRegression is not used in the visible cells; the
# import is kept in case cells outside this view rely on it.
from sklearn.linear_model import LogisticRegression
import pickle

# Path of the serialized model; later cells reuse `filename` to load it.
filename = 'SCOTUS_108_model.sav'

# The context manager guarantees the handle is flushed and closed even if
# pickling fails, instead of leaking an open file object.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [12]:
# Load Model from Disk and re-score it on the held-out split to confirm the
# round trip preserved the fitted classifier.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only load model files that were produced by a trusted source.
with open(filename, 'rb') as model_file:
    # Context manager closes the handle once the model is deserialized.
    loaded_model = pickle.load(model_file)

result = loaded_model.score(X_test, y_test)
print(result)

0.9572953736654805
