In [1]:
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import os

In [2]:
# Load the per-case table from the CSV and keep only the rows
# that actually have a value in the "vote" column.
df = pd.read_csv("113justice.csv").dropna(subset=["vote"])
df.head()

Unnamed: 0,justice,caseId,term,majVotes,minVotes,decisionDirection,direction,majority,partyWinning,precedentAlteration,vote,issueArea
0,113,2009-001,2009,9,0,1,1,2,0,0,1,9
1,113,2009-002,2009,8,1,1,1,2,1,0,1,9
2,113,2009-003,2009,9,0,2,2,2,0,0,1,7
3,113,2009-004,2009,8,0,1,1,2,1,0,1,1
4,113,2009-005,2009,7,2,1,2,1,1,0,2,1


In [3]:
# Display names for the two classes (handed to classification_report
# further down) and the label vector taken from the "direction" column.
# NOTE(review): the report pairs these names with the sorted label values,
# so this presumes the lower "direction" code means conservative - confirm
# against the dataset codebook.
target_names = ["conservative", "liberal"]
target_113 = df["direction"]

In [4]:
# Feature matrix: every column of df except the label ("direction") and the
# justice / case identifier and vote-count columns listed below.
# NOTE(review): "decisionDirection", "majority" and "vote" stay in as
# features; in the rows displayed above they appear to determine
# "direction" between them - check for target leakage before trusting the
# scores.
# NOTE(review): the rendered table shows an "Unnamed: 0" column; if that is
# a real CSV column (a saved row index) rather than a display artifact, it
# is being fed to the model as a feature - confirm.
data_113 = df.drop(
    columns=["justice", "direction", "caseId", "majVotes", "minVotes"]
)
feature_names = data_113.columns
data_113.head()

Unnamed: 0,term,decisionDirection,majority,partyWinning,precedentAlteration,vote,issueArea
0,2009,1,2,0,0,1,9
1,2009,1,2,1,0,1,9
2,2009,2,2,0,0,1,7
3,2009,1,2,1,0,1,1
4,2009,1,1,1,0,2,1


In [5]:
# Hold out part of the data for evaluation (library-default proportions);
# the fixed random_state makes the shuffle, and so the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data_113,
    target_113,
    random_state=42,
)

In [6]:
# Linear-kernel support vector classifier trained on the training split.
# fit() hands back the estimator itself, so chaining is equivalent and the
# bare name on the last line displays the same repr as before.
model = SVC(kernel='linear').fit(X_train, y_train)
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [7]:
# Accuracy of the fitted model on the training split and on the held-out split.
train_acc = model.score(X_train, y_train)
test_acc = model.score(X_test, y_test)
print('Train Acc: %.3f' % train_acc)
print('Test Acc: %.3f' % test_acc)

Train Acc: 0.964
Test Acc: 0.970


In [8]:
# Predict the held-out rows and print the per-class precision / recall / f1 table.
predictions = model.predict(X_test)
report = classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report)

              precision    recall  f1-score   support

conservative       1.00      0.92      0.96        60
     liberal       0.96      1.00      0.98       107

   micro avg       0.97      0.97      0.97       167
   macro avg       0.98      0.96      0.97       167
weighted avg       0.97      0.97      0.97       167



In [9]:
# Save the fitted model to disk with pickle.
# Reference: https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
from sklearn.linear_model import LogisticRegression  # not used in the cells shown here
import pickle

filename = 'SCOTUS_113_model.sav'
# The context manager closes (and therefore flushes) the file as soon as the
# dump finishes, so the pickle is complete on disk before anything reads it
# back; a bare open() left that to garbage collection.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [10]:
# Load the pickled model back from disk and re-score it on the held-out
# split as a round-trip check.
# Unpickling can execute arbitrary code: only load files this notebook (or
# another trusted source) wrote.
with open(filename, 'rb') as model_file:  # handle is closed right after loading
    loaded_model = pickle.load(model_file)
result = loaded_model.score(X_test, y_test)
print(result)

0.9700598802395209
