In [1]:
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import os

In [2]:
# Load the per-case vote table and keep only the rows that have a recorded
# value in the "vote" column; the preview below shows the first rows.
df = pd.read_csv("112justice.csv").dropna(subset=["vote"])
df.head()

Unnamed: 0,justice,caseId,term,majVotes,minVotes,decisionDirection,direction,majority,partyWinning,precedentAlteration,vote,issueArea
0,112,2005-029,2005,9,0,2,2,2,1,0,1,8
1,112,2005-030,2005,9,0,2,2,2,1,0,1,2
2,112,2005-031,2005,9,0,2,2,2,1,0,1,9
3,112,2005-044,2005,9,0,1,1,2,1,0,1,2
4,112,2005-045,2005,9,0,2,2,2,1,0,1,1


In [3]:
# Human-readable class labels handed to classification_report later on.
# NOTE(review): assumes the sorted "direction" codes map to
# conservative first, liberal second — confirm against the data codebook.
target_names = ["conservative", "liberal"]

# The column the classifier is trained to predict.
target_112 = df.loc[:, "direction"]

In [4]:
# Build the feature matrix: everything except identifiers, the vote counts
# and the target column itself.
non_feature_cols = ["justice", "direction", "caseId", "majVotes", "minVotes"]

# "Unnamed: N" columns are what pandas produces when a CSV was written with
# its row index; they carry no information about the case, so they must not
# be fed to the classifier as a predictor.
index_artifact_cols = [col for col in df.columns if str(col).startswith("Unnamed:")]

# NOTE(review): "decisionDirection" together with "majority" presumably
# determines "direction" almost completely, which would explain the very
# high scores — confirm against the codebook and consider dropping them.
data_112 = df.drop(columns=non_feature_cols + index_artifact_cols)
feature_names = data_112.columns
data_112.head()

Unnamed: 0,term,decisionDirection,majority,partyWinning,precedentAlteration,vote,issueArea
0,2005,2,2,1,0,1,8
1,2005,2,2,1,0,1,2
2,2005,2,2,1,0,1,9
3,2005,1,2,1,0,1,2
4,2005,2,2,1,0,1,1


In [5]:
# Hold out part of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore every score below) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data_112,
    target_112,
    random_state=42,
)

In [6]:
# Fit a linear-kernel support vector classifier on the training split.
# fit() returns the estimator itself, so chaining keeps `model` bound to the
# fitted classifier; the bare name on the last line displays its parameters.
model = SVC(kernel='linear').fit(X_train, y_train)
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [7]:
# Model accuracy: mean accuracy on the rows the model was fitted on and on
# the held-out rows, reported to three decimals.
train_acc = model.score(X_train, y_train)
test_acc = model.score(X_test, y_test)
print('Train Acc: %.3f' % train_acc)
print('Test Acc: %.3f' % test_acc)

Train Acc: 0.975
Test Acc: 0.979


In [8]:
# Predict the held-out rows and summarise precision / recall / F1 per class,
# labelling the classes with the names defined alongside the target.
predictions = model.predict(X_test)
report = classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report)

              precision    recall  f1-score   support

conservative       0.97      1.00      0.98       151
     liberal       1.00      0.94      0.97        85

   micro avg       0.98      0.98      0.98       236
   macro avg       0.98      0.97      0.98       236
weighted avg       0.98      0.98      0.98       236



In [9]:
#Save Model
#https://machinelearningmastery.com/save-load-machine-learning-models-python-scikit-learn/
# NOTE(review): LogisticRegression is not used by any cell visible here; the
# import is kept only in case a later cell relies on it.
from sklearn.linear_model import LogisticRegression
import pickle

filename = 'SCOTUS_112_model.sav'

# Write the fitted classifier through a context manager so the handle is
# flushed and closed as soon as the dump finishes; a bare open() leaves that
# to the garbage collector and the file may be incomplete when read back.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [10]:
#Load Model from Disk
# Security: pickle.load can execute arbitrary code, so only unpickle files
# that this notebook (or another trusted source) produced.
# The context manager closes the handle once the model has been read.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sanity check: score the restored model on the held-out rows.
result = loaded_model.score(X_test, y_test)
print(result)

0.9788135593220338
