In [233]:
import numpy as np

from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_digits

### Load Dataset

In [234]:
whole_data = load_digits()

In [235]:
X_data = whole_data.images   # load X_data
y_data = whole_data.target   # load y_data

In [236]:
X_data = X_data.reshape(X_data.shape[0], X_data.shape[1] * X_data.shape[2])    # flatten X_data
y_data = y_data.reshape((y_data.shape[0], 1))

In [241]:
data_merged = np.concatenate((X_data, y_data), axis = 1)
np.random.shuffle(data_merged)
data_merged = data_merged[:300, :]    # use only 300 data instances

In [243]:
X_data = data_merged[:, :-1]
y_data = data_merged[:, -1]

In [244]:
X_scaled = X_data - np.mean(X_data, axis = 0)
std = np.std(X_scaled, axis = 0) + 0.00001    # add a minute number to prevent divide by zero
X_scaled /= std                               # divde by standard deviation

In [245]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.3, random_state = 7)    # split data into train & test set

In [246]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(210, 64)
(90, 64)
(210,)
(90,)


## 1. Without Regularization

In [247]:
clf = LinearSVC(C = 0.00001, dual = False)

In [248]:
scores = cross_val_score(clf, X_data, y_data)

In [250]:
print(scores.mean())
print(scores.std())

0.826755355948
0.010540990323


In [251]:
clf.fit(X_train, y_train)

LinearSVC(C=1e-05, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [252]:
y_tr_pred = clf.predict(X_train)

In [253]:
y_te_pred = clf.predict(X_test)

In [254]:
print(accuracy_score(y_tr_pred, y_train))
print(accuracy_score(y_te_pred, y_test))

0.871428571429
0.833333333333


## 2. L1 Regularization

In [255]:
clf = LinearSVC(penalty = 'l1', C = 100, dual = False)

In [258]:
clf.fit(X_train, y_train)

LinearSVC(C=100, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l1', random_state=None, tol=0.0001,
     verbose=0)

In [264]:
y_tr_pred = clf.predict(X_train)
y_te_pred = clf.predict(X_test)

In [265]:
print(accuracy_score(y_tr_pred, y_train))
print(accuracy_score(y_te_pred, y_test))

1.0
0.9


## 3. L2 Regularization

In [266]:
clf = LinearSVC(penalty = 'l2', C = 100.0, dual = False)

In [267]:
clf.fit(X_train, y_train)

LinearSVC(C=100.0, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [272]:
y_tr_pred = clf.predict(X_train)
y_te_pred = clf.predict(X_test)

In [273]:
print(accuracy_score(y_tr_pred, y_train))
print(accuracy_score(y_te_pred, y_test))

1.0
0.855555555556
