In [1]:
import os
import re
import random

import tensorflow as tf
import tensorflow.python.platform
from tensorflow.python.platform import gfile
import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn import model_selection
import sklearn.linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC, LinearSVC
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import scipy.linalg
import sklearn.preprocessing
import sklearn.linear_model
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

### Warm-up

(a) In a one-vs-one fashion, for each pairs of classes, train a linear SVM classifier using scikit-learn's function LinearSVC, with the default value for the regularization parameter. Compute the multi-class misclassification error obtained using these classifiers trained in a one-vs-one fashion.

In [2]:
X_train = pickle.load(open('features_train_all','rb'))
y_train = pickle.load(open('labels_train_all','rb'))

In [3]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [4]:
LinearSVC_ovo = SVC(C=1.0, kernel='linear', max_iter=1000, decision_function_shape = 'ovo')

In [5]:
LinearSVC_ovo.fit(X_train1, y_train1)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [6]:
y_lrSVC_ovo = LinearSVC_ovo.predict(X_test1)

In [7]:
accuracy_lrSVC_ovo = accuracy_score(y_test1, y_lrSVC_ovo)
misclassification_error = 1 - accuracy_lrSVC_ovo
print("The multi-class misclassification error obtained using classifiers trained in a one-vs-one fashion is ", + misclassification_error)

The multi-class misclassification error obtained using classifiers trained in a one-vs-one fashion is  0.327546296296


(b) In a one-vs-rest fashion, for each class, train a linear SVM classifier using scikit-learn's function LinearSVC, with the default value for $\lambda_c$. Compute the multi-class misclassification error obtained using these classifiers trained in a one-vs-rest fashion.

In [8]:
linearSVC_ovr = LinearSVC(C=1.0, loss='squared_hinge', penalty='l2',multi_class='ovr')

In [9]:
linearSVC_ovr.fit(X_train1, y_train1)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [10]:
y_lrSVC_ovr = linearSVC_ovr.predict(X_test1)

In [11]:
accuracy_lrSVC_ovr = accuracy_score(y_test1, y_lrSVC_ovr)
misclassification_error1 = 1 - accuracy_lrSVC_ovr
print("The multi-class misclassification error obtained using classifiers trained in a one-vs-rest fashion is ", + misclassification_error1)

The multi-class misclassification error obtained using classifiers trained in a one-vs-rest fashion is  0.298611111111


(c) Using the option multi class='crammer singer' in scikitlearn's function LinearSVC, train a multi-class linear SVM classifier using the default value for the regularization parameter. Compute the multi-class misclassification error obtained using this multi-class linear SVM classifier.

In [12]:
linearSVC_cs = LinearSVC(C=1.0, loss='squared_hinge', penalty='l2',multi_class='crammer_singer')

In [13]:
linearSVC_cs.fit(X_train1, y_train1)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='crammer_singer', penalty='l2', random_state=None,
     tol=0.0001, verbose=0)

In [14]:
y_lrSVC_cs = linearSVC_cs.predict(X_test1)

In [15]:
accuracy_lrSVC_cs = accuracy_score(y_test1, y_lrSVC_cs)
misclassification_error2 = 1 - accuracy_lrSVC_cs
print("The multi-class misclassification error obtained using multi-class linear SVM classifier is ", + misclassification_error2)

The multi-class misclassification error obtained using multi-class linear SVM classifier is  0.295138888889


### Linear SVMs for multi-class classification

- Redo all questions above now tuning the regularization parameters using cross-validation.

In [16]:
X_train_sub = X_train[:500]
y_train_sub = y_train[:500]

In [17]:
#Redo Model one: linearSVC with one-vs-one
ovo_svm = SVC(kernel='linear', max_iter=1000, decision_function_shape = 'ovo')
parameters = {'C':[10**i for i in range(-4, 5)]}
clf_ovo = GridSearchCV(ovo_svm, parameters)

In [18]:
clf_ovo.fit(X_train_sub, y_train_sub)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [19]:
clf_ovo.best_params_

{'C': 0.1}

In [20]:
LinearSVC_ovo_opt = SVC(C=0.1, kernel='linear', max_iter=1000, decision_function_shape = 'ovo')

In [21]:
LinearSVC_ovo_opt.fit(X_train1, y_train1)

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [22]:
y_lrSVC_ovo_opt = LinearSVC_ovo_opt.predict(X_test1)

In [23]:
accuracy_lrSVC_ovo_opt = accuracy_score(y_test1, y_lrSVC_ovo_opt)
misclassification_error_opt = 1 - accuracy_lrSVC_ovo_opt
print("The multi-class misclassification error obtained using classifiers trained in a one-vs-one fashion with lambda=0.1 is ", + misclassification_error_opt)

The multi-class misclassification error obtained using classifiers trained in a one-vs-one fashion with lambda=0.1 is  0.326388888889


In [24]:
#Redo model 2: LinearSVC with one-vs-rest
ovr_svm = LinearSVC(loss='squared_hinge', penalty='l2',multi_class='ovr')
parameters = {'C':[10**i for i in range(-4, 5)]}
clf_ovr = GridSearchCV(ovr_svm, parameters)

In [25]:
clf_ovr.fit(X_train_sub, y_train_sub)

GridSearchCV(cv=None, error_score='raise',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [26]:
clf_ovr.best_params_

{'C': 0.01}

In [27]:
linearSVC_ovr_opt = LinearSVC(C=0.01, loss='squared_hinge', penalty='l2',multi_class='ovr')

In [28]:
linearSVC_ovr_opt.fit(X_train1, y_train1)

LinearSVC(C=0.01, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [29]:
y_lrSVC_ovr_opt = linearSVC_ovr_opt.predict(X_test1)

In [30]:
accuracy_lrSVC_ovr_opt = accuracy_score(y_test1, y_lrSVC_ovr_opt)
misclassification_error1_opt = 1 - accuracy_lrSVC_ovr_opt
print("The multi-class misclassification error obtained using classifiers trained in a one-vs-rest fashion with lambda=0.01 is ", + misclassification_error1_opt)

The multi-class misclassification error obtained using classifiers trained in a one-vs-rest fashion with lambda=0.01 is  0.289351851852


In [31]:
#Redo model 3: multi-class linear SVM
cs_svm = LinearSVC(loss='squared_hinge', penalty='l2',multi_class='crammer_singer')
parameters = {'C':[10**i for i in range(-4, 5)]}
clf_cs = GridSearchCV(cs_svm, parameters)

In [32]:
clf_cs.fit(X_train_sub, y_train_sub)

GridSearchCV(cv=None, error_score='raise',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='crammer_singer', penalty='l2', random_state=None,
     tol=0.0001, verbose=0),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [33]:
clf_cs.best_params_

{'C': 0.1}

In [34]:
linearSVC_cs_opt = LinearSVC(C=0.1, loss='squared_hinge', penalty='l2',multi_class='crammer_singer')

In [35]:
linearSVC_cs_opt.fit(X_train1, y_train1)

LinearSVC(C=0.1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='crammer_singer', penalty='l2', random_state=None,
     tol=0.0001, verbose=0)

In [36]:
y_lrSVC_cs_opt = linearSVC_cs_opt.predict(X_test1)

In [37]:
accuracy_lrSVC_cs_opt = accuracy_score(y_test1, y_lrSVC_cs_opt)
misclassification_error2_opt = 1 - accuracy_lrSVC_cs_opt
print("The multi-class misclassification error obtained using multi-class linear SVM with lambda=0.1 is ", + misclassification_error2_opt)

The multi-class misclassification error obtained using multi-class linear SVM with lambda=0.1 is  0.293981481481


### Kernel SVMs for multi-class classification

- Redo all questions above now using the polynomial kernel of order 2 (and tuning the regularization parameters using cross-validation).

In [38]:
#Redo Model 1: polynomial kernel SVM of order 2 with one-vs-one
ovo_svm_poly = SVC(kernel='poly', degree=2, max_iter=1000, decision_function_shape = 'ovo')
parameters = {'C':[10**i for i in range(-4, 5)], 'coef0': [0, 1e-1, 1e-2, 1e-3, 1e-4]}
clf_ovo_poly = GridSearchCV(ovo_svm_poly, parameters)

In [39]:
clf_ovo_poly.fit(X_train_sub, y_train_sub)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=2, gamma='auto', kernel='poly',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000], 'coef0': [0, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [40]:
clf_ovo_poly.best_params_

{'C': 1000, 'coef0': 0.1}

In [41]:
polySVC_ovo_opt = SVC(C=1000, coef0=0.1, kernel='poly', degree=2, max_iter=1000, decision_function_shape = 'ovo')

In [42]:
polySVC_ovo_opt.fit(X_train1, y_train1)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.1,
  decision_function_shape='ovo', degree=2, gamma='auto', kernel='poly',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [43]:
y_ovo_poly = polySVC_ovo_opt.predict(X_test1)

In [44]:
accuracy_poly_ovo_opt = accuracy_score(y_test1, y_ovo_poly)
misclassification_error_poly1 = 1 - accuracy_poly_ovo_opt
print("The multi-class misclassification error obtained using polynomial kernel SVM in one-vs-one with lambda=1000 is ", + misclassification_error_poly1)

The multi-class misclassification error obtained using polynomial kernel SVM in one-vs-one with lambda=1000 is  0.327546296296


In [45]:
#Redo Model 2: polynomial kernel SVM of order 2 with one-vs-rest
ovr_svm_poly = SVC(kernel='poly', degree=2, max_iter=1000, decision_function_shape = 'ovr')
parameters = {'C':[10**i for i in range(-4, 5)], 'coef0': [0, 1e-1, 1e-2, 1e-3, 1e-4]}
clf_ovr_poly = GridSearchCV(ovo_svm_poly, parameters)

In [46]:
clf_ovr_poly.fit(X_train_sub, y_train_sub)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=2, gamma='auto', kernel='poly',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000], 'coef0': [0, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [47]:
clf_ovr_poly.best_params_

{'C': 1000, 'coef0': 0.1}

In [48]:
polySVC_ovr_opt = SVC(C=1000, coef0=0.1, kernel='poly', degree=2, max_iter=1000, decision_function_shape = 'ovr')

In [49]:
polySVC_ovr_opt.fit(X_train1, y_train1)

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.1,
  decision_function_shape='ovr', degree=2, gamma='auto', kernel='poly',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [50]:
y_ovr_poly = polySVC_ovr_opt.predict(X_test1)

In [51]:
accuracy_poly_ovr_opt = accuracy_score(y_test1, y_ovr_poly)
misclassification_error_poly2 = 1 - accuracy_poly_ovr_opt
print("The multi-class misclassification error obtained using polynomial kernel SVM in one-vs-rest with lambda=1000 is ", + misclassification_error_poly2)

The multi-class misclassification error obtained using polynomial kernel SVM in one-vs-rest with lambda=1000 is  0.327546296296
