In [1]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_svmlight_file
from sklearn.svm import SVC
import pandas as pd
import numpy as np

In [2]:
# load data in LibSVM sparse data format
X_train, y_train = load_svmlight_file("a9a.txt")
X_test,y_test = load_svmlight_file("a9a.t")

In [3]:
#create SVM model with default parameters (to use it in GridSearchCV)
model = SVC(kernel='rbf')
model.fit(X_train, y_train)
#print(model.get_params())

SVC()

In [5]:
#Set possible hyperparameters
params = {'kernel': ['rbf'],
          'gamma': [0.1, 1, 10],
          'C': [0.1, 1, 10]}

In [6]:
#Run GridSearchCV
tuning_svm = GridSearchCV(estimator = model, param_grid = params, scoring = 'accuracy', cv = 5, n_jobs = -1)

#fit model on training data to obtain best hyperparameters
tuning_svm.fit(X_train, y_train)

#obtain best hyperparameters
print(tuning_svm.best_params_)

#initialize tuned final model
tuned_svm=tuning_svm.best_estimator_

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


In [7]:
#fit tuned model on training data
tuned_svm.fit(X_train,y_train)

#obtain training accuracy/error:
train_pred=tuned_svm.predict(X_train)
train_accuracy=accuracy_score(y_train, train_pred)

print("Training Accuracy:",train_accuracy*100, "%","Training Error:",100-(train_accuracy*100),"%")

Training Accuracy: 86.41626485673044 % Training Error: 13.583735143269564 %


In [8]:
#Run 5-fold Cross Validation on the model with best parameters and get the accuracy

cross_val_accuracy=cross_val_score(tuned_svm,X_train,y_train,cv=5).mean()
print("Cross Vaidation Accuracy:",cross_val_accuracy*100,"%","Cross Validation Error:" ,(100-(cross_val_accuracy*100)),"%")

Cross Vaidation Accuracy: 84.65649640050836 % Cross Validation Error: 15.34350359949164 %


In [9]:
# make predictions for test data
y_pred = model.predict(X_test)

# Obtain tessting accuracy/error:
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:",test_accuracy*100,"%","Test Error:",100-(test_accuracy*100),"%")

Test Accuracy: 85.05620047908606 % Test Error: 14.943799520913942 %


In [10]:
#Accuracy of configuration of parameters
accuracy_config_pd=pd.concat([pd.DataFrame(tuning_svm.cv_results_["params"]),
           pd.DataFrame(tuning_svm.cv_results_["mean_test_score"], columns=["Accuracy"])],axis=1)
print(accuracy_config_pd)

      C  gamma kernel  Accuracy
0   0.1    0.1    rbf  0.839133
1   0.1    1.0    rbf  0.764319
2   0.1   10.0    rbf  0.759190
3   1.0    0.1    rbf  0.846565
4   1.0    1.0    rbf  0.801296
5   1.0   10.0    rbf  0.779276
6  10.0    0.1    rbf  0.839133
7  10.0    1.0    rbf  0.803661
8  10.0   10.0    rbf  0.779276
