# Support Vector Machine Classifier (SVM): Fit and evaluate a model

In [1]:
import joblib
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

In [3]:
tr_features=pd.read_csv('train_features.csv')
tr_labels=pd.read_csv('train_labels.csv')

te_features=pd.read_csv('test_features.csv')
te_labels=pd.read_csv('test_labels.csv ')

In [6]:
te_labels.shape

(178, 1)

In [8]:
te_features.shape

(178, 8)

In [10]:
te_labels.values.ravel().shape

(178,)

In [14]:
type(te_labels.values.ravel())

numpy.ndarray

In [15]:
type(te_labels)

pandas.core.frame.DataFrame

In [16]:
def print_results(results):
    print('Best PARAMS: {}\n'.format(results.best_params_))
    means=results.cv_results_['mean_test_score']
    stds=results.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, results.cv_results_['params']):
        print('{} (+/-{}) for {}' .format(round(mean,3), round(std*2,3), params))

In [17]:
svc=SVC()
parameters={
        'kernel':['linear', 'rbf'],
         'C': [0.1, 1, 10],
         'gamma': [1, 0.1, 0.01, 0.001, 0.0001]
}

In [18]:
cv=GridSearchCV(svc, parameters, cv=5)
cv.fit(tr_features, tr_labels.values.ravel())

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['linear', 'rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [19]:
print_results(cv)

Best PARAMS: {'C': 1, 'gamma': 1, 'kernel': 'rbf'}

0.796 (+/-0.115) for {'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
0.803 (+/-0.117) for {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
0.796 (+/-0.115) for {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 0.1, 'gamma': 0.01, 'kernel': 'linear'}
0.624 (+/-0.007) for {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 0.1, 'gamma': 0.001, 'kernel': 'linear'}
0.624 (+/-0.007) for {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 0.1, 'gamma': 0.0001, 'kernel': 'linear'}
0.624 (+/-0.007) for {'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 1, 'gamma': 1, 'kernel': 'linear'}
0.809 (+/-0.094) for {'C': 1, 'gamma': 1, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 1, 'gamma': 0.1, 'kernel': 'linear'}
0.796 (+/-0.115) for {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
0.796 (+/-0.115) for {'C': 1, 'gamma': 0.01, 'ker

### Write out pickled model

In [20]:
print(cv.best_estimator_)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf', max_iter=-1,
    probability=False, random_state=None, shrinking=True, tol=0.001,
    verbose=False)


In [21]:
scores = cross_val_score(svc, tr_features, tr_labels.values.ravel(), cv=5)
scores

array([0.8411215 , 0.8411215 , 0.76635514, 0.75700935, 0.85849057])

In [22]:
joblib.dump(cv.best_estimator_, 'SVM_model.pkl')

['SVM_model.pkl']