In [103]:
# Importing the dataset loader, the candidate models and the model-selection helpers
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

# cross_val_score and GridSearchCV live in sklearn.model_selection since
# scikit-learn 0.18; the sklearn.cross_validation / sklearn.grid_search modules
# were removed in 0.20. Fall back to the legacy modules only on older installs.
try:
    from sklearn.model_selection import cross_val_score, GridSearchCV
except ImportError:
    from sklearn.cross_validation import cross_val_score
    from sklearn.grid_search import GridSearchCV


In [78]:
# Loading Dataset: the returned object exposes the feature matrix as .data and
# the class labels as .target, which the next cell reads
iris = load_iris()

In [79]:
# Splitting the dataset into the feature matrix (x) and the class labels (y)
x, y = iris.data, iris.target

In [106]:
# Creating Models dictionary (display name -> unfitted estimator).
# The tree-based estimators draw random numbers while fitting, so they get a
# fixed random_state to keep the cross-validation scores repeatable across runs.
RANDOM_STATE = 0
models = {
    'svc': SVC(),
    'logreg': LogisticRegression(),
    'RandomForestClassifier': RandomForestClassifier(random_state=RANDOM_STATE),
    'Naive-Bayes Gauss': GaussianNB(),
    'Naive-Bayes Multi': MultinomialNB(),
    'Decision tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'Knn': KNeighborsClassifier(n_neighbors=20),
}

In [111]:
# Scoring: mean 13-fold cross-validated accuracy of each model, printed as a percentage
for name, estimator in models.items():
    fold_scores = cross_val_score(estimator, x, y, cv=13, scoring='accuracy')
    score = fold_scores.mean()
    print('{} scored: {:.1f}'.format(name, score*100))


Naive-Bayes Multi scored: 95.5
Knn scored: 97.4
svc scored: 97.4
Decision tree scored: 95.5
logreg scored: 96.2
Naive-Bayes Gauss scored: 95.5
RandomForestClassifier scored: 94.2


In [82]:
# Carrying the SVM (SVC) model forward and tuning its hyperparameters

In [83]:
# Finding optimized parameters: the search space pairs every kernel with
# every decision-function shape
svm_model = SVC()
param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'decision_function_shape': ['ovr', 'ovo'],
}

In [84]:
# Setting up the grid search over param_grid: candidates are ranked by
# 10-fold cross-validated accuracy, with n_jobs=-1 for parallel evaluation
grid = GridSearchCV(
    estimator=svm_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)

In [85]:
# Fitting the grid with data: evaluates every combination in param_grid on the
# iris features/labels; the fitted search object is echoed as the cell output
grid.fit(x, y)

GridSearchCV(cv=10, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'decision_function_shape': ['ovr', 'ovo'], 'kernel': ['linear', 'rbf', 'poly', 'sigmoid']},
       pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)

### If param_grid contains parameter names or values that are invalid for the model, the following error occurs. Always make sure the parameters and their values exist for the given model.


File "<string>", line unknown
SyntaxError: invalid or missing encoding declaration for C:\tree\\svm\\libsvm.pyd

In [86]:
# Getting best estimator settings: the SVC configured with the winning
# parameter combination from the search
grid.best_estimator_

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [87]:
# Optimized parameters from GridSearchCV: only the entries of param_grid that
# were searched, with their winning values
grid.best_params_

{'decision_function_shape': 'ovr', 'kernel': 'rbf'}

In [88]:
# Best score obtained while searching the grid: the mean cross-validated
# accuracy of the winning parameter combination
grid.best_score_

0.97999999999999998

In [89]:
# Per-candidate cross-validation results for every combination in the grid.
# grid_scores_ only exists on legacy GridSearchCV objects (it was removed in
# scikit-learn 0.20); newer versions expose the same information as cv_results_.
grid.cv_results_ if hasattr(grid, 'cv_results_') else grid.grid_scores_

[mean: 0.97333, std: 0.04422, params: {'decision_function_shape': 'ovr', 'kernel': 'linear'},
 mean: 0.98000, std: 0.03055, params: {'decision_function_shape': 'ovr', 'kernel': 'rbf'},
 mean: 0.97333, std: 0.04422, params: {'decision_function_shape': 'ovr', 'kernel': 'poly'},
 mean: 0.33333, std: 0.00000, params: {'decision_function_shape': 'ovr', 'kernel': 'sigmoid'},
 mean: 0.97333, std: 0.04422, params: {'decision_function_shape': 'ovo', 'kernel': 'linear'},
 mean: 0.98000, std: 0.03055, params: {'decision_function_shape': 'ovo', 'kernel': 'rbf'},
 mean: 0.97333, std: 0.04422, params: {'decision_function_shape': 'ovo', 'kernel': 'poly'},
 mean: 0.33333, std: 0.00000, params: {'decision_function_shape': 'ovo', 'kernel': 'sigmoid'}]

In [90]:
# Building the final SVC from the grid-search winner. In the grid scores above,
# 'ovr' and 'ovo' score identically for every kernel; 'ovr' is kept because
# this is a multi-class problem.
svmc = SVC(kernel='rbf', decision_function_shape='ovr')

In [91]:
# Re-scoring the tuned model: mean accuracy over 10 cross-validation folds
fold_accuracies = cross_val_score(svmc, x, y, scoring='accuracy', cv=10)
score = fold_accuracies.mean()
print(score)

0.98


In [108]:
# Percentage improvement using optimized parameters
def format_improvement(baseline, tuned):
    """Return the relative gain of ``tuned`` over ``baseline`` as a message.

    Parameters
    ----------
    baseline, tuned : float
        Accuracies expressed on the same scale (e.g. fractions in [0, 1]).

    Returns
    -------
    str
        'Improvement by X.XX% ' with the gain shown to two decimal places.
    """
    gain_pct = ((tuned - baseline) / baseline) * 100
    # '.2f' pins two decimal places. The previous '0.2' spec meant two
    # significant digits and switched to exponent notation for gains >= 10%.
    return 'Improvement by {:.2f}% '.format(gain_pct)


# NOTE(review): 0.974 is the default-SVC score from the 13-fold comparison,
# while 0.98 comes from 10-fold CV, so the two are not measured like-for-like.
print(format_improvement(0.974, 0.98))

Improvement by 0.62% 


## Mean 10-fold cross-validated accuracy of the optimized SVC model is 98%