### AI-10 Practice 1  

#### Import libraries  

In [51]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC  # <-----
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold, cross_val_score  # <-----
from sklearn.metrics import accuracy_score  # <-----
from sklearn.pipeline import Pipeline
from joblib import dump
from sklearn import datasets  # <-----

In [52]:
# Load the scikit-learn digits dataset, keeping all ten classes.
digits_img = datasets.load_digits(n_class=10)

# Feature matrix and class labels taken from the loaded dataset object.
X, y = digits_img.data, digits_img.target

# Report both array shapes, one per line.
print(X.shape, y.shape, sep='\n')

(1797, 64)
(1797,)


#### Make pipeline and set parameters for grid search   

In [53]:
# Candidate values for the SVC regularisation parameter C
# (the same values as np.logspace(0, 3, num=4)).
C_grid = [1e0, 1e1, 1e2, 1e3]

# Candidate values for the RBF kernel coefficient gamma: the string option
# 'scale' plus three fixed numeric values.
g_grid = ['scale', 1e-2, 1e-1, 1e0]

# Two-step model: min-max scale every feature to [0, 1], then classify with
# an RBF-kernel support vector machine.
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler(feature_range=(0, 1), copy=True)),
    ('svc', SVC(kernel='rbf')),
])

# Hyperparameter settings for the grid search. The 'svc__' prefix addresses
# parameters of the pipeline step named 'svc'.
param_grid = dict(svc__C=C_grid, svc__gamma=g_grid)



#### Preparation of objects for cross validation  

In [54]:
# Splitter intended for hyperparameter selection (inner loop):
# 4 stratified, shuffled folds with a fixed seed for reproducibility.
grid_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=13,
)

# Splitter intended for the generalization estimate (outer loop):
# same layout, but a different seed so the two loops use different splits.
gen_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=17,
)

#### Define the grid search for hyper parameters  

In [55]:
# Exhaustive search over param_grid, scoring each candidate by accuracy
# on the folds produced by grid_cv.
gs = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=grid_cv,
)

#### Estimation of generalization performance  

In [56]:
%%time
nested_score = cross_val_score(gs, X=X, y=y, cv=gen_cv,
                               scoring='accuracy')  # <-----
print(nested_score)
print(nested_score.mean())  # <-----

[0.98888889 0.99109131 0.99109131 0.99331849]
0.9910975006186589
CPU times: user 13.3 s, sys: 6.82 ms, total: 13.3 s
Wall time: 13.3 s


**Ans. 0.991** (mean nested cross-validation accuracy)  

#### Cross-validation to obtain the model with the best hyperparameter set (best estimator)  
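- Note: `gs.fit(X, y)` refits the best pipeline on the whole dataset (`refit=True` is the `GridSearchCV` default), so `gs_best` is already trained on all of `X` and needs no further fitting.  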

In [57]:
%%time
gs.fit(X, y)
gs_best = gs.best_estimator_

CPU times: user 5.88 s, sys: 0 ns, total: 5.88 s
Wall time: 5.88 s


In [58]:
# Show the selected pipeline and its hyperparameters.
print(gs_best)

Pipeline(steps=[('scaler', MinMaxScaler()), ('svc', SVC(C=10.0))])


In [59]:
# Persist the selected pipeline to disk with joblib.
# NOTE(review): the file name keeps the 'svr_' prefix although the saved
# model is an SVC classifier; confirm nothing loads this path before renaming.
tag = 'ai-10-assign1'
model_file = f'svr_best_{tag}.joblib'
dump(gs_best, model_file)

['svr_best_ai-10-assign1.joblib']