In [1]:
import numpy as np

from time import time
from scipy.stats import randint as sp_randint

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [2]:
# Seed the random number generator:
np.random.seed(1)

def load_data(filename, skiprows = 1):
    """
    Function loads data stored in the file filename and returns it as a numpy ndarray.
    
    Inputs:
        filename: given as a string.
        
    Outputs:
        Data contained in the file, returned as a numpy ndarray
    """
    return np.loadtxt(filename, skiprows=skiprows, delimiter=' ')

In [3]:
X = load_data('training_data.txt')
y = X[:, 0]
X = X[:, 1:]

In [None]:
# Utility function to report best scores
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [None]:
clf = SVC()
param_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid, verbose=10)
start = time()
grid_search.fit(X, y)

print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(grid_search.cv_results_['params'])))
report(grid_search.cv_results_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] kernel=rbf, C=1, gamma=0.001 ....................................
[CV]  kernel=rbf, C=1, gamma=0.001, score=0.8377081145942703, total= 3.8min
[CV] kernel=rbf, C=1, gamma=0.001 ....................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.2min remaining:    0.0s


[CV]  kernel=rbf, C=1, gamma=0.001, score=0.8203089845507725, total= 3.9min
[CV] kernel=rbf, C=1, gamma=0.001 ....................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 12.5min remaining:    0.0s


[CV]  kernel=rbf, C=1, gamma=0.001, score=0.828982898289829, total= 4.0min
[CV] kernel=rbf, C=1, gamma=0.0001 ...................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 18.9min remaining:    0.0s


[CV]  kernel=rbf, C=1, gamma=0.0001, score=0.7234138293085346, total= 5.4min
[CV] kernel=rbf, C=1, gamma=0.0001 ...................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 27.6min remaining:    0.0s


[CV]  kernel=rbf, C=1, gamma=0.0001, score=0.7195140242987851, total= 5.5min
[CV] kernel=rbf, C=1, gamma=0.0001 ...................................


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 37.1min remaining:    0.0s


[CV]  kernel=rbf, C=1, gamma=0.0001, score=0.7145214521452146, total= 5.8min
[CV] kernel=rbf, C=10, gamma=0.001 ...................................


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 46.4min remaining:    0.0s


[CV]  kernel=rbf, C=10, gamma=0.001, score=0.848357582120894, total= 3.1min
[CV] kernel=rbf, C=10, gamma=0.001 ...................................


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed: 51.3min remaining:    0.0s


[CV]  kernel=rbf, C=10, gamma=0.001, score=0.84040797960102, total= 2.9min
[CV] kernel=rbf, C=10, gamma=0.001 ...................................


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 55.8min remaining:    0.0s


[CV]  kernel=rbf, C=10, gamma=0.001, score=0.8484848484848485, total= 2.8min
[CV] kernel=rbf, C=10, gamma=0.0001 ..................................


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 60.3min remaining:    0.0s


[CV]  kernel=rbf, C=10, gamma=0.0001, score=0.8393580320983951, total= 3.9min
[CV] kernel=rbf, C=10, gamma=0.0001 ..................................
[CV]  kernel=rbf, C=10, gamma=0.0001, score=0.8236088195590221, total=11.2min
[CV] kernel=rbf, C=10, gamma=0.0001 ..................................
[CV]  kernel=rbf, C=10, gamma=0.0001, score=0.8294329432943295, total= 3.6min
[CV] kernel=rbf, C=100, gamma=0.001 ..................................
[CV]  kernel=rbf, C=100, gamma=0.001, score=0.847907604619769, total= 2.7min
[CV] kernel=rbf, C=100, gamma=0.001 ..................................
[CV]  kernel=rbf, C=100, gamma=0.001, score=0.839658017099145, total= 2.6min
[CV] kernel=rbf, C=100, gamma=0.001 ..................................
[CV]  kernel=rbf, C=100, gamma=0.001, score=0.848034803480348, total= 2.7min
[CV] kernel=rbf, C=100, gamma=0.0001 .................................
[CV]  kernel=rbf, C=100, gamma=0.0001, score=0.8467076646167692, total= 2.5min
[CV] kernel=rbf, C=100, gamma=

In [None]:
print('hi')

In [None]:
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(grid_search.cv_results_['params'])))
report(grid_search.cv_results_)