In [1]:
#Importing the dataset and the train_test_split() method
import time
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

## 1. SVM with GridSearch 

In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [3]:
#Loading the dataset and splitting it to train, test
data = load_digits()
X = data.data
Y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

In [4]:
#Setting the parameters by cross validation
tuned_parameters = [{'kernel':['rbf','linear'], 'gamma':[1e-3, 1e-4], 'C':[1,10,100,1000]}]

In [5]:
scores = ['precision', 'recall']

In [6]:
def grid_search(X_train, y_train, X_test, y_test, tuned_params, metric):
    start_time = time.time()
    print("# Tuning hyper parameters for %s" % metric)
    clf = GridSearchCV(SVC(), tuned_params, scoring='%s_macro' % metric)
    clf.fit(X_train, y_train)
    print("Best parameters found ")
    print(clf.best_params_)
    print("Grid scores on development set: ")
    print(clf.best_estimator_)
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std*2, params))
        
    print("Classification report")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print("--- %s seconds ---" % (time.time() - start_time))
    

In [7]:
# Grid Search using the precision only
import warnings
warnings.filterwarnings("ignore")
grid_search(X_train, y_train, X_test, y_test, tuned_parameters, scores[0])

# Tuning hyper parameters for precision
Best parameters found 
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Grid scores on development set: 
SVC(C=10, gamma=0.001)

0.988 (+/-0.008) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.971 (+/-0.018) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 1, 'gamma': 0.0001, 'kernel': 'linear'}
0.990 (+/-0.007) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 10, 'gamma': 0.001, 'kernel': 'linear'}
0.983 (+/-0.012) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}
0.990 (+/-0.007) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
0.985 (+/-0.006) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.972 (+/-0.016) for {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}
0.990 (+/-0.007) for {'C': 1000, 'gamma': 0.0

In [8]:
grid_search(X_train, y_train, X_test, y_test, tuned_parameters, scores[1])

# Tuning hyper parameters for recall
Best parameters found 
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Grid scores on development set: 
SVC(C=10, gamma=0.001)

0.987 (+/-0.008) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.969 (+/-0.020) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 1, 'gamma': 0.0001, 'kernel': 'linear'}
0.989 (+/-0.008) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 10, 'gamma': 0.001, 'kernel': 'linear'}
0.982 (+/-0.014) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}
0.989 (+/-0.008) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
0.984 (+/-0.007) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.971 (+/-0.017) for {'C': 100, 'gamma': 0.0001, 'kernel': 'linear'}
0.989 (+/-0.008) for {'C': 1000, 'gamma': 0.001,

## 2. Auto-SKlearn

In [9]:
import autosklearn.classification as automl
from sklearn.metrics import accuracy_score
import warnings 
warnings.filterwarnings("ignore")

In [10]:
clf = automl.AutoSklearnClassifier(time_left_for_this_task = 1*60, per_run_time_limit = 30, n_jobs = 1)

In [11]:
start_time = time.time()
clf.fit(X_train, y_train)
print("--- %s seconds ---" % (time.time() - start_time))

--- 50.90537619590759 seconds ---


In [12]:
print(clf.sprint_statistics())

auto-sklearn results:
  Dataset name: ccccb2c0-b36e-11ec-8059-54e1adb09d4d
  Metric: accuracy
  Best validation score: 0.985263
  Number of target algorithm runs: 8
  Number of successful target algorithm runs: 6
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 2
  Number of target algorithms that exceeded the memory limit: 0



In [13]:
predicitions = clf.predict(X_test)

In [14]:
accuracy_score(y_test, predicitions)

0.9777777777777777