In [4]:
!pip install pandas
!pip install matplotlib
!pip install numpy
!pip install scikit-learn
!pip install ConfigSpace



In [6]:
import pandas as pd
import scipy.stats as stats
from time import time
import numpy as np
import typing
import ConfigSpace

import sklearn.model_selection
from sklearn import svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier, MLPRegressor

from matplotlib import pyplot as plt

np.random.seed(0)

In [7]:
def stock_tuning(X_train, y_train, model):
    model.fit(X_train, y_train)
    return model

In [8]:
def random_search(X_train, y_train, model, param_dist, n_iter):
    random_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=n_iter)
    start = time()
    random_search.fit(X_train, y_train)
    return random_search, time()-start

In [9]:
def grid_search(X_train, y_train, model, param_grid):
    grid_search = GridSearchCV(model, param_grid=param_grid)
    start = time()
    grid_search.fit(X_train, y_train)
    return grid_search, time()-start

In [15]:
def sample_configurations_svc(n_configurations):
        cs = ConfigSpace.ConfigurationSpace('sklearn.svm.SVC', 1)

        C = ConfigSpace.UniformFloatHyperparameter(
            name='C', lower=1, upper=1000, log=True, default_value=1.0)
        gamma = ConfigSpace.UniformFloatHyperparameter(
            name='gamma', lower=1e-05, upper=1, log=True, default_value=0.1)
        cs.add_hyperparameters([C, gamma])

        configs = np.array([(configuration['gamma'],
                            configuration['C'])
                        for configuration in cs.sample_configuration(n_configurations)])
    
        return configs

In [22]:
def acc_score(model, X_test, y_test):
    return model.score(X_test, y_test)

In [23]:
def smbo_optimize(theta_new, model):
    # Optimize SVC
    model.set_params(kernel='rbf', gamma=theta_new[0], C=theta_new[1])
    model.fit(X_train, y_train)
    
def smbo_search(X_train, y_train, model, smbo_config_space, iter_ = 100):
    smbo = SequentialModelBasedOptimization()
    start = time()
    smbo.initialize(sample_initial_configurations(10))
    for i in range(iter_):
        smbo.fit_model()
        theta_new = smbo.select_configuration(sample_configurations_svc(64))
        
        
        
        performance = model.score(X_test, y_test)
        
        smbo.update_runs((theta_new, performance))
    
    
    end = time() - start
    _, best_config = smbo.return_best_configuration()
    model.set_params(kernel='rbf', gamma=best_config[0], C=best_config[1])
    return model, end
    

In [26]:
def evaluate_svc(
    datasets_ids, 
    random_search_params = None,
    grid_search_params = None,
    smbo_config_space = None
):
    results = pd.DataFrame(columns=["Dataset", "Stock", "Grid-Search", "Random-Search", "SMBO"])
    for id_ in datasets_ids:
        # Fetch dataset
        dataset = fetch_openml(data_id=id_, as_frame=True, parser="pandas")
        
        X = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)  
        y = dataset.target
        
        # Split dataset in Training-Test
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)
        
        # Initilize Support Vector Classifiers
        svc1, svc2, svc3, svc4 = svm.SVC(), svm.SVC(), svm.SVC(), svm.SVC()
        
        # No-tuning
        stock_svc, stock_time = stock_tuning(X_train, y_train, svc1)
        stock_acc = acc_score(stock_svc, X_test, y_test)
        
        # Random-search tuning
        default_random_search_params = {
            "kernel": ['rbf'],
            "gamma": stats.uniform(1e-1, 1e-5),
            "C": stats.uniform(1, 1000)
        }
        rand_svc, rand_time = random_search(X_train, y_train, svc2, 
                                            default_random_search_params if not random_search_params else random_search_params, 
                                            100)
        rand_acc = acc_score(rand_svc, X_test, y_test)
        
        # Grid-search tuning
        default_grid_search_params = {
            "kernel": ['rbf'],
            "gamma": [1e-1, 1e-5],
            "C": range(1,1000,20)
        }
        grid_svc, grid_time = grid_search(X_train, y_train, svc3, 
                                          default_grid_search_params if not grid_search_params else grid_search_params)
        grid_acc = acc_score(grid_svc, X_test, y_test)
        
        # SMBO Tuning
        smbo_config_space = [((gamma, C), optimizee(gamma, C)) for gamma, C in sample_configurations_svc(64)] if not smbo_config_space else smbo_config_space
        smbo_svc, smbo_time = smbo_search(X_train, y_train, svc4, smbo_config_space)
        smbo_acc = acc_score(smbo_svc, X_test, y_test)
        
        results.append({
            "Dataset": id_,
            "Stock": stock_acc,
            "Grid-Search": grid_acc,
            "Random-Search": rand_acc,
            "SMBO": smbo_acc
        })
        
    return results

In [27]:
evaluate_svc([1464])

NameError: name 'optimizee' is not defined

In [None]:
def evaluate_svr(datasets_ids):
    for dataset in datasets:
        X = pd.DataFrame(data=bunch_dataset.data, columns=bunch_dataset.feature_names)  
        y = bunch_dataset.target
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)
        
        # Initialize Support Vector Regressor
        svr1, svr2, svr3 = svm.SVR(), svm.SVR(), svm.SVR()

In [None]:
def evaluate_mlpc(datasets_ids):
    for dataset in datasets:
        
        X = pd.DataFrame(data=bunch_dataset.data, columns=bunch_dataset.feature_names)  
        y = bunch_dataset.target
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)
        
        # Initilize Multi-Layer Perceptron Classifier
        mlpc1 = MLPClassifier(random_state=1, max_iter=200)
        mlpc2 = MLPClassifier(random_state=1, max_iter=200)
        mlpc3 = MLPClassifier(random_state=1, max_iter=200)

In [None]:
def evaluate_mlpr(datasets_ids):
    for dataset in datasets:
        X = pd.DataFrame(data=bunch_dataset.data, columns=bunch_dataset.feature_names)  
        y = bunch_dataset.target
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.3)
        
        # Initilize Multi-Layer Perceptron Regressor
        mlpr1 = MLPRegressor(random_state=1, max_iter=200)
        mlpr2 = MLPRegressor(random_state=1, max_iter=200)
        mlpr3 = MLPRegressor(random_state=1, max_iter=200)