# Automated Machine Learning from Scratch

Group 18 Members:

- Clara Pichler, 11917694
- Hannah Knapp, 11901857 
- Sibel Toprakkiran, 09426341

### Overview

1. Data Set Splitting and Preprocessing

2. generate neighborhood
- `generate_neighborhood(self, current_solution)`

3. create model
- `create_model(self, solution)`

4. simulated annealing
- `simulated_annealing(self)`

5. Comparison with two state of the art AutoML systems
- auto-sklearn 
- TPOT

6. Evaluation
- Iris Dataset
- Congressional Voting Dataset
- gym session tracking Dataset
- Abalone Data set

In [137]:
from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
import time
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder

## Data sets

- Iris Dataset
- Congressional Voting Dataset
- gym session tracking Dataset
- Abalone Data set

In [138]:
iris = datasets.load_iris()
iris_data = pd.DataFrame(data= np.c_[iris['data'], iris['target']], columns= iris['feature_names'] + ['target'])
iris_data['target'] = iris_data['target'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

df_voting = pd.read_csv('data/CongressionalVotingID.shuf.lrn.csv')

df_gym = pd.read_csv('data/gym_members_exercise_tracking.csv')

url='./data/abalone.csv'
column_names = ["Sex", "Length", "Diameter", "Height", "Whole_weight", "Shucked_weight", "Viscera_weight", "Shell_weight", "Rings"]
abalone_df = pd.read_csv(url, header=0, names=column_names)

### Pre-processing

In [139]:
pd.set_option('future.no_silent_downcasting', True)
df_voting = df_voting.replace({"democrat": 0,"republican": 1,"n": 0,"y": 1,"unknown": np.nan}).infer_objects()
df_voting = df_voting.drop(columns=['ID'])

from sklearn.experimental import enable_iterative_imputer 
from sklearn.impute import IterativeImputer

imp = IterativeImputer(max_iter=10, random_state=0)
df_voting = pd.DataFrame(imp.fit_transform(df_voting), columns=df_voting.columns)
display(df_voting)




Unnamed: 0,class,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa
0,0.0,1.000000,0.000000,1.00000,0.000000,1.000000,1.000000,1.000000,0.000000,1.0,1.000000,0.000000,0.000000,1.000000,1.000000,0.000000,0.896577
1,0.0,0.000000,0.000000,1.00000,0.000000,1.000000,1.000000,0.000000,0.000000,0.0,1.000000,1.000000,1.000000,1.000000,1.000000,0.000000,1.000000
2,0.0,1.000000,0.000000,1.00000,0.000000,0.000000,0.000000,1.000000,1.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.996577
3,1.0,0.000000,0.000000,0.00000,1.000000,1.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1.000000,0.000000,1.000000,0.268099,1.000000
4,0.0,1.000000,1.000000,1.00000,0.000000,0.000000,1.000000,0.829126,1.000000,1.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,0.0,0.000000,0.000000,1.00000,0.000000,0.000000,0.000000,1.000000,1.000000,1.0,0.000000,0.000000,0.074008,0.000000,0.000000,1.000000,1.000000
214,0.0,0.692818,0.433696,0.97112,0.029204,0.060749,0.339717,0.923257,0.963781,1.0,0.442077,0.419345,0.073561,0.255977,0.272106,0.698756,0.996174
215,1.0,0.000000,0.000000,0.00000,1.000000,1.000000,1.000000,0.000000,0.000000,0.0,1.000000,0.000000,1.000000,1.000000,1.000000,0.000000,0.000000
216,1.0,1.000000,0.000000,0.00000,1.000000,1.000000,1.000000,0.000000,0.000000,0.0,1.000000,0.000000,1.000000,1.000000,1.000000,0.000000,0.000000


### test-validation-train split

In [140]:
X_iris = iris_data.drop(['target'], axis=1)
y_iris = iris_data['target']

X_train_iris, X_temp, y_train_iris, y_temp = train_test_split(X_iris, y_iris, test_size=0.4, random_state=42)
X_val_iris, X_test_iris, y_val_iris, y_test_iris = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [141]:
X_voting = df_voting.drop(['class'], axis=1)
y_voting = df_voting['class']

X_train_voting, X_temp, y_train_voting, y_temp = train_test_split(X_voting, y_voting, test_size=0.4, random_state=42)
X_val_voting, X_test_voting, y_val_voting, y_test_voting = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [142]:
X_gym = df_gym.drop(['Gender'], axis=1)
y_gym = df_gym['Gender']

X_train_gym, X_temp, y_train_gym, y_temp = train_test_split(X_gym, y_gym, test_size=0.4, random_state=42)
X_val_gym, X_test_gym, y_val_gym, y_test_gym = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [143]:
X_abalone = abalone_df.drop(['Sex'], axis=1)
y_abalone = abalone_df['Sex']

X_train_abalone, X_temp, y_train_abalone, y_temp = train_test_split(X_abalone, y_abalone, test_size=0.4, random_state=42)
X_val_abalone, X_test_abalone, y_val_abalone, y_test_abalone = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

## Models
- MLP
- RF
- KNN 
- SVM
- AdaBoost

## AutoML algorithm

In [144]:
class AutoML_18:
    def __init__(self, initial_temp=100, cooling_rate=0.99, max_iterations=100, min_training_time=3600):
        self.initial_temp = initial_temp
        self.cooling_rate = cooling_rate
        self.max_iterations = max_iterations
        self.min_training_time = min_training_time
        
        self.algorithms = {
            'DecisionTreeClassifier': {
                'class': DecisionTreeClassifier,
                'parameters': ["max_depth", "min_samples_split", "max_features", "criterion"],
                'values': [[5, 10, 15], [2, 3, 3, 4], ['sqrt', 'log2', None], ['gini', 'log_loss', 'entropy']]
            },
            "MLP": {
                "class": MLPClassifier,
                "parameters": ["max_iter", "activation", "solver", "alpha"],
                "values": [[1000, 2000, 3000], ['relu', 'tanh', 'logistic'], ['adam', 'sgd'], [0.0001, 0.001, 0.01]]
            },
            "RF": {
                "class": RandomForestClassifier,
                "parameters": ["n_estimators", "max_depth", "min_samples_split", "max_features", "criterion"],
                "values": [[10, 25, 50, 100, 150], [5, 10, 15], [2, 3, 3, 4], ['sqrt', 'log2', None], ['gini', 'log_loss', 'entropy']]
            },
            "KNN": {
                "class": KNeighborsClassifier,
                "parameters": ["n_neighbors", "weights", "algorithm", "leaf_size"],
                "values": [[3, 5, 7, 9, 11], ['uniform', 'distance'], ['auto', 'ball_tree', 'kd_tree', 'brute'], [10, 20, 30, 40, 50]]
            },
            "SVM": {
                "class": SVC,
                "parameters": ["C", "kernel", "gamma"],
                "values": [[1, 10, 100, 1000], ['linear', 'poly', 'rbf', 'sigmoid'], ['scale', 'auto']]
            },
            "AdaBoost": {
                "class": AdaBoostClassifier,
                "parameters": ["n_estimators", "learning_rate"],
                "values": [[10, 25, 50, 100, 150], [0.1, 0.5, 1, 1.5, 2]]
            },
        }
        self.best_solution = None
        self.best_score = 0
        self.model = None
        
    def eval(self, model, X_train, y_train, X_val, y_val):
        model.fit(X_train, y_train) 
        predictions = model.predict(X_val)  
        accuracy = accuracy_score(y_val, predictions) 
        print(f'Accuracy: {accuracy:.4f}')
        return accuracy

    def generate_neighborhood(self, current_solution):
        
        algorithm_name = current_solution[0]
        algorithm_info = self.algorithms[algorithm_name]
        new_solution = current_solution[:]
        
        if not algorithm_info['parameters']:
            new_solution[0] = np.random.choice(list(self.algorithms.keys()))
            return new_solution
    
        while len(new_solution) < len(algorithm_info['parameters']) + 1:
            param_index = len(new_solution) - 1
            new_solution.append(np.random.choice(algorithm_info['values'][param_index]))

            
        param_idx = np.random.randint(1, len(new_solution))
        new_solution[param_idx] = np.random.choice(algorithm_info['values'][param_idx - 1])
        
        if np.random.rand() < 0.1:
    
            new_solution[0] = np.random.choice(list(self.algorithms.keys()))
            algorithm_info = self.algorithms[new_solution[0]]
            
            new_solution = [new_solution[0]] + [
                np.random.choice(values) for values in algorithm_info["values"]
            ]

        print(f"Generated neighborhood for algorithm: {new_solution[0]}, parameters: {new_solution[1:]}")
        return new_solution

    def create_model(self, solution):
        algorithm_name = solution[0]
        hyperparameters = solution[1:]
        algorithm_info = self.algorithms[algorithm_name]
        algorithm_class = algorithm_info['class']
        
        if algorithm_name == 'DecisionTreeClassifier':
            return algorithm_class(
                max_depth=hyperparameters[0],
                min_samples_split=hyperparameters[1],
                max_features=hyperparameters[2],
                criterion=hyperparameters[3]
            )
            
        elif algorithm_name == 'MLP':
            return algorithm_class(
                max_iter=hyperparameters[0],
                activation=hyperparameters[1],
                solver=hyperparameters[2],
                alpha=hyperparameters[3]
            )
        elif algorithm_name == 'RF':
            return algorithm_class(
                n_estimators=hyperparameters[0],
                max_depth=hyperparameters[1],
                min_samples_split=hyperparameters[2],
                max_features=hyperparameters[3],
                criterion=hyperparameters[4]
            )
        elif algorithm_name == 'KNN':
            return algorithm_class(
                n_neighbors=hyperparameters[0],
                weights=hyperparameters[1],
                algorithm=hyperparameters[2],
                leaf_size=hyperparameters[3]
            )
        elif algorithm_name == 'SVM':
            return algorithm_class(
                C=hyperparameters[0],
                kernel=hyperparameters[1],
                gamma=hyperparameters[2]
            )
        elif algorithm_name == 'AdaBoost':
            return algorithm_class(
                n_estimators=hyperparameters[0],
                learning_rate=hyperparameters[1],
            )

    def fit(self, X_train, y_train, X_val, y_val):
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
        self.simulated_annealing()

    def predict(self, X):
        if self.model is None:
            raise ValueError("The model has not been fit yet. Please call the fit method first.")
        return self.model.predict(X)
    
    def simulated_annealing(self):
        start_time = time.time()  
        # 0 rule model as initial model as base model
        zero_r_model = DummyClassifier(strategy='most_frequent')
        #zero_r_model.fit(self.X, self.y) 

        print(f"Initial model: DummyClassifier")
        print(f"Initial parameters: (strategy='most_frequent')")

        current_solution = ['DummyClassifier']
        current_score = self.eval(zero_r_model, self.X_train, self.y_train, self.X_val, self.y_val)
        best_solution = current_solution
        best_score = current_score
    
        temperature = self.initial_temp
    
        while time.time() - start_time < self.min_training_time:
            for i in range(100):
                if i % 10 == 0:
                    print(f"Iteration {i}, Temperature {temperature:.3f}, Best Evaluation {best_score:.5f}")
                    
                if current_solution[0] == 'DummyClassifier':
                    new_solution = self.generate_neighborhood(['DecisionTreeClassifier'])
                    new_score = self.eval(self.create_model(new_solution), self.X_train, self.y_train, self.X_val, self.y_val)
                else:
                    new_solution = self.generate_neighborhood(current_solution)
                    new_score = self.eval(self.create_model(new_solution), self.X_train, self.y_train, self.X_val, self.y_val)
    
                if new_score > current_score:
                    current_solution = new_solution
                    current_score = new_score
                    if new_score > best_score:
                        best_solution = new_solution
                        best_score = new_score
                else:
                    acceptance_probability = np.exp((new_score - current_score) / temperature)
                    if np.random.rand() < acceptance_probability:
                        current_solution = new_solution
                        current_score = new_score
    
            temperature *= self.cooling_rate
    
        self.best_solution = best_solution
        self.best_score = best_score
        self.model = self.create_model(best_solution)
        self.model.fit(self.X_train, self.y_train)
        print(f'best_score is {best_score}')
        print(f'best_solution is {best_solution}')

## Sklearn Automated Machine Learning Algorithm

## TPOT Automated Machine Learning Algorithm

## Evaluation

### Our Auto ML Iris

In [145]:
automl = AutoML_18(min_training_time=60)

print("Fitting the AutoML algorithm")
automl.fit(X_train_iris, y_train_iris, X_val_iris, y_val_iris)

print("\nEvaluating on the test data")
predictions = automl.predict(X_test_iris)

test_accuracy = accuracy_score(y_test_iris, predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_iris, predictions))

Fitting the AutoML algorithm
Initial model: DummyClassifier
Initial parameters: (strategy='most_frequent')
Accuracy: 0.4000
Iteration 0, Temperature 100.000, Best Evaluation 0.40000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 4, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 4, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 4, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 2, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 2, 'sqrt', 'log_loss']
Acc



Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]
Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Iteration 20, Temperature 98.010, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.001]




Accuracy: 1.0000
Iteration 30, Temperature 98.010, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9667
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]
Accuracy: 0.9667
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]




Accuracy: 0.9000
Iteration 40, Temperature 98.010, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 10, 4, 'sqrt', 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 10, 4, 'sqrt', 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 10, 3, 'sqrt', 'entropy']
Accuracy: 1.0000
Gen



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [10, 'linear', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [10, 'poly', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [10, 'poly', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, 'poly', 'auto']
Accuracy: 1.0000
Iteration 10, Temperature 97.030, Best Evaluation 1.00000
Generated neighborhood for algorithm: SVM, parameters: [1, 'poly', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, '



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [15, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [100, 1.5]
Accuracy: 0.9333
Generated neighborhood for algorithm: AdaBoost, parameters: [100, 0.5]
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [150, 15, 3, 'sqrt', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [150, 15, 3, None, 'gini']
Accuracy: 1.0000
Iteration 40, Temperature 97.030, Best Evalua



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]




Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Iteration 40, Temperature 95.099, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'adam', 0.001]
Accuracy



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]
Accuracy: 0.9667
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Iteration 80, Temperature 94.148, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.0001]
Accuracy: 1



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Iteration 90, Temperature 94.148, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]




Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]
Accuracy: 0.9667
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]
Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]




Accuracy: 0.8667
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Iteration 0, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'adam', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'sgd', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'adam', 0.0001]
Accuracy: 1.0000
Iteration 10, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [10, 1.0]
Accuracy: 1.0000
Genera



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'tanh', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'relu', 'sgd', 0.01]
Accuracy: 1.0000
Iteration 30, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [10, 'linear', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [10, 'linear', 'scale']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [100, 'linear', 'scale']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, 'linear', 'scale']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [100, 'linear', 'scale']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [100, 'linear', 'auto']
Accuracy: 1.0000
Iteration 40, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: SVM, parameters: [100, 'linear', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, 'linear', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters:



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Iteration 70, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'sgd', 0.01]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]
Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'tanh', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]
Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Iteration 80, Temperature 93.207, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'relu', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [3000, 'tanh', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'relu', 'sgd', 0.0



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.0001]




Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [5, 'uniform', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [5, 'uniform', 'auto', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'log_loss']
Accuracy: 1.0000
Iteration 0, Temperature 92.274, Best Evaluation 1.00000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 3, None, 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [10, 2, None, 'gini']
Accuracy: 1.0000
Generated neighborhood for alg



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9333
Generated neighborhood for algorithm: KNN, parameters: [3, 'distance', 'brute', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [3, 'distance', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [5, 'distance', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [5, 'distance', 'auto', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [5, 'distance', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [11, 'distance', 'ball_tree', 40]
Accuracy: 1.0000
Iteration 40, Temperature 90.438, Best Evaluation 1.00000
Generated neighborhood for algorithm: KNN, parameters: [3, 'distance', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [11, 'distance', 'ball_tree', 40]
Accu



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.01]




Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [150, 5, 2, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [150, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [50, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [100, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Iteration 50, Temperature 90.438, Best Evaluation 1.00000
Generated neighborhood for algorithm: RF, parameters: [100, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [50, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [50, 5, 3, 'log2', 'gini']
Accuracy: 1.0000
Generated neighborhood for algorithm:



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.0001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [25, 0.5]
Accuracy: 1.0000
Iteration 60, Temperature 90.438, Best Evaluation 1.00000
Generated neighborhood for algorithm: AdaBoost, parameters: [25, 1.5]
Accuracy: 0.9333
Generated neighborhood for algorithm: SVM, parameters: [1, 'rbf', 'scale']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, 'rbf', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: SVM, parameters: [1, 'rbf', 'auto']
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [7, 'uniform', 'ball_tree', 40]
Accuracy: 1.0000
Generated neighborhood for algorithm: KNN, parameters: [7, 'unifor



Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'adam', 0.001]
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]




Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]




Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [2000, 'logistic', 'sgd', 0.001]




Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9667
Iteration 90, Temperature 90.438, Best Evaluation 1.00000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.01]




Accuracy: 0.9000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.001]
Accuracy: 0.9333
Generated neighborhood for algorithm: MLP, parameters: [1000, 'logistic', 'sgd', 0.0001]




Accuracy: 0.9333
Generated neighborhood for algorithm: AdaBoost, parameters: [10, 0.1]
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [10, 0.5]
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [10, 0.5]
Accuracy: 1.0000
Generated neighborhood for algorithm: AdaBoost, parameters: [150, 0.5]
Accuracy: 0.9667
Generated neighborhood for algorithm: RF, parameters: [50, 10, 3, 'sqrt', 'entropy']
Accuracy: 1.0000
Generated neighborhood for algorithm: RF, parameters: [25, 10, 3, 'sqrt', 'log_loss']
Accuracy: 1.0000
Generated neighborhood for algorithm: MLP, parameters: [1000, 'relu', 'adam', 0.01]
Accuracy: 1.0000
best_score is 1.0
best_solution is ['DecisionTreeClassifier', 10, 4, 'sqrt', 'log_loss']

Evaluating on the test data
Test Accuracy: 0.9333

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       0.92      0.92      0.92        13
 

### Our AutoML Congressional Voting

In [None]:
print("Fitting the AutoML algorithm")
automl.fit(X_train_voting, y_train_voting, X_val_voting, y_val_voting)

print("\nEvaluating on the test data")
predictions = automl.predict(X_test_voting)

test_accuracy = accuracy_score(y_test_voting, predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_voting, predictions))

Fitting the AutoML algorithm
Initial model: DummyClassifier
Initial parameters: (strategy='most_frequent')
Accuracy: 0.5227
Iteration 0, Temperature 100.000, Best Evaluation 0.52273
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [15, 4, 'log2', 'gini']
Accuracy: 0.9545
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [15, 4, 'sqrt', 'gini']
Accuracy: 0.9545
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 4, 'sqrt', 'gini']
Accuracy: 0.8864
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 3, 'sqrt', 'gini']
Accuracy: 0.9545
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 3, 'sqrt', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 3, None, 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: DecisionTreeClassifier, parameters: [5, 3, 'log2', 'entropy']
Accuracy: 0.9545
Generated n



Accuracy: 0.9545
Generated neighborhood for algorithm: SVM, parameters: [100, 'poly', 'auto']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [150, 15, 3, 'log2', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [150, 5, 3, 'log2', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [10, 5, 3, 'log2', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [10, 5, 2, 'log2', 'entropy']
Accuracy: 0.9773
Iteration 20, Temperature 98.010, Best Evaluation 1.00000
Generated neighborhood for algorithm: RF, parameters: [10, 5, 2, 'log2', 'log_loss']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [10, 5, 2, 'log2', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [10, 5, 2, 'sqrt', 'entropy']
Accuracy: 0.9545
Generated neighborhood for algorithm: RF, parameters: [10, 5, 2, 'sqrt', 'entropy']
Accuracy: 0.9773
Generated neighborh

### Our AutoML gym

In [None]:
print("Fitting the AutoML algorithm")
automl.fit(X_train_gym, y_train_gym, X_val_gym, y_val_gym)

print("\nEvaluating on the test data")
predictions = automl.predict(X_test_gym)

test_accuracy = accuracy_score(y_test_gym, predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_gym, predictions))

### Our AutoML abalone

In [None]:
print("Fitting the AutoML algorithm")
automl.fit(X_train_abalone, y_train_abalone, X_val_abalone, y_val_abalone)

print("\nEvaluating on the test data")
predictions = automl.predict(X_test_abalone)

test_accuracy = accuracy_score(y_test_abalone, predictions)
print(f"Test Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_abalone, predictions))