In [None]:
import numpy as np
import time
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB 


class AutoML:
    """Choose a scikit-learn classifier and its hyperparameters by simulated annealing.

    A candidate *solution* is a flat list ``[algorithm_name, *hyperparameters]``
    where the hyperparameters follow the order of the ``'parameters'`` entry
    for that algorithm in ``self.algorithms``. Candidates are scored by their
    mean 5-fold cross-validation score (see :meth:`eval`).

    Attributes set by :meth:`fit`:
        best_solution: best candidate found, in the list form described above.
        best_score: mean cross-validation score of ``best_solution``.
        model: estimator built from ``best_solution`` and fitted on the
            training data.
    """

    # Hyperparameters that must reach the estimator constructors as ints.
    _INTEGER_PARAMETERS = frozenset({'max_depth', 'n_estimators', 'min_samples_split'})

    def __init__(self, initial_temp=100, cooling_rate=0.99, max_iterations=100, min_training_time=3600):
        """Configure the search.

        Args:
            initial_temp: starting annealing temperature.
            cooling_rate: factor the temperature is multiplied by after each
                batch of candidate evaluations.
            max_iterations: number of candidates evaluated per temperature
                step (per batch).
            min_training_time: search budget in seconds. Values below 3600
                are raised to 3600, so the search always runs at least an hour.
        """
        self.initial_temp = initial_temp
        self.cooling_rate = cooling_rate
        self.max_iterations = max_iterations
        # Deliberate floor: budgets shorter than one hour are raised to one hour.
        self.min_training_time = max(min_training_time, 3600)

        # Search space. Each 'ranges' entry lines up with the 'parameters'
        # entry at the same index and is either a (low, high) tuple for a
        # numeric parameter or a list of allowed values for a categorical one.
        self.algorithms = {
            'DecisionTreeClassifier': {
                'class': DecisionTreeClassifier,
                'parameters': ['max_depth', 'min_samples_split'],
                'ranges': [(1, 20), (2, 20)]
            },
            'LogisticRegression': {
                'class': LogisticRegression,
                'parameters': ['C', 'penalty'],
                'ranges': [(0.01, 10), ['l2', 'l1']]
            },
            'SVC': {
                'class': SVC,
                'parameters': ['C', 'gamma'],
                'ranges': [(0.01, 10), (0.01, 1)]
            },
            'RandomForestClassifier': {
                'class': RandomForestClassifier,
                'parameters': ['n_estimators', 'max_depth'],
                'ranges': [(10, 100), (1, 20)]
            },
            'GradientBoostingClassifier': {
                'class': GradientBoostingClassifier,
                'parameters': ['n_estimators', 'learning_rate'],
                'ranges': [(10, 100), (0.01, 0.3)]
            },
            'MLPClassifier': {
                'class': MLPClassifier,
                'parameters': ['alpha', 'learning_rate_init'],
                'ranges': [(0.0001, 0.1), (0.0001, 0.1)]
            },
            'GaussianNB': {
                'class': GaussianNB,
                'parameters': [],
                'ranges': []
            }
        }

        self.best_solution = None
        self.best_score = 0
        self.model = None

    def eval(self, model, X, y):
        """Return (and print) the mean 5-fold cross-validation score of ``model``."""
        mean_score = np.mean(cross_val_score(model, X, y, cv=5))
        print('scores=' + str(mean_score))
        return mean_score

    @staticmethod
    def _sample_parameter(spec):
        """Draw one random value for a single ``'ranges'`` entry.

        A list is treated as a set of categorical choices. A pair of ints is
        sampled with ``np.random.randint`` (upper bound exclusive). Any other
        pair is sampled uniformly as a float.
        """
        if isinstance(spec, list):
            # Index into the list so the result keeps its native Python type.
            return spec[np.random.randint(len(spec))]
        low, high = spec
        if isinstance(low, int) and isinstance(high, int):
            return np.random.randint(low, high)
        return np.random.uniform(low, high)

    def _random_solution(self, algorithm_name):
        """Build ``[algorithm_name, *hyperparameters]`` with freshly sampled values."""
        info = self.algorithms[algorithm_name]
        sampled = [
            self._sample_parameter(spec)
            for _, spec in zip(info['parameters'], info['ranges'])
        ]
        return [algorithm_name] + sampled

    def generate_neighborhood(self, current_solution):
        """Propose a new candidate solution.

        The candidate is an algorithm picked uniformly at random together
        with freshly sampled hyperparameters. It is built from scratch rather
        than by patching ``current_solution``, so its length always matches
        the chosen algorithm's parameter list.

        Args:
            current_solution: the solution being moved away from. Kept for
                interface compatibility; it is neither read nor modified.

        Returns:
            A new list ``[algorithm_name, *hyperparameters]``.
        """
        algorithm_name = str(np.random.choice(list(self.algorithms)))
        new_solution = self._random_solution(algorithm_name)
        print(f"Neighborhood algorithm: {algorithm_name}, parameters: {new_solution[1:]}")
        return new_solution

    def create_model(self, solution):
        """Instantiate the (unfitted) estimator described by ``solution``.

        Hyperparameters are matched to parameter names by position. Extra
        trailing values are ignored. ``LogisticRegression`` is always built
        with ``solver='liblinear'``.
        """
        algorithm_name = solution[0]
        info = self.algorithms[algorithm_name]
        kwargs = {
            param: int(value) if param in self._INTEGER_PARAMETERS else value
            for param, value in zip(info['parameters'], solution[1:])
        }
        if algorithm_name == 'LogisticRegression':
            kwargs['solver'] = 'liblinear'
        return info['class'](**kwargs)

    def fit(self, X, y):
        """Run the search on ``(X, y)`` and keep the best fitted model.

        Returns:
            ``self``, so calls can be chained.
        """
        self.X = X
        self.y = y
        self.simulated_annealing()
        return self

    def predict(self, X):
        """Predict with the best model found by :meth:`fit`.

        Raises:
            ValueError: if :meth:`fit` has not been called yet.
        """
        if self.model is None:
            raise ValueError("The model has not been fit yet. Please call the fit method first.")
        return self.model.predict(X)

    def simulated_annealing(self):
        """Search the algorithm/hyperparameter space until the time budget is spent.

        Candidates are evaluated in batches of ``self.max_iterations``. The
        temperature is multiplied by ``self.cooling_rate`` after every batch,
        and the clock is checked only between batches, so the run can
        overshoot ``self.min_training_time`` by up to one batch. On exit,
        ``best_solution``, ``best_score`` and a fitted ``model`` are stored
        on the instance.
        """
        start_time = time.time()

        initial_algorithm = str(np.random.choice(list(self.algorithms)))
        current_solution = self._random_solution(initial_algorithm)

        print(f"Initial model: {current_solution[0]}")
        print(f"Initial parameters: {current_solution[1:]}")

        current_score = self.eval(self.create_model(current_solution), self.X, self.y)
        best_solution = current_solution
        best_score = current_score

        temperature = self.initial_temp

        while time.time() - start_time < self.min_training_time:
            for _ in range(self.max_iterations):
                new_solution = self.generate_neighborhood(current_solution)
                new_score = self.eval(self.create_model(new_solution), self.X, self.y)

                delta = new_score - current_score
                # Always accept improvements; accept a non-improving candidate
                # with probability exp(delta / temperature).
                if delta > 0 or np.random.rand() < np.exp(delta / temperature):
                    current_solution = new_solution
                    current_score = new_score
                    if new_score > best_score:
                        best_solution = new_solution
                        best_score = new_score

            temperature *= self.cooling_rate

        self.best_solution = best_solution
        self.best_score = best_score
        # Cross-validation only fits clones, so the winning estimator has to
        # be trained here before predict() can use it.
        self.model = self.create_model(best_solution)
        self.model.fit(self.X, self.y)


# Demo run. Build a synthetic classification data set: 1000 samples with
# 10 features, 5 of them informative. random_state fixes the generated data.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, random_state=42)
automl = AutoML(min_training_time=3600)  # Search budget in seconds (3600 s = 1 hour)
# fit() runs the annealing search and does not return before the budget has elapsed.
automl.fit(X, y)
# Predictions are made on the same samples that were passed to fit(); no held-out split is used here.
predictions = automl.predict(X)


Initial model: DecisionTreeClassifier
Initial parameters: [8, 9]
scores=0.9029999999999999
Neighborhood algorithm: DecisionTreeClassifier, parameters: [7, 11]
scores=0.899
Neighborhood algorithm: DecisionTreeClassifier, parameters: [10, 16]
scores=0.8899999999999999
Neighborhood algorithm: LogisticRegression, parameters: [6.518866122660014, 'l2']
scores=0.821
Neighborhood algorithm: SVC, parameters: [8.695229067783705, 0.18629852144152304]
scores=0.923
Neighborhood algorithm: RandomForestClassifier, parameters: [91, 7]
scores=0.9309999999999998
Neighborhood algorithm: LogisticRegression, parameters: [2.525364652116174, 'l2']
scores=0.821
Neighborhood algorithm: MLPClassifier, parameters: [0.054085016451561135, 0.05595817111672631]
scores=0.943
Neighborhood algorithm: RandomForestClassifier, parameters: [22, 1]
scores=0.752
Neighborhood algorithm: LogisticRegression, parameters: [6.582261679757857, 'l2']
scores=0.821
Neighborhood algorithm: GaussianNB, parameters: [6.582261679757857, 'l