# In [112]:
import random
import time
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
from sklearn.model_selection import cross_val_score
import lightgbm as lgb
from tqdm import tqdm
import gc

# Load houston_clean.csv and keep a reproducible 500-row random sample
data = pd.read_csv('houston_clean.csv').sample(n=500, random_state=42)

# Feature matrix used by the model, and the price column as the target
X = data[[
    'Latitude', 'Longitude', 'Year Built', 'Beds',
    'Baths', 'buildingSize', 'lotSize', 'PostalCode',
]]
y = data['Price']

# Fitness function: cross-validated MSE of a LightGBM regressor
def lgbm_function(individual):
    """Score one candidate hyperparameter vector by 3-fold cross-validated MSE.

    Args:
        individual: Indexable sequence whose first six entries are, in order,
            n_estimators, max_depth, num_leaves, learning_rate, max_bin and
            colsample_bytree. The integer-valued entries are truncated with
            ``int``; the other two are passed through unchanged.

    Returns:
        A one-element tuple ``(mse,)`` holding the mean MSE over the folds
        (the form the caller assigns to ``fitness.values``).
    """
    model = lgb.LGBMRegressor(
        n_estimators=int(individual[0]),
        max_depth=int(individual[1]),
        num_leaves=int(individual[2]),
        learning_rate=individual[3],
        max_bin=int(individual[4]),
        colsample_bytree=individual[5],
    )

    # scikit-learn reports *negative* MSE, so flip the sign of the mean.
    fold_scores = cross_val_score(model, X, y, cv=3, scoring='neg_mean_squared_error')
    return (-np.mean(fold_scores),)

# Clamp gene values back into range after mutation and crossover
def check_bounds(individual, param_ranges):
    """Clamp every gene of ``individual`` in place to its configured range.

    Args:
        individual: Mutable sequence of numeric genes; the gene at index
            ``i`` is bounded by ``param_ranges['x{i}']``.
        param_ranges: Mapping from ``'x0'``, ``'x1'``, ... to a
            ``(low, high)`` pair.

    Returns:
        The same ``individual`` object, after modification.
    """
    for position, gene in enumerate(individual):
        bounds = param_ranges[f'x{position}']
        individual[position] = max(bounds[0], min(gene, bounds[1]))
    return individual

# Set up DEAP and run the genetic algorithm
def genetic_algorithm_deap(param_ranges, population_size, generations, mutation_rate, crossover_rate,
                           elitism_fraction, alpha, mu, sigma, tournsize, crossover_method, mutation_method,
                           selection_method):
    """Minimise ``lgbm_function`` with a genetic algorithm built from DEAP operators.

    Args:
        param_ranges: Mapping of ``'x0'``, ``'x1'``, ... to ``(low, high)``
            pairs. One gene is sampled uniformly per entry, in mapping order,
            and genes are clamped to these ranges after variation.
        population_size: Number of individuals kept in every generation.
        generations: Number of generations to run.
        mutation_rate: Used both as the per-individual mutation probability
            (``mutpb``) and as the per-gene probability (``indpb``).
        crossover_rate: Per-pair crossover probability (``cxpb``).
        elitism_fraction: Fraction of ``population_size`` (truncated to an
            int) that is carried over via ``tools.selBest``.
        alpha: Forwarded to ``crossover_method`` as ``alpha=``.
        mu: Forwarded to ``mutation_method`` as ``mu=``.
        sigma: Forwarded to ``mutation_method`` as ``sigma=``.
        tournsize: Forwarded to ``selection_method`` as ``tournsize=``.
        crossover_method: DEAP-style crossover accepting ``alpha``.
        mutation_method: DEAP-style mutation accepting ``mu``, ``sigma``
            and ``indpb``.
        selection_method: DEAP-style selection ``(individuals, k, tournsize=...)``.

    Returns:
        Tuple ``(best_individual, best_score, best_generation, elapsed_time, times)``:
        the best individual seen (``None`` if nothing was evaluated), its
        MSE, the 1-based generation in which it appeared, the total
        wall-clock time in seconds, and the list of per-generation durations.
    """
    # This function is called many times and creator.create() warns when a
    # class name already exists; drop earlier definitions before re-creating.
    for class_name in ("FitnessMin", "Individual"):
        if hasattr(creator, class_name):
            delattr(creator, class_name)
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    def create_individual():
        """Sample one individual uniformly inside ``param_ranges``."""
        return creator.Individual([random.uniform(*param_ranges[param]) for param in param_ranges])

    # DEAP toolbox: construction, evaluation and variation operators
    toolbox = base.Toolbox()
    toolbox.register("individual", create_individual)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", lgbm_function)
    toolbox.register("mate", crossover_method, alpha=alpha)
    toolbox.register("mutate", mutation_method, mu=mu, sigma=sigma, indpb=mutation_rate)
    toolbox.register("select", selection_method, tournsize=tournsize)

    population = toolbox.population(n=population_size)
    elite_size = int(population_size * elitism_fraction)

    start_time = time.time()

    best_individual = None
    best_score = float('inf')
    best_generation = 0
    times = []  # wall-clock duration of every generation

    for gen in range(generations):
        gen_start_time = time.time()
        offspring = algorithms.varAnd(population, toolbox, cxpb=crossover_rate, mutpb=mutation_rate)

        # Crossover/mutation may push genes outside their ranges; clamp in place.
        for ind in offspring:
            check_bounds(ind, param_ranges)

        # varAnd invalidates the fitness of every individual it changed;
        # unchanged clones keep a valid fitness and need no (expensive) re-evaluation.
        pending = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(pending, map(toolbox.evaluate, pending)):
            ind.fitness.values = fit
            if fit[0] < best_score:
                # Keep a snapshot so later operations cannot alter the record.
                best_individual = toolbox.clone(ind)
                best_score = fit[0]
                best_generation = gen + 1

        # Elitism: the best individuals always survive; the remaining slots are
        # filled by the configured selection operator.
        elites = tools.selBest(offspring, elite_size)
        selected = toolbox.select(offspring, population_size - elite_size)
        population = elites + selected

        times.append(time.time() - gen_start_time)

        print(f"Поколение {gen + 1}, Лучшее MSE: {best_score}, Параметры: {best_individual}")

    elapsed_time = time.time() - start_time

    return best_individual, best_score, best_generation, elapsed_time, times

# Draw a random set of genetic-algorithm hyperparameters
def generate_random_hyperparameters():
    """Sample one random configuration for the genetic algorithm.

    Returns:
        Dict with the keys ``population_size``, ``mutation_rate``,
        ``crossover_rate``, ``elitism_fraction``, ``alpha``, ``mu``,
        ``sigma`` and ``tournsize``, in that order.
    """
    # The draws happen in key order, so a seeded RNG gives a reproducible result.
    population_size = random.randint(100, 500)
    mutation_rate = random.uniform(0.01, 0.5)
    crossover_rate = random.uniform(0.01, 0.5)
    elitism_fraction = random.uniform(0.01, 0.2)
    alpha = random.uniform(0.01, 0.9)  # for crossover methods such as blend
    mu = random.uniform(-1, 1)  # for Gaussian mutation
    sigma = random.uniform(0.01, 1)  # for Gaussian mutation
    tournsize = random.randint(2, 5)

    return dict(
        population_size=population_size,
        mutation_rate=mutation_rate,
        crossover_rate=crossover_rate,
        elitism_fraction=elitism_fraction,
        alpha=alpha,
        mu=mu,
        sigma=sigma,
        tournsize=tournsize,
    )

# Append results to a CSV file
def save_results_to_csv(df, filename):
    """Append ``df`` to ``filename`` as CSV, writing the header row only when needed.

    The header is written when the target does not exist yet or exists but
    is empty; otherwise the rows are appended below the existing content.
    Saving is best-effort: any failure is reported on stdout and swallowed,
    so the caller is never interrupted.

    Args:
        df: DataFrame to append.
        filename: Destination path (``str`` or path-like).
    """
    import os  # local import: the file's import block does not provide os

    try:
        try:
            path = os.fspath(filename)
        except TypeError:
            # Not a filesystem path (e.g. an open buffer): always emit the header.
            path = None
        needs_header = path is None or not os.path.isfile(path) or os.path.getsize(path) == 0
        df.to_csv(filename, index=False, mode='a', header=needs_header)
    except Exception as e:  # deliberate best-effort: report the failure, never raise
        print(f"Ошибка сохранения в CSV: {e}")

# Sizes of the outer and inner loops used to collect statistics
num_outer_runs = num_inner_runs = 10
summary_results = []

# Search range (low, high) for each gene of an individual
param_ranges = dict(
    x0=(50, 2000),    # n_estimators
    x1=(1, 100),      # max_depth
    x2=(2, 1000),     # num_leaves
    x3=(0.01, 0.3),   # learning_rate
    x4=(10, 1000),    # max_bin
    x5=(0.5, 1.0),    # colsample_bytree
)

filename = "deap_houses.csv"  # file the summary rows are saved to

# Outer loop: each run draws one random GA configuration and summarises it
for outer_run in tqdm(range(num_outer_runs), desc="Внешние прогоны"):
    print(f"\n=== Внешний прогон {outer_run + 1} ===\n")

    hyperparams = generate_random_hyperparameters()
    results = []

    # Inner loop: repeat the GA with the same configuration to gather statistics
    for inner_run in range(num_inner_runs):
        # NOTE: despite its name, best_generation_time receives the total
        # wall-clock time of the run (the function's elapsed_time).
        best_solution, best_fitness, best_generation, best_generation_time, times = genetic_algorithm_deap(
            param_ranges=param_ranges,
            population_size=hyperparams["population_size"],
            generations=30,
            mutation_rate=hyperparams["mutation_rate"],
            crossover_rate=hyperparams["crossover_rate"],
            elitism_fraction=hyperparams["elitism_fraction"],
            alpha=hyperparams["alpha"],
            mu=hyperparams["mu"],
            sigma=hyperparams["sigma"],
            tournsize=hyperparams["tournsize"],
            crossover_method=tools.cxBlend,
            mutation_method=tools.mutGaussian,
            selection_method=tools.selTournament
        )

        # best_generation is 1-based and times holds one duration per
        # generation, so the time needed to reach the best generation is the
        # sum of the first best_generation entries.
        time_to_best_generation = sum(times[:best_generation])

        run_data = {
            'Best Fitness': best_fitness,
            'Best Generation': best_generation,
            'Best Generation Time': best_generation_time,
            'Time To Best Generation': time_to_best_generation,
            'Iteration Times': times
        }
        results.append(run_data)

    df_results = pd.DataFrame(results)
    median_fitness = df_results['Best Fitness'].median()
    mode_iteration = df_results['Best Generation'].mode()[0]
    # Median of the cumulative time to the best generation, as the summary
    # column name states (not the duration of the fastest single generation).
    median_time_to_best_gen = df_results['Time To Best Generation'].median()

    summary_row = {
        'Медиана MSE': median_fitness,
        'Мода лучшей итерации': mode_iteration,
        'Медиана времени до лучшей итерации': median_time_to_best_gen,
        'Population Size': hyperparams["population_size"],
        'Mutation Rate': hyperparams["mutation_rate"],
        'Crossover Rate': hyperparams["crossover_rate"],
        'Elitism Fraction': hyperparams["elitism_fraction"],
        'Alpha': hyperparams["alpha"],
        'Mu': hyperparams["mu"],
        'Sigma': hyperparams["sigma"],
        'Tournament Size': hyperparams["tournsize"]
    }

    summary_results.append(summary_row)

    # Save after every outer run so partial results survive an interruption
    df_summary = pd.DataFrame([summary_row])
    save_results_to_csv(df_summary, filename)

    gc.collect()

# Final table with the results of all outer runs
df_summary = pd.DataFrame(summary_results)
print("\nИтоговая таблица с медианными значениями и гиперпараметрами:")
print(df_summary)





# Cell output: === Внешний прогон 1 ===

