# In [None]:
import random
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
import scipy.stats as st
import pickle
import warnings
import copy
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
from sklearn.metrics import accuracy_score
import math
from scipy.spatial import distance

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# File locations.
# NOTE(review): 'C:User\ML_algorithm' has no backslash after the drive letter,
# unlike `path` below -- confirm this is the intended location.
general_path =r'C:User\ML_algorithm'
path = r'C:\User\file.xlsx'

# Names of the feature columns to keep.
# NOTE(review): the bare `...` below is the Ellipsis object, not a column name;
# presumably a placeholder for elided names -- replace it before running.
list_var = ['var_1','var_2',...,'var_3']

# Load the training and test sets from two sheets of the same Excel workbook.
x_train = pd.read_excel(path, sheet_name='Train1')
x_test = pd.read_excel(path, sheet_name='test1')
# Target column, extracted before the frames are narrowed to list_var.
y_train = x_train['GRADO']
y_test = x_test['GRADO']

# Keep only the selected feature columns.
x_train = x_train[list_var]
x_test = x_test[list_var]

# Feature matrices as NumPy arrays; asignar_cutoff reads these module-level
# arrays (one row per sample).
train = x_train.to_numpy()
test = x_test.to_numpy()

def _asignar_intervalos(valores, limites, etiquetas):
    """Map each value to the label of the first interval whose bound covers it.

    A value ``v`` receives ``etiquetas[i]`` for the smallest ``i`` with
    ``v <= limites[i]``; values above every bound except the last (and NaN)
    fall into the last interval.

    Args:
        valores: 1-D array-like of numeric values to classify.
        limites: ascending 1-D array of upper bounds, one per interval.
        etiquetas: 1-D array of labels, same length as ``limites``.

    Returns:
        NumPy array with one label per entry of ``valores``.
    """
    valores = np.asarray(valores, dtype=float)
    # The last interval is a catch-all, so only the first len-1 bounds are
    # searched; side='left' yields the first index i with v <= limites[i].
    indices = np.searchsorted(limites[:-1], valores, side='left')
    return etiquetas[indices]


def asignar_cutoff(x, exp, output_path=None):
    """Score every sample with the weights ``x`` and bin the scores.

    The score of a sample is the weighted sum of its features. The range
    between the smallest and largest training score is split into as many
    evenly spaced bounds as there are unique values in ``y_train``; scores
    and true labels are both mapped onto those intervals and compared.

    Reads the module-level ``train``, ``test``, ``y_train``, ``y_test``,
    ``x_train`` and ``x_test``. The columns ``y_pred_test``, ``y_test_c``,
    ``k_j``, ``y_test`` and ``Diff`` are written into the module-level
    ``x_test`` (and the analogous columns into ``x_train`` when ``exp`` is
    true), so repeated calls overwrite them.

    Args:
        x: 1-D sequence of weights, one per feature column.
        exp: when true, also classify the training set, print the cutoff
            table and export everything to an Excel workbook.
        output_path: destination workbook used when ``exp`` is true.
            Defaults to the path that was previously hard-coded.

    Returns:
        The module-level ``x_test`` DataFrame with the added columns;
        ``Diff`` is 0 where the binned label equals the prediction.
    """
    # Weighted sum per row (same value as sum(row * x), up to float rounding).
    k_i = (train * np.asarray(x)).sum(axis=1)
    unicos = len(np.unique(y_train))

    max_por_cutoff = pd.DataFrame({
        'Cutoff': list(range(1, unicos + 1)),
        'K': np.linspace(k_i.min(), k_i.max(), unicos),
    })
    limites = max_por_cutoff['K'].to_numpy()
    etiquetas = max_por_cutoff['Cutoff'].to_numpy()

    k_j = (test * np.asarray(x)).sum(axis=1)

    # Whole columns are assigned at once: element-wise chained assignment
    # (df[col][k] = ...) does not write back under pandas copy-on-write.
    x_test['y_pred_test'] = _asignar_intervalos(k_j, limites, etiquetas)
    x_test['y_test_c'] = _asignar_intervalos(np.asarray(y_test), limites, etiquetas)
    x_test['k_j'] = k_j
    x_test['y_test'] = y_test
    x_test['Diff'] = x_test['y_test_c'] - x_test['y_pred_test']

    if exp:
        # The training rows go through the same helper as the test rows, so
        # the result no longer depends on a loop index left over from the
        # test-set pass.
        x_train['y_pred_train'] = _asignar_intervalos(k_i, limites, etiquetas)
        x_train['y_train_c'] = _asignar_intervalos(np.asarray(y_train), limites, etiquetas)
        x_train['k_i'] = k_i
        # NOTE(review): the column is named 'y_test' although it holds
        # y_train; kept as-is so the exported sheet layout does not change.
        x_train['y_test'] = y_train
        x_train['Diff'] = x_train['y_train_c'] - x_train['y_pred_train']

        pesos = pd.DataFrame(x, columns=['x'])
        print('max_por_cutoff', max_por_cutoff)

        if output_path is None:
            output_path = r'C:\Users\s3183426\Documents\ML_algorithm\ABC\ASSET.xlsx'
        with pd.ExcelWriter(output_path) as writer:
            max_por_cutoff.to_excel(writer, sheet_name='CutOff', index=False)
            x_test.to_excel(writer, sheet_name='test', index=False)
            x_train.to_excel(writer, sheet_name='train', index=False)
            pesos.to_excel(writer, sheet_name='pesos', index=False)

    return x_test

def fitness_function(x):
    """
    Fitness of the weight vector ``x``: the number of test rows whose
    'Diff' value, as returned by asignar_cutoff(x, False), equals 0.
    """
    diffs = asignar_cutoff(x, False)['Diff']
    aciertos = diffs.loc[diffs.eq(0)]
    return aciertos.count()

def artificial_bee_colony_fitness_function(n_iter=1000, n_bees=50, dim=14, bound=(0.01, 0.25)):
    """
    Implementación del algoritmo de optimización por colonia de abejas artificiales.
    """
    def generate_normalized_bees(n_bees, dim, lower_bound, upper_bound):
        bees = np.random.uniform(lower_bound, upper_bound, (n_bees, dim))
        bees = bees / np.sum(bees, axis=1, keepdims=True)
        return bees

    bees = generate_normalized_bees(n_bees, dim, bound[0], bound[1])
    f = [fitness_function(bees[i]) for i in range(n_bees)]
    best_bee = bees[f.index(max(f))]
    best_fitness = fitness_function(best_bee)
    limit = 25
    no_improvement_count = 0
    no_improvement_bee = np.zeros(len(bees))

    for iteration in range(n_iter):
        print('-----------------------ITERACION', iteration, '------------------------')
        # Fase de abejas empleadas: explorar nuevas soluciones
        for i in range(n_bees):
            new_bee = (bees[i] + generate_normalized_bees(1, dim, bound[0], bound[1])[0]) / 2
            new_fitness = fitness_function(new_bee)
            if new_fitness > fitness_function(bees[i]):
                bees[i] = new_bee
                no_improvement_bee[i] = 0
            else:
                no_improvement_bee[i] += 1

        # Fase de abejas observadoras: explotar buenas soluciones
        fitnesses = np.array([fitness_function(bee) for bee in bees])
        probabilities = fitnesses / sum(fitnesses)
        selected_bee_index = np.random.choice(len(bees), p=probabilities)
        new_bee = (bees[selected_bee_index] + generate_normalized_bees(1, dim, bound[0], bound[1])[0]) / 2
        if fitness_function(new_bee) > fitness_function(bees[selected_bee_index]):
            bees[selected_bee_index] = new_bee
            fitnesses[selected_bee_index] = fitness_function(new_bee)
            no_improvement_bee[selected_bee_index] = 0
            print('se muto la abeja', selected_bee_index)
        else:
            no_improvement_bee[selected_bee_index] += 1

        # Fase de exploración: reinicializar algunas abejas
        for k in range(len(bees)):
            if no_improvement_bee[k] > limit and k != np.argmax(fitnesses):
                print('se reinicio la abeja:', k)
                bees[k] = generate_normalized_be