# In [None]:
import os
from sklearn.metrics import hamming_loss, mean_squared_error, confusion_matrix, accuracy_score, r2_score
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from collections import Counter
from scipy.spatial import distance
import Non_Retail as NR
from sklearn.model_selection import cross_val_score

def outputRes(writer, output_tables, sheet_name='option_1'):
    """
    Write several tables one below another on a single Excel sheet.

    Each item of ``output_tables`` is converted to a ``pandas.DataFrame`` and
    written with its header and index. After every table the start row is
    advanced by the table length plus three, so consecutive tables do not
    overlap.

    The writer is always closed before this function returns, including when
    converting or writing a table raises, so the underlying file handle is
    never left open.

    Args:
        writer: Open ``pandas.ExcelWriter``. It is closed by this function and
            must not be reused afterwards.
        output_tables: Iterable of DataFrames, or of anything accepted by the
            ``pandas.DataFrame`` constructor (e.g. numpy arrays).
        sheet_name: Name of the sheet that receives all the tables.
    """
    offset = 0
    try:
        for table in output_tables:
            # Accept numpy arrays and similar inputs as well as DataFrames.
            frame = pd.DataFrame(table)
            frame.to_excel(writer, sheet_name=sheet_name, startrow=offset, header=True, index=True)
            offset += len(frame) + 3
    finally:
        writer.close()

def tsnHelper(pred_label, true_label):
    """
    Summarise how far predicted labels are from the true labels.

    The element-wise difference ``pred_label - true_label`` is computed first
    (so pandas inputs are aligned on their index exactly as the subtraction
    does), and all counts are then taken with vectorized numpy operations.

    Args:
        pred_label: Predicted labels (numpy array or pandas Series).
        true_label: True labels, same length as ``pred_label``.

    Returns:
        A float numpy array with six entries:
            [0] share of samples whose difference is 0,
            [1] share whose difference is in {-1, 0, 1},
            [2] share whose difference is in {-2, -1, 0, 1, 2},
            [3] number of samples with difference == 0,
            [4] number of samples with difference > 0,
            [5] number of samples with difference < 0.
        For empty input the three shares are NaN (division by zero length).
    """
    diff = np.asarray(pred_label - true_label)
    magnitude = np.abs(diff)
    # Counts of |diff| == 0, 1, 2, accumulated into "within 0 / 1 / 2 steps".
    within = np.cumsum([np.count_nonzero(magnitude == step) for step in range(3)], axis=0)
    return np.append(within / float(len(diff)),
                     [np.count_nonzero(diff == 0),
                      np.count_nonzero(diff > 0),
                      np.count_nonzero(diff < 0)])

def minkowski_distance(x1, x2, p=2):
    """
    Return the Minkowski distance of order ``p`` between two vectors.

    The absolute element-wise differences are raised to the power ``p``,
    summed, and the ``p``-th root of that sum is returned.
    """
    gap = np.abs(x1 - x2)
    powered_total = np.sum(gap ** p)
    root_exponent = 1 / p
    return powered_total ** root_exponent

# Enhanced KNN classifier
class KNN_Enhanced:
    """
    k-nearest-neighbours classifier with a selectable metric and vote weighting.

    Supported metrics are Minkowski distances of fixed order: 'euclidean'
    (p=2), 'manhattan' (p=1) and 'minkowski' (p=3). Votes are either
    'uniform' (plain majority) or 'distance' (each neighbour votes with the
    inverse of its distance).

    Training data is stored as numpy arrays, so labels are always looked up by
    position. Passing a pandas Series or DataFrame (with any index) to ``fit``
    or ``predict`` is therefore safe.
    """

    # Minkowski order used for each supported metric name.
    _METRIC_ORDERS = {'euclidean': 2, 'manhattan': 1, 'minkowski': 3}
    _WEIGHT_MODES = ('uniform', 'distance')

    def __init__(self, k=9, distance_metric='euclidean', weights='uniform'):
        """
        Args:
            k: Number of neighbours that take part in the vote.
            distance_metric: 'euclidean', 'manhattan' or 'minkowski'.
            weights: 'uniform' or 'distance'.

        Raises:
            ValueError: If the metric or the weighting mode is not supported.
        """
        if distance_metric not in self._METRIC_ORDERS:
            raise ValueError("Métrica de distancia no soportada. Use 'euclidean', 'manhattan' o 'minkowski'.")
        # Fail fast on a bad weighting mode, the same way as for the metric.
        if weights not in self._WEIGHT_MODES:
            raise ValueError("Tipo de pesos no soportado. Use 'uniform' o 'distance'.")
        self.k = k
        self.weights = weights
        order = self._METRIC_ORDERS[distance_metric]
        self.p = order
        # Pairwise form of the metric, kept as a public attribute. Prediction
        # itself uses the vectorized equivalent in ``_predict``.
        self.distance = lambda x1, x2: minkowski_distance(x1, x2, p=order)

    def fit(self, X, y):
        """
        Store the training samples and their labels.

        Args:
            X: Training samples, one row per sample.
            y: Labels, one per training sample.

        Raises:
            ValueError: If ``X`` and ``y`` have different lengths.
        """
        # Converting to arrays drops any pandas index, so later lookups by
        # neighbour position cannot be misread as index labels.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
        if len(self.X_train) != len(self.y_train):
            raise ValueError("X e y deben tener el mismo número de muestras.")

    def predict(self, X):
        """Return a numpy array with the predicted label of every row of ``X``."""
        # np.asarray makes a DataFrame iterate over rows, not column names.
        return np.array([self._predict(x) for x in np.asarray(X)])

    def _predict(self, x):
        """
        Predict the label of a single sample.

        Raises:
            ValueError: If ``self.weights`` is not 'uniform' or 'distance'.
        """
        # Distances from x to every training sample in one broadcast
        # expression instead of one Python call per training row.
        gaps = np.abs(self.X_train - np.asarray(x)) ** self.p
        distances = gaps.reshape(len(gaps), -1).sum(axis=1) ** (1 / self.p)
        # Positions of the k closest training samples and their labels.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = self.y_train[k_indices]

        if self.weights == 'uniform':
            # Uniform weights: plain majority vote.
            return Counter(k_nearest_labels).most_common(1)[0][0]
        if self.weights == 'distance':
            # Distance weights: closer neighbours carry a larger vote.
            k_nearest_distances = distances[k_indices]
            # Avoid division by zero for exact matches.
            k_nearest_distances = np.where(k_nearest_distances == 0, 1e-5, k_nearest_distances)
            inverse = 1 / k_nearest_distances
            shares = inverse / inverse.sum()
            weighted_votes = Counter()
            for label, share in zip(k_nearest_labels, shares):
                weighted_votes[label] += share
            return weighted_votes.most_common(1)[0][0]
        # Reached only if ``weights`` was changed after construction.
        raise ValueError("Tipo de pesos no soportado. Use 'uniform' o 'distance'.")

def model(list_var, k, metric, w, path_file, name_file,
          general_path=r'C:\Users\ML_algorithm', path=r'C:\users\file.xlsx'):
    """
    Train the KNN model, score it, and export the results to Excel.

    Steps:
        1. Read the training and test sheets from the workbook at ``path``.
        2. Fit ``KNN_Enhanced`` on the ``list_var`` columns against 'Grado'.
        3. Predict on both sets and store the result as 'Predicted_IG'.
        4. Write the ``tsnHelper`` summaries for train and test to
           ``general_path + path_file`` (sheet ``name_file``).
        5. Write both data sets, including predictions, to the 'final.xlsx'
           workbook under ``general_path``.

    Args:
        list_var: Names of the feature columns.
        k: Number of neighbours.
        metric: Distance metric name passed to ``KNN_Enhanced``.
        w: Vote weighting mode passed to ``KNN_Enhanced``.
        path_file: File path appended to ``general_path`` for the summary
            workbook; expected to start with a path separator.
        name_file: Sheet name for the summary workbook.
        general_path: Output directory (defaults to the original hard-coded one).
        path: Input workbook (defaults to the original hard-coded one).

    Returns:
        Tuple ``(performance, mse)``: test-set accuracy and test-set mean
        squared error between predicted and true 'Grado'.
    """
    # Read the training and test sets from the input workbook.
    x_train = pd.read_excel(path, sheet_name=' train1')
    x_test = pd.read_excel(path, sheet_name=' test1')
    y_train = x_train['Grado']
    y_test = x_test['Grado']

    x_train_var = x_train[list_var]
    x_test_var = x_test[list_var]

    # Fit on plain arrays so neighbour lookups are positional and never
    # depend on the pandas index.
    knn = KNN_Enhanced(k=k, distance_metric=metric, weights=w)
    knn.fit(x_train_var.values, y_train.values)

    predictions_test = knn.predict(x_test_var.values)
    predictions_train = knn.predict(x_train_var.values)

    x_test['Predicted_IG'] = predictions_test
    x_train['Predicted_IG'] = predictions_train

    # Fresh Series with a default index so the element-wise comparison in
    # tsnHelper lines up by position.
    IG_TEST = pd.Series(y_test.tolist())
    IG_TRAIN = pd.Series(y_train.tolist())
    IG__test_PRED = pd.Series(predictions_test)
    IG__train_PRED = pd.Series(predictions_train)

    # Build both summary tables before opening the writer, so a failure
    # here cannot leave an open writer behind.
    train = pd.DataFrame(
        {'train measure ': ['40', '60', '90', 'identical', 'upgrade', 'downgrade'],
         'value_after_affiliation': tsnHelper(IG__train_PRED, IG_TRAIN)})

    test = pd.DataFrame(
        {'test measure ': ['40', '60', '90', 'identical', 'upgrade', 'downgrade'],
         'value_after_affiliation': tsnHelper(IG__test_PRED, IG_TEST)})

    # outputRes closes the writer once the tables are written.
    writer = pd.ExcelWriter(general_path + path_file, engine='openpyxl')
    outputRes(writer, (train, test), name_file)

    with pd.ExcelWriter(general_path + r'\final.xlsx') as writer:
        x_train.to_excel(writer, sheet_name='x_train', index=False)
        x_test.to_excel(writer, sheet_name='x_test', index=False)

    # NOTE(review): the original returned two names that were never defined.
    # They are computed here as test-set accuracy and test-set MSE, using the
    # metrics the file already imports -- confirm this is the intended meaning.
    performance = accuracy_score(IG_TEST, IG__test_PRED)
    mse = mean_squared_error(IG_TEST, IG__test_PRED)

    return performance, mse

# Feature columns fed to the model: 'var_1' through 'var_14'.
# NOTE(review): the original list held a literal `...` (the Ellipsis object)
# between 'var_2' and 'var_14', which is not a valid column name. It is
# expanded here assuming the columns are numbered consecutively -- confirm.
list_var = ['var_' + str(i) for i in range(1, 15)]

result = model(list_var, 9, 'euclidean', 'distance', r'\asset\Match_results_ASSET_score.xlsx', 'Match_results_ASSET_score')
