In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import StratifiedKFold

from sklearn.preprocessing import MinMaxScaler
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import shap
from sklearn.linear_model import LinearRegression
import seaborn as sns
import os
from joblib import dump, load
import pickle

from nilearn import plotting
import statsmodels.api as sm
from sklearn.model_selection import StratifiedShuffleSplit

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
import sys

# Data folder and its Models sub-folder; both are appended to the import
# search path, in this order.
path = 'C:/Users/felipe/Documents/Brain/'
path_ = 'C:/Users/felipe/Documents/Brain/Models/'
for extra_dir in (path, path_):
    sys.path.append(extra_dir)

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Inclusive age window applied to every cohort before modelling.
AGE_MIN, AGE_MAX = 50, 90


def _filter_age(df, age_min=AGE_MIN, age_max=AGE_MAX):
    """Return the rows of ``df`` whose 'Age' is within [age_min, age_max].

    The result is re-indexed from 0 so positional and label indexing agree.
    """
    in_range = (df['Age'] >= age_min) & (df['Age'] <= age_max)
    return df[in_range].reset_index(drop=True)


def _scale_to_frame(fitted_scaler, features):
    """Transform ``features`` with an already-fitted scaler.

    The scaled array is wrapped in a DataFrame that reuses the input's
    column names.
    """
    return pd.DataFrame(fitted_scaler.transform(features), columns=features.columns)


# --- File locations (all relative to ``path`` defined in the setup cell) ---
file_name = 'dataframe_alpha.xlsx'
file_path_CN = f"{path}{file_name}"
file_path_AD = f'{path}df_AD_filtrado.xlsx'
file_path_FTD = f'{path}df_FTD_filtrado.xlsx'
file_path_MCI = f'{path}df_MCI_filtrado.xlsx'
file_path_PD = f'{path}df_PD_filtrado.xlsx'

# --- Load each cohort and keep only subjects inside the age window ---
df_CN = pd.read_excel(file_path_CN)
df_CN_filtrado = _filter_age(df_CN)

df_AD = pd.read_excel(file_path_AD)
df_AD_filtrado = _filter_age(df_AD)

df_FTD = pd.read_excel(file_path_FTD)
df_FTD_filtrado = _filter_age(df_FTD)

df_MCI = pd.read_excel(file_path_MCI)
df_MCI_filtrado = _filter_age(df_MCI)

df_PD = pd.read_excel(file_path_PD)
df_PD_filtrado = _filter_age(df_PD)

# --- Feature matrices: drop the index column and the target; the AD/FTD/MCI/PD
# files additionally carry a "Diagnosis" column that is not a feature ---
X_CN = df_CN_filtrado.drop(columns=["Unnamed: 0", "Age"])
X_AD = df_AD_filtrado.drop(columns=["Unnamed: 0", "Age", "Diagnosis"])
X_FTD = df_FTD_filtrado.drop(columns=["Unnamed: 0", "Age", "Diagnosis"])
X_MCI = df_MCI_filtrado.drop(columns=["Unnamed: 0", "Age", "Diagnosis"])
X_PD = df_PD_filtrado.drop(columns=["Unnamed: 0", "Age", "Diagnosis"])

# Stack all five cohorts so the scaler sees the full feature range.
X_combined = pd.concat([X_CN, X_AD, X_FTD, X_MCI, X_PD], axis=0).reset_index(drop=True)

# Fit one MinMaxScaler on the combined features and reuse it for every cohort,
# so all cohorts share the same scaling.
scaler = MinMaxScaler()
scaler.fit(X_combined)

X_CN_scaled = _scale_to_frame(scaler, X_CN)
X_AD_scaled = _scale_to_frame(scaler, X_AD)
X_FTD_scaled = _scale_to_frame(scaler, X_FTD)
X_MCI_scaled = _scale_to_frame(scaler, X_MCI)
X_PD_scaled = _scale_to_frame(scaler, X_PD)

# --- Regression target (Age) per cohort and for all cohorts together ---
y_CN = df_CN_filtrado["Age"]
y_AD = df_AD_filtrado["Age"]
y_FTD = df_FTD_filtrado["Age"]
y_MCI = df_MCI_filtrado["Age"]
y_PD = df_PD_filtrado["Age"]

y_combined = pd.concat([y_CN, y_AD, y_FTD, y_MCI, y_PD], axis=0).reset_index(drop=True)


In [5]:
def regression_metrics(y_true, y_pred):
    """Compute standard regression error metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2)`` in that order, where ``rmse`` is the square
        root of ``mse``.
    """
    abs_err = mean_absolute_error(y_true, y_pred)
    sq_err = mean_squared_error(y_true, y_pred)
    return abs_err, sq_err, np.sqrt(sq_err), r2_score(y_true, y_pred)

In [None]:
# K-fold cross-validation of a linear regression that predicts Age from the
# scaled CN features. The fold with the lowest validation MAE is remembered.
best_fold = 0
best_score = float('inf')
best_model = None

# Order must match the tuple returned by regression_metrics().
metrics = ['mae', 'mse', 'rmse', 'r2']
results = {'train': {m: [] for m in metrics},
           'val': {m: [] for m in metrics},
           # No held-out test set is defined in this notebook, so these lists
           # stay empty; the key is kept so downstream code can rely on it.
           'test': {m: [] for m in metrics},
           'model': []}

n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=126)

# Not used inside this cell; kept in case later cells reference it.
model_ml = LinearRegression()

# Split the same data that is indexed below (previously an undefined X / y).
for fold, (train_index, test_index) in enumerate(kf.split(X_CN_scaled, y_CN)):
    X_train_kf, X_val_kf = X_CN_scaled.iloc[train_index], X_CN_scaled.iloc[test_index]
    y_train_kf, y_val_kf = y_CN.iloc[train_index], y_CN.iloc[test_index]

    # A fresh estimator per fold; it must be fitted before predict() is called.
    model = LinearRegression()
    model.fit(X_train_kf, y_train_kf)

    y_pred_train = model.predict(X_train_kf)
    train_metrics = regression_metrics(y_train_kf, y_pred_train)

    y_pred_val = model.predict(X_val_kf)
    val_metrics = regression_metrics(y_val_kf, y_pred_val)

    # Store this fold's metrics. Only the splits that were actually evaluated
    # are recorded; there are no test-set metrics to append.
    fold_metrics = {'train': train_metrics, 'val': val_metrics}
    for ds, metrics_set in fold_metrics.items():
        for metric, value in zip(metrics, metrics_set):
            results[ds][metric].append(value)

    # Keep the fitted model of every fold.
    results['model'].append(model)

    # Track the fold with the lowest validation MAE.
    mae = val_metrics[0]
    if mae < best_score:
        best_fold = fold
        best_score = mae
        best_model = model
