In [15]:
%load_ext autoreload
%autoreload 2

In [16]:
import pandas as pd
import sys
import os
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_path = os.path.join(parent_dir, 'src')
sys.path.insert(0, src_path)

from functions import main_cleaning

pd.set_option('display.max_columns', None) 

data_path = os.path.join(parent_dir, 'data')

df_raw = pd.read_csv(os.path.join(data_path, "dielectron.csv"))

In [17]:
df_cleaned = main_cleaning(df_raw)
df_cleaned.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_same_charge"] = df["Q1"] == df["Q2"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_outlier"] = False


### Database creation

In [18]:
# Event has one value repeated. We have 2 options: 
# We can either drop it or create an ID for Event. We will do the last.

df_cleaned[df_cleaned["Event"] == 418006834]

Unnamed: 0,Run,Event,E1,px1,py1,pz1,pt1,eta1,phi1,Q1,E2,px2,py2,pz2,pt2,eta2,phi2,Q2,M,is_same_charge,is_outlier
39064,147114,418006834,18.7789,5.74359,7.40263,-16.2745,9.36951,-1.31941,0.910934,1,61.6136,23.1457,5.76582,-56.8091,23.853,-1.60235,0.244141,1,10.6654,True,False
79596,148029,418006834,21.8398,-19.4314,-9.03848,-4.20742,21.4307,-0.195087,-2.70621,-1,7.19454,-6.13848,-1.89557,-3.23839,6.4245,-0.484849,-2.84208,1,3.76546,False,False


In [19]:
# "Run" table

df_run = df_cleaned[["Run"]].drop_duplicates().copy()

df_run["date_run"] = pd.NaT

df_run.rename(columns={"Run": "run_num"}, inplace=True)

In [20]:
# "Particle A" table

df_particle_a = df_cleaned[["E1", "px1", "py1", "pz1", "pt1", "eta1", "phi1", "Q1"]].copy()

df_particle_a.rename(columns={"E1": "energy", "px1": "px", "py1": "py", "pz1": "pz", "pt1": "pt", "eta1": "eta", "phi1": "phi", "Q1": "charge"}, inplace=True)

In [21]:
# "Particle B" table

df_particle_b = df_cleaned[["E2", "px2", "py2", "pz2", "pt2", "eta2", "phi2", "Q2"]].copy()

df_particle_b.rename(columns={"E2": "energy", "px2": "px", "py2": "py", "pz2": "pz", "pt2": "pt", "eta2": "eta", "phi2": "phi", "Q2": "charge"}, inplace=True)

In [22]:
# from functions import import_to_sql

# import_to_sql(df_run, "run")
# import_to_sql(df_particle_a, "particle_a")
# import_to_sql(df_particle_b, "particle_b")


In [23]:
# ID for particles

from functions import make_query

id_partA_query = "SELECT id_part FROM particle_a"
id_partA = make_query(id_partA_query)

id_partB_query = "SELECT id_part FROM particle_b"
id_partB = make_query(id_partB_query)

In [24]:
# Event table

df_event = df_cleaned[["Event", "Run", "M"]].copy()

df_event["id_partA"] = id_partA
df_event["id_partB"] = id_partB

df_event.rename(columns={"Event": "event_num", "Run": "run_num", "M": "invariant_mass"}, inplace=True)

In [25]:
# import_to_sql(df_event, "event")

# MACHINE LEARNING

### Los resultados de MSE, MAE y R2 para KNN sin normalizar eran: 
MAE: 17.717622715597013  
MSE: 12.581512831873468   
R2: 0.507506217561434  
### So I will normalize/standarize.

In [26]:
df_ml = df_cleaned.dropna()

In [27]:
features = df_ml[["Run", "px1", "py1", "pz1", "px2", "py2", "pz2", "is_same_charge", "is_outlier"]]
target = df_ml[["M"]]

In [28]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.20, random_state=42)

### Normalization

In [29]:
from sklearn.preprocessing import MinMaxScaler

normalizer = MinMaxScaler()

X_train_norm= normalizer.fit_transform(X_train)        

X_test_norm = normalizer.transform(X_test)

In [30]:
X_train_norm = pd.DataFrame(X_train_norm, columns = X_train.columns)   
X_test_norm = pd.DataFrame(X_test_norm, columns = X_test.columns)

### I will try 5 different models

In [31]:
from sklearn.ensemble import RandomForestRegressor

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

# Define models
models = {
  'KNeighborsRegressor': KNeighborsRegressor(),
  'Linear Regression': LinearRegression(),
  'Decision Tree': DecisionTreeRegressor(random_state=42),
  'Random Forest': RandomForestRegressor(random_state=42, n_estimators=100),
  'SVR': SVR(),
}   

# Train and evaluate models
results = {}

for name, model in models.items():
  # Train the model
    model.fit(X_train_norm, y_train)
  
  # Make predictions
    y_pred_norm = model.predict(X_test_norm)

    mae_standardized = mean_absolute_error(y_test, y_pred_norm)
    mse_standardized = root_mean_squared_error(y_test, y_pred_norm)
    r2_standardized = r2_score(y_test, y_pred_norm)
  
  # Store results
    results[name] = {
        'mae' : mae_standardized,
        'mse' : mse_standardized,
        'R2' : r2_standardized
  }

# Print results
for name, result in results.items():
  print(f"\n{name}:")
  print(f"Mean Absolute Error: {result['mae']}")
  print(f"Root Mean Squared Error: {result['mse']}")
  print(f"R2: {result['R2']}")

# Compare R2_value
r2_values = {name: result['R2'] for name, result in results.items()}
best_model = max(r2_values, key=r2_values.get)

print("\nModel Accuracy Comparison:")
for name, accuracy in r2_values.items():
  print(f"{name}: {accuracy:.4f}")

print(f"\nBest performing model: {best_model} with accuracy {r2_values[best_model]:.4f}")

  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)



KNeighborsRegressor:
Mean Absolute Error: 10.12530578237149
Root Mean Squared Error: 15.381530427495692
R2: 0.628816253440426

Linear Regression:
Mean Absolute Error: 19.259107458476098
Root Mean Squared Error: 24.897594748274074
R2: 0.02746634175662188

Decision Tree:
Mean Absolute Error: 4.1532178397317185
Root Mean Squared Error: 7.146840026724198
R2: 0.9198657871470972

Random Forest:
Mean Absolute Error: 2.3478761082736876
Root Mean Squared Error: 3.918134937140667
R2: 0.9759148902055805

SVR:
Mean Absolute Error: 16.845790601770357
Root Mean Squared Error: 24.50329975751872
R2: 0.0580258193386155

Model Accuracy Comparison:
KNeighborsRegressor: 0.6288
Linear Regression: 0.0275
Decision Tree: 0.9199
Random Forest: 0.9759
SVR: 0.0580

Best performing model: Random Forest with accuracy 0.9759


### Standarization

In [32]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)        

X_test_scaled = scaler.transform(X_test)

In [33]:
X_train_scaled = pd.DataFrame(X_train_scaled, columns = X_train.columns)   
X_test_scaled = pd.DataFrame(X_test_scaled, columns = X_test.columns)

In [34]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR


# Define models
models = {
  'KNeighborsRegressor': KNeighborsRegressor(),
  'Linear Regression': LinearRegression(),
  'Decision Tree': DecisionTreeRegressor(random_state=42),
  'Random Forest': RandomForestRegressor(random_state=42, n_estimators=100),
  'SVR': SVR(),
}   

# Train and evaluate models
results = {}

for name, model in models.items():
  # Train the model
    model.fit(X_train_scaled, y_train)
  
  # Make predictions
    y_pred_scaled = model.predict(X_test_scaled)

    mae_standardized = mean_absolute_error(y_test, y_pred_scaled)
    mse_standardized = root_mean_squared_error(y_test, y_pred_scaled)
    r2_standardized = r2_score(y_test, y_pred_scaled)
  
  # Store results
    results[name] = {
        'mae' : mae_standardized,
        'mse' : mse_standardized,
        'R2' : r2_standardized
  }

# Print results
for name, result in results.items():
  print(f"\n{name}:")
  print(f"Mean Absolute Error: {result['mae']}")
  print(f"Root Mean Squared Error: {result['mse']}")
  print(f"R2: {result['R2']}")

# Compare R2_value
r2_values = {name: result['R2'] for name, result in results.items()}
best_model = max(r2_values, key=r2_values.get)

print("\nModel Accuracy Comparison:")
for name, accuracy in r2_values.items():
  print(f"{name}: {accuracy:.4f}")

print(f"\nBest performing model: {best_model} with accuracy {r2_values[best_model]:.4f}")

  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)



KNeighborsRegressor:
Mean Absolute Error: 8.875525099854848
Root Mean Squared Error: 13.74404795685576
R2: 0.7036402792529388

Linear Regression:
Mean Absolute Error: 19.259107458476098
Root Mean Squared Error: 24.897594748274077
R2: 0.02746634175662166

Decision Tree:
Mean Absolute Error: 4.142311697782672
Root Mean Squared Error: 7.141685544711038
R2: 0.919981335111248

Random Forest:
Mean Absolute Error: 2.3447513779118077
Root Mean Squared Error: 3.9098505538185626
R2: 0.9760166321541155

SVR:
Mean Absolute Error: 9.141582787416484
Root Mean Squared Error: 13.338408631516149
R2: 0.7208755440655801

Model Accuracy Comparison:
KNeighborsRegressor: 0.7036
Linear Regression: 0.0275
Decision Tree: 0.9200
Random Forest: 0.9760
SVR: 0.7209

Best performing model: Random Forest with accuracy 0.9760


### We take Standarized Random Forest as the best model

### Now, we are going to try RF just with the columns we need in our hypothesis (explained in streamlit)

In [35]:
features_lower = df_ml[["Run", "pz1", "pz2", "is_same_charge", "is_outlier"]]
target_lower = df_ml[["M"]]

In [36]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features_lower, target_lower, test_size=0.20, random_state=42)

In [37]:
type(X_train)

pandas.core.frame.DataFrame

In [38]:
data_scaler = StandardScaler()

X_train_scaled = data_scaler.fit_transform(X_train)        

X_test_scaled = data_scaler.transform(X_test)

In [39]:
type(X_train_scaled)

numpy.ndarray

In [40]:
X_train_scaled = pd.DataFrame(X_train_scaled, columns = X_train.columns)   
X_test_scaled = pd.DataFrame(X_test_scaled, columns = X_test.columns)

In [41]:
# import pickle 
# with open('data_scaler.pkl', 'wb') as file:
#     pickle.dump(data_scaler, file)

In [42]:
rf_lower = RandomForestRegressor(random_state=42, n_estimators=100)

rf_lower.fit(X_train_scaled, y_train)
  
y_pred_lower_scaled = rf_lower.predict(X_test_scaled)

mae_standardized = mean_absolute_error(y_test, y_pred_lower_scaled)
mse_standardized = root_mean_squared_error(y_test, y_pred_lower_scaled)
r2_standardized = r2_score(y_test, y_pred_lower_scaled)

mae_standardized, mse_standardized, r2_standardized

  return fit_method(estimator, *args, **kwargs)


(9.648483653205867, 15.70953893783305, 0.6128165981972764)

All -> (2.3447513779118077, 3.9098505538185626, 0.9760166321541155)

No py -> (5.030465963967166, 8.80652660636964, 0.8783256160020667)

Neither py nor pz -> (16.016459623359527, 21.49041157214318, 0.2754317177361222)

Neither px nor py -> (9.648483653205867, 15.70953893783305, 0.6128165981972764)

In [43]:
features_scaled = data_scaler.transform(features_lower)

y_pred_lower_real = rf_lower.predict(features_scaled)

mae_standardized = mean_absolute_error(target_lower, y_pred_lower_real)
mse_standardized = root_mean_squared_error(target_lower, y_pred_lower_real)
r2_standardized = r2_score(target_lower, y_pred_lower_real)

mae_standardized, mse_standardized, r2_standardized



(4.804240913842951, 8.788852094326584, 0.8789006392171295)

In [44]:
df_ml["pred"] = y_pred_lower_real

df_ml

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ml["pred"] = y_pred_lower_real


Unnamed: 0,Run,Event,E1,px1,py1,pz1,pt1,eta1,phi1,Q1,E2,px2,py2,pz2,pt2,eta2,phi2,Q2,M,is_same_charge,is_outlier,pred
0,147115,366639895,58.71410,-7.311320,10.531000,-57.29740,12.82020,-2.202670,2.177660,1,11.28360,-1.032340,-1.88066,-11.077800,2.14537,-2.344030,-2.072810,-1,8.94841,False,False,8.597872
1,147115,366704169,6.61188,-4.152130,-0.579855,-5.11278,4.19242,-1.028420,-3.002840,-1,17.14920,-11.713500,5.04474,11.464700,12.75360,0.808077,2.734920,1,15.89300,False,False,19.626943
2,147115,367112316,25.54190,-11.480900,2.041680,22.72460,11.66100,1.420480,2.965600,1,15.82030,-1.472800,2.25895,-15.588800,2.69667,-2.455080,2.148570,1,38.38770,True,False,39.525085
3,147115,366952149,65.39590,7.512140,11.887100,63.86620,14.06190,2.218380,1.007210,1,25.12730,4.087860,2.59641,24.656300,4.84272,2.330210,0.565865,-1,3.72862,False,False,8.538839
4,147115,366523212,61.45040,2.952840,-14.622700,-59.61210,14.91790,-2.093750,-1.371540,-1,13.88710,-0.277757,-2.42560,-13.670800,2.44145,-2.423700,-1.684810,-1,2.74718,True,False,6.694673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99972,146511,522575834,12.31310,-10.658000,5.164440,3.36858,11.84330,0.280727,2.690370,-1,1.80181,0.668609,-1.58437,0.537805,1.71967,0.307851,-1.171470,1,8.44779,False,False,9.816381
99973,146511,522786431,18.46420,7.854990,15.133000,-7.08659,17.05020,-0.404510,1.092010,1,14.69110,-1.418020,-2.28117,-14.443500,2.68598,-2.383880,-2.126960,1,20.71540,True,False,18.591266
99974,146511,522906124,4.18566,-3.273500,-0.308507,-2.59013,3.28801,-0.723075,-3.047630,1,72.81740,-11.074900,-9.28179,-71.369300,14.45010,-2.300410,-2.444050,-1,12.71350,False,False,15.340402
99975,146511,523243830,54.46220,11.352600,11.880900,51.92400,16.43280,1.867800,0.808132,-1,8.58671,0.378009,3.07828,8.007050,3.10141,1.677170,1.448610,1,4.69670,False,False,7.410890


In [46]:
from sklearn.model_selection import cross_val_score

# Aplicamos la validación cruzada en el conjunto de entrenamiento
scores = cross_val_score(rf_lower, X_train_scaled, y_train, cv=5, scoring='r2')

print("Cross-validated R2 scores:", scores)
print("Mean cross-validated R2:", scores.mean())

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


Cross-validated R2 scores: [0.62209734 0.61593845 0.61195166 0.59977886 0.59800937]
Mean cross-validated R2: 0.6095551354410731


# Trying with Random Forest and Hyperparameter Tuning

It took several hours, it´s not worth compared to XGBoost which takes a few minutes

In [47]:
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
# from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
# import numpy as np

# # Define hyperparameter space for Random Forest
# param_grid_rf = {
#     'n_estimators': [100, 300, 500],  # Número de árboles en el bosque
#     'max_depth': [None, 10, 30, 50],        # Máxima profundidad de cada árbol
#     'min_samples_split': [2, 5, 10],                # Número mínimo de muestras requeridas para dividir un nodo
#     'min_samples_leaf': [1, 2, 4],                  # Número mínimo de muestras requeridas para estar en una hoja
#     'max_features': ['auto', 'sqrt',],       # Número de características a considerar para la mejor división
#     'bootstrap': [True, False],                     # Método para seleccionar muestras para entrenar cada árbol
#     'criterion': ['absolute_error', 'friedman_mse', 'poisson', 'squared_error'],               # Función para medir la calidad de una división
# }


# # Initialize base XGBoost model
# rf_regressor = RandomForestRegressor()

# # Set up KFold cross-validation
# kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# # Configure GridSearchCV for RF
# grid_search_rf = GridSearchCV(rf_regressor, param_grid_rf, cv=kfold, scoring='neg_mean_squared_error')

# # Train GridSearchCV with XGBoost
# grid_search_rf.fit(X_train, y_train)

# # Extract the optimal XGBoost model
# best_rf = grid_search_rf.best_estimator_

# # Predict using the optimal model
# y_pred_train_best_rf = best_rf.predict(X_train)
# y_pred_test_best_rf = best_rf.predict(X_test)

# # Cross-validation scores for XGBoost using best_xgb
# mse_scores_rf = cross_val_score(best_rf, X_train, y_train, cv=kfold, scoring=mse_scorer)
# r2_scores_rf = cross_val_score(best_rf, X_train, y_train, cv=kfold, scoring=r2_scorer)
# mae_scores_rf = cross_val_score(best_rf, X_train, y_train, cv=kfold, scoring=mae_scorer)

# # Compile results into a DataFrame for XGBoost with Grid Search
# evaluation_metrics_xgb = {
#     'Model': ['RandomForest (Grid Search)'],
#     'Avg_MSE_CV': [np.mean(mse_scores_rf)],
#     'Std_MSE_CV': [np.std(mse_scores_rf)],
#     'Avg_R2_CV': [np.mean(r2_scores_rf)],
#     'Std_R2_CV': [np.std(r2_scores_rf)],
#     'Avg_MAE_CV': [np.mean(mae_scores_rf)],
#     'Std_MAE_CV': [np.std(mae_scores_rf)],
#     'MSE_Train': [root_mean_squared_error(y_train, y_pred_train_best_rf)],
#     'R2_Train': [r2_score(y_train, y_pred_train_best_rf)],
#     'MAE_Train': [mean_absolute_error(y_train, y_pred_train_best_rf)],
#     'MSE_Test': [root_mean_squared_error(y_test, y_pred_test_best_rf)],
#     'R2_Test': [r2_score(y_test, y_pred_test_best_rf)],
#     'MAE_Test': [mean_absolute_error(y_test, y_pred_test_best_rf)]
# }

# rf_results_df = pd.DataFrame(evaluation_metrics_xgb)


# A new model: XGBoost

In [48]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np
import xgboost as xgb

xg_model = xgb.XGBRegressor()

grid = {
    'n_estimators': [100, 200, 300, 400, 500],            # Número de árboles
    'max_depth': [3, 5, 7, 9, 11],                        # Profundidad máxima de los árboles
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],        # Tasa de aprendizaje
    'subsample': [0.6, 0.8, 1.0],                         # Proporción de muestras utilizadas para entrenar
    'colsample_bytree': [0.6, 0.8, 1.0],                 # Proporción de características utilizadas para entrenar
    'gamma': [0, 0.1, 0.2, 0.3],                          # Reducción de pérdida requerida para hacer una partición adicional
    'reg_alpha': [0, 0.1, 1, 10],                         # Regularización L1
    'reg_lambda': [0, 0.1, 1, 10]                        # Regularización L2
}


xg_model_randomized_search = RandomizedSearchCV(estimator = xg_model, param_distributions = grid, n_iter = 10, cv = 5, n_jobs = -1) 

In [49]:
xg_model_randomized_search.fit(X_train_scaled, y_train)
  
y_pred_scaled = xg_model_randomized_search.predict(X_test_scaled)

mae_standardized = mean_absolute_error(y_test, y_pred_scaled)
mse_standardized = root_mean_squared_error(y_test, y_pred_scaled)
r2_standardized = r2_score(y_test, y_pred_scaled)

mae_standardized, mse_standardized, r2_standardized

(8.997789351195191, 14.81660474930629, 0.6555808865004926)

In [50]:
from sklearn.model_selection import cross_val_score

# Aplicamos la validación cruzada en el conjunto de entrenamiento
scores = cross_val_score(xg_model_randomized_search, X_train_scaled, y_train, cv=5, scoring='r2')

print("Cross-validated R2 scores:", scores)
print("Mean cross-validated R2:", scores.mean())

Cross-validated R2 scores: [0.6601214  0.64777683 0.65303298 0.64460663 0.6383617 ]
Mean cross-validated R2: 0.6487799077217029


In [55]:
# import pickle 
# with open('xg_model_randomized_search.pkl', 'wb') as file:
#     pickle.dump(xg_model_randomized_search, file)

In [51]:
# feature importance
importance = xg_model_randomized_search.best_estimator_.feature_importances_

In [52]:
importance

array([0.05765451, 0.42355523, 0.35660595, 0.13862169, 0.02356272],
      dtype=float32)

# Neural Network

In [53]:
import tensorflow as tf
from tensorflow import keras

In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import root_mean_squared_error, r2_score


# Convertir a tensores de TensorFlow
X_train_tf = X_train_scaled.astype('float32')
X_test_tf = X_test_scaled.astype('float32')
y_train_tf = y_train.astype('float32')
y_test_tf = y_test.astype('float32')

#Construir el modelo (regresión)
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.BatchNormalization())  # Añadir BatchNormalization
model.add(layers.Dropout(0.2))
model.add(layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.BatchNormalization())  # Añadir BatchNormalization
model.add(layers.Dropout(0.2))
model.add(layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.BatchNormalization())  # Añadir BatchNormalization
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1))  # Capa de salida para regresión

# Compilar el modelo
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='mean_squared_error', metrics=['mae'])

# Early Stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Entrenar el modelo
history = model.fit(X_train_tf, y_train_tf, epochs=10000, batch_size=1024, validation_data=(X_test_tf, y_test_tf))     #callbacks=[early_stopping]

# Evaluar el modelo
test_loss, test_mae = model.evaluate(X_test_tf, y_test_tf)
print('Test MAE:', test_mae)

# Predicciones del conjunto de prueba
y_pred_tf = model.predict(X_test_tf)

# Calcular métricas adicionales
rmse = root_mean_squared_error(y_test_tf, y_pred_tf)
r2 = r2_score(y_test, y_pred_tf)

print(f'Test RMSE: {rmse}')
print(f'Test R²: {r2}')


Epoch 1/10000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1521.7312 - mae: 30.0047 - val_loss: 1501.5181 - val_mae: 29.6819
Epoch 2/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1443.0978 - mae: 29.7898 - val_loss: 1478.7524 - val_mae: 29.9689
Epoch 3/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1363.5175 - mae: 29.3233 - val_loss: 1499.7583 - val_mae: 31.2077
Epoch 4/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1245.4751 - mae: 28.4746 - val_loss: 1516.4402 - val_mae: 32.6216
Epoch 5/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1153.8096 - mae: 27.7759 - val_loss: 1406.2695 - val_mae: 32.1586
Epoch 6/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1062.1300 - mae: 26.7928 - val_loss: 1249.8339 - val_mae: 30.8485
Epoch 7/10000
[1m79/79[0m [32m━━━━━━━━━━━━━━━