<a href="https://colab.research.google.com/github/alexcarvajal/ModeloPrediccionBRT/blob/main/Algoritmos_Modelo_con_Cross_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Modelo Cross-Validation Dataset 1


## Random Forest Dataset 1
70% Entrenamiento - 30% Prueba

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
import numpy as np
import pandas as pd

def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Cargar el dataset
dataset_path = '/content/drive/MyDrive/Reuniones/DatasetsWP2/Dataset1Codificado.csv'

data = pd.read_csv(dataset_path)

# Eliminar columna innecesaria
data = data.drop(columns=['Unnamed: 0'])

# Separar las características (X) de la variable objetivo (y)
X = data.drop(columns=['Inputs'])
y = data['Inputs']

# Inicializar el modelo de Random Forest para regresión
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Definir las métricas para la validación cruzada
scoring = {'MSE': make_scorer(mean_squared_error, greater_is_better=False),
           'R2': 'r2',
           'MAPE': make_scorer(mape, greater_is_better=False)}

# Aplicar la validación cruzada
cv_results = cross_validate(rf_regressor, X, y, cv=5, scoring=scoring, n_jobs=-1, return_train_score=False)

# Calcular y mostrar los resultados
print(f"MSE promedio: {-cv_results['test_MSE'].mean()}")
print(f"R2 promedio: {cv_results['test_R2'].mean()}")
print(f"MAPE promedio: {-cv_results['test_MAPE'].mean()}")

MSE promedio: 494.61328158426204
R2 promedio: 0.47128453114728963
MAPE promedio: inf


## LSTM  Dataset 1
70% Entrenamiento - 30% Prueba

In [6]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

# Cargar datos
dataset_path = '/content/drive/MyDrive/Reuniones/DatasetsWP2/Dataset1Codificado.csv'
data = pd.read_csv(dataset_path)

# Preprocesamiento
features = data.drop(columns=['Inputs', 'Unnamed: 0'])
target = data['Inputs']
scaler = MinMaxScaler(feature_range=(0, 1))
features_scaled = scaler.fit_transform(features)

# Convertir datos para LSTM
X = np.reshape(features_scaled, (features_scaled.shape[0], 1, features_scaled.shape[1]))
y = target.values

# Definir el modelo LSTM
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Callback para detener el entrenamiento
early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')

# TimeSeriesSplit para la validación cruzada
tscv = TimeSeriesSplit(n_splits=5)

metrics_summary = {'mse': [], 'r2': [], 'mape': []}

for train_index, test_index in tscv.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Entrenar el modelo
    history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), callbacks=[early_stopping], batch_size=32, verbose=0)

    # Predecir
    predictions = model.predict(X_test, verbose=0)

    # Calcular métricas
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    mape = mean_absolute_percentage_error(y_test, predictions)

    # Guardar métricas
    metrics_summary['mse'].append(mse)
    metrics_summary['r2'].append(r2)
    metrics_summary['mape'].append(mape)

    print(f'MSE: {mse}, R2: {r2}, MAPE: {mape}')

# Mostrar resumen de métricas
print("Resumen de métricas:")
print(f"MSE promedio: {np.mean(metrics_summary['mse'])}")
print(f"R2 promedio: {np.mean(metrics_summary['r2'])}")
print(f"MAPE promedio: {np.mean(metrics_summary['mape'])}")


MSE: 804.8328793750047, R2: 0.2371014559611302, MAPE: 1.4333623512150598e+16
MSE: 829.042551003372, R2: 0.32607887848176, MAPE: 1.2698506935784214e+16
MSE: 431.8103049858526, R2: 0.32422941016821016, MAPE: 8248134151799015.0
MSE: 599.4288847300128, R2: 0.4898016083519853, MAPE: 9820309876494968.0
MSE: 395.18621544881967, R2: 0.4047869152732646, MAPE: 7844698712112757.0
Resumen de métricas:
MSE promedio: 612.0601671086124
R2 promedio: 0.35639965364727005
MAPE promedio: 1.0589054637668312e+16


## SVM  Dataset 1
70% Entrenamiento - 30% Prueba



In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Cargar datos
dataset_path = '/content/drive/MyDrive/Reuniones/DatasetsWP2/Dataset1Codificado.csv'
data = pd.read_csv(dataset_path)


# Preprocesamiento básico
# Eliminar columna innecesaria y separar características de la variable objetivo
X = data.drop(columns=['Inputs', 'Unnamed: 0'])
y = data['Inputs']

# Crear un pipeline que primero estandariza los datos y luego aplica SVM
svm_pipeline = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=42))

# Aplicar validación cruzada
cv_scores = cross_val_score(svm_pipeline, X, y, cv=5, scoring='accuracy')

# Mostrar los resultados de la validación cruzada
print(f"Accuracy promedio: {cv_scores.mean()}")
print(f"Desviación estándar: {cv_scores.std()}")




# XGBoost Dataset 1


In [2]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np
# Cargar datos
dataset_path = '/content/drive/MyDrive/Reuniones/DatasetsWP2/Dataset1Codificado.csv'
data = pd.read_csv(dataset_path)

# Preprocesamiento básico
X = data.drop(['Inputs', 'Unnamed: 0'], axis=1)
y = data['Inputs']

# Convertir datos a DMatrix, formato optimizado de XGBoost
dtrain = xgb.DMatrix(X, label=y)

# Parámetros de XGBoost
params = {
    'objective': 'reg:squarederror',  # Cambiar por 'reg:logistic' para clasificación, si es necesario
    'eval_metric': ['rmse', 'mae'],  # Métricas de evaluación
    'max_depth': 6,
    'eta': 0.3,
    'seed': 42
}

# Validación cruzada con XGBoost
cv_results = xgb.cv(dtrain=dtrain, params=params, nfold=5, num_boost_round=50,
                    metrics=('rmse', 'mae'), as_pandas=True, seed=42)

# Mostrar resultados finales
print(cv_results.tail(1))


    train-rmse-mean  train-rmse-std  train-mae-mean  train-mae-std  \
49        20.910789        0.035241        12.43962       0.019574   

    test-rmse-mean  test-rmse-std  test-mae-mean  test-mae-std  
49       21.068221       0.057262      12.529507      0.033216  
