In [25]:
import pandas as pd
# Read the CSV and discard its 'time' column right away.
df = pd.read_csv('../../Datos/df_balanceado.csv').drop(columns=['time'])

# Columns kept for modelling: 'count' plus the wind-direction and calendar columns.
model_columns = [
    'count',
    'winddirection_10m_dominant (°)',
    'semana_del_mes',
    'semana_del_anio',
    'es_festivo_mexico',
    'anio',
    'dia_de_la_semana',
]
df_features = df[model_columns]

# Preview the first rows of the selection.
df_features.head()

Unnamed: 0,count,winddirection_10m_dominant (°),semana_del_mes,semana_del_anio,es_festivo_mexico,anio,dia_de_la_semana
0,129,59,15,15,0,2018,1
1,46,101,37,37,0,2017,6
2,80,340,24,24,0,2022,3
3,95,124,44,44,0,2018,5
4,41,3,37,37,0,2022,6


In [26]:
# Build the target vector (Y) and the feature matrix (X) from df_features.
target_column = 'count'

# Y is the 'count' column, i.e. the value the models will predict.
Y = df_features[target_column]

# X is every remaining column; only 'count' is removed at this step.
X = df_features.drop(columns=[target_column])


In [27]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
import numpy as np

# Hold out 20% of the rows for evaluation. One seed is shared by the split and by
# every stochastic estimator so a fresh kernel reproduces the same results table.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED)

# Candidate regressors, all with library-default hyperparameters.
# SVR, LinearRegression and KNeighborsRegressor take no random_state argument.
# NOTE(review): SVR, K-NN and the MLP are sensitive to feature scale and receive
# unscaled features here — consider wrapping them in a StandardScaler pipeline.
models = {
    'Random Forest': RandomForestRegressor(random_state=SEED),
    'SVR': SVR(),
    'Linear Regression': LinearRegression(),
    'K-NN': KNeighborsRegressor(),
    'AdaBoost': AdaBoostRegressor(random_state=SEED),
    'Gradient Boosting': GradientBoostingRegressor(random_state=SEED),
    'Neural Network (ANN)': MLPRegressor(random_state=SEED)
}

# Fit each model on the training split and score it on the held-out split.
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    # MAPE expressed in percent; it evaluates to inf if y_test contains a zero.
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    mse = mean_squared_error(y_test, y_pred)
    # RMSE is derived from the MSE instead of mean_squared_error(squared=False):
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mse)
    # mean_squared_log_error raises ValueError on negative values, which would
    # abort the whole comparison; record NaN for that model instead.
    if (y_pred < 0).any() or (y_test < 0).any():
        msle = np.nan
    else:
        msle = mean_squared_log_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[model_name] = {
        'MAE': mae,
        'MAPE': mape,
        'MSE': mse,
        'RMSE': rmse,
        'MSLE': msle,
        'R-squared': r2
    }

# One row per model, one column per metric.
results_df = pd.DataFrame.from_dict(results, orient='index')

# Display the comparison table as the cell output.
results_df


Unnamed: 0,MAE,MAPE,MSE,RMSE,MSLE,R-squared
Random Forest,10.510383,15.884204,189.326997,13.759615,0.039138,0.633814
SVR,18.907802,31.681223,516.936473,22.736237,0.116398,0.00017
Linear Regression,15.370855,24.44555,354.297238,18.822785,0.079291,0.314738
K-NN,18.805364,31.730224,554.987126,23.558165,0.124257,-0.073425
AdaBoost,11.190339,17.306152,198.735577,14.097361,0.043839,0.615617
Gradient Boosting,9.858299,15.121873,161.058788,12.690894,0.034412,0.688489
Neural Network (ANN),16.877712,28.007869,422.222935,20.548064,0.09584,0.18336
