# **Evaluación y análisis de modelos: casos problema de regresión y clasificación**

### **Librerías:**

In [1]:
import sys
# Append the parent directory ("..") to sys.path; presumably this is what lets
# the `modulos` package below be found when the notebook runs from a subfolder.
sys.path.append("..")

# With the search path extended, import the shared DataFrame from the project module
from modulos.compartido import df


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

## **Caso problema: Evaluación del potencial de energía Solar en Barranquilla: Análisis de irradiancia y variables atmosféricas para la generación sostenible**

In [3]:
# Largest UV index in the source data, used to rescale the UV column
# relative to its own maximum.
uv_max = df["ALLSKY_SFC_UV_INDEX"].max()

# Work on a copy so the shared DataFrame stays untouched.
df_rg = df.copy()

# The regression target is the product of three factors:
#   - ALLSKY_SFC_SW_DIFF divided by 1000 (original note: Wh/m² to kWh/m²)
#   - ALLSKY_KT
#   - ALLSKY_SFC_UV_INDEX divided by its maximum
diffuse_scaled = df_rg["ALLSKY_SFC_SW_DIFF"] / 1000
uv_relative = df_rg["ALLSKY_SFC_UV_INDEX"] / uv_max
df_rg["SolarIndex"] = diffuse_scaled * df_rg["ALLSKY_KT"] * uv_relative

df_rg.head()

Unnamed: 0,YEAR,MO,DY,HR,T2M,PRECTOTCORR,RH2M,WS10M,WD10M,PS,ALLSKY_SFC_UV_INDEX,ALLSKY_SRF_ALB,ALLSKY_SFC_SW_DIFF,T2MDEW,T2MWET,WS50M,ALLSKY_KT,LOCALITY,NBHD,SolarIndex
0,2020,1,1,0,26.66,0.03,86.19,7.87,40.0,100.66,0.0,,0.0,24.19,25.42,10.13,,Centro,BarrioAbajo,
1,2020,1,1,1,26.56,0.03,85.81,8.02,41.8,100.58,0.0,,0.0,24.01,25.29,10.31,,Centro,BarrioAbajo,
2,2020,1,1,2,26.43,0.02,85.59,8.05,42.7,100.52,0.0,,0.0,23.84,25.14,10.33,,Centro,BarrioAbajo,
3,2020,1,1,3,26.35,0.0,84.96,8.01,43.7,100.49,0.0,,0.0,23.63,24.99,10.25,,Centro,BarrioAbajo,
4,2020,1,1,4,26.27,0.0,84.21,7.88,45.6,100.49,0.0,,0.0,23.42,24.85,10.08,,Centro,BarrioAbajo,


In [4]:
# Predictor columns fed to every regression model below.
feature_cols = [
    'T2M', 'RH2M', 'WS10M', 'WD10M', 'PS',
    'T2MDEW', 'T2MWET', 'WS50M'
]

# SolarIndex is NaN wherever one of its factors is missing (the head() preview
# shows ALLSKY_KT and SolarIndex empty for the first rows). scikit-learn
# estimators refuse NaN targets, so keep only rows where the predictors and the
# target are all present. Every model then trains on the same complete rows.
model_cols = feature_cols + ['SolarIndex']
df_model = df_rg.dropna(subset=model_cols)

X = df_model[feature_cols]
y = df_model['SolarIndex']


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_kwargs = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

# Per-model metrics, keyed by model name and filled in by the model cells.
model_results = dict()

### **Ridge Regression**

In [None]:
# Ridge regression: standardise the features, then fit the linear model.
ridge_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Ridge())
])

# Candidate regularisation strengths, compared with 5-fold cross-validation.
ridge_params = {'model__alpha': [0.01, 0.1, 1, 10, 100]}
ridge_grid = GridSearchCV(ridge_pipe, ridge_params, cv=5)

# time.perf_counter() is a monotonic clock meant for measuring intervals;
# time.time() can jump if the system clock is adjusted mid-run.
# The timed span covers the whole grid search, not a single fit.
start_time = time.perf_counter()
ridge_grid.fit(X_train, y_train)
end_time = time.perf_counter()

y_pred_ridge = ridge_grid.predict(X_test)

ridge_training_time = end_time - start_time

# Hold-out metrics plus the selected alpha and the search time.
model_results['Ridge'] = {
    'Best Alpha': ridge_grid.best_params_['model__alpha'],
    'R2': r2_score(y_test, y_pred_ridge),
    'MSE': mean_squared_error(y_test, y_pred_ridge),
    'MAE': mean_absolute_error(y_test, y_pred_ridge),
    'Training Time (s)': round(ridge_training_time, 2)
}



### **Lasso Regression**

In [None]:
# Lasso regression: standardise the features, then fit the L1-regularised model.
lasso_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Lasso(max_iter=10000))
])

# Candidate regularisation strengths, compared with 5-fold cross-validation.
lasso_params = {'model__alpha': [0.01, 0.1, 1, 10]}
lasso_grid = GridSearchCV(lasso_pipe, lasso_params, cv=5)

# Time the grid search the same way the Ridge cell does, so the
# 'Training Time (s)' column of the comparison table is filled for Lasso too.
start_time = time.perf_counter()
lasso_grid.fit(X_train, y_train)
end_time = time.perf_counter()

y_pred_lasso = lasso_grid.predict(X_test)

lasso_training_time = end_time - start_time

# Hold-out metrics plus the selected alpha and the search time.
model_results['Lasso'] = {
    'Best Alpha': lasso_grid.best_params_['model__alpha'],
    'R2': r2_score(y_test, y_pred_lasso),
    'MSE': mean_squared_error(y_test, y_pred_lasso),
    'MAE': mean_absolute_error(y_test, y_pred_lasso),
    'Training Time (s)': round(lasso_training_time, 2)
}



### **Random Forest**

In [None]:
# Random forest with 100 trees; the fixed seed makes the fit reproducible.
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Time the fit so this model reports 'Training Time (s)' like the Ridge cell.
start_time = time.perf_counter()
rf.fit(X_train, y_train)
end_time = time.perf_counter()

y_pred_rf = rf.predict(X_test)

rf_training_time = end_time - start_time

# Hold-out metrics plus the fit time.
model_results['Random Forest'] = {
    'R2': r2_score(y_test, y_pred_rf),
    'MSE': mean_squared_error(y_test, y_pred_rf),
    'MAE': mean_absolute_error(y_test, y_pred_rf),
    'Training Time (s)': round(rf_training_time, 2)
}

### **XGBoost**

In [None]:
# Gradient-boosted trees with 100 estimators and a fixed seed.
xgb = XGBRegressor(n_estimators=100, random_state=42)

# Time the fit so this model reports 'Training Time (s)' like the Ridge cell.
start_time = time.perf_counter()
xgb.fit(X_train, y_train)
end_time = time.perf_counter()

y_pred_xgb = xgb.predict(X_test)

xgb_training_time = end_time - start_time

# Hold-out metrics plus the fit time.
model_results['XGBoost'] = {
    'R2': r2_score(y_test, y_pred_xgb),
    'MSE': mean_squared_error(y_test, y_pred_xgb),
    'MAE': mean_absolute_error(y_test, y_pred_xgb),
    'Training Time (s)': round(xgb_training_time, 2)
}

### **Resultados:**

#### **Comparación gráfica:**

In [None]:


# Turn the results dict into a table with one row per model and one column per metric
result_df_reg = pd.DataFrame(model_results).T

# Metrics to chart, one bar plot each
metrics = ['R2', 'MSE', 'MAE']

for metric in metrics:
    metric_by_model = result_df_reg[metric]

    plt.figure(figsize=(8, 5))
    metric_by_model.plot(kind='bar', color='skyblue')

    # Labelling and cosmetics
    plt.ylabel(metric)
    plt.title(f'Comparación de modelos según {metric}')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()
