In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import root_mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the pre-split feature matrices and targets for the linear models.
# read_pickle executes pickle payloads, so these files must come from a trusted source.
X_train, X_test = (
    pd.read_pickle("processed_data/linear/Xlm_train.pkl"),
    pd.read_pickle("processed_data/linear/Xlm_test.pkl"),
)
y_train, y_test = (
    pd.read_pickle("processed_data/linear/ylm_train.pkl"),
    pd.read_pickle("processed_data/linear/ylm_test.pkl"),
)

In [3]:
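# NOTE: earlier, narrower search grid kept for reference only; the active
# ElasticNetCV configuration is defined in a later cell.
# elastic_net = ElasticNetCV(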
#     l1_ratio=[0.1, 0.5, 0.7, 1],  # Mixing values (0=Ridge, 1=Lasso)
#     alphas=np.logspace(-4, 2, 50),  # Range of alpha values
#     cv=5,  # 5-fold cross-validation
#     max_iter=10000,
#     random_state=42
# )

In [4]:
# Show which features the training matrix contains.
print(X_train.columns.tolist())

['LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'GrLivArea', 'GarageCars', 'Exterior1st_BrkFace', 'KitchenQual']


In [5]:
# Cross-validated Elastic Net: every (alpha, l1_ratio) pair on the grid below
# is evaluated with 10-fold CV when the estimator is fitted.
elastic_net = ElasticNetCV(
    alphas=np.logspace(-5, 3, 100),  # 100 log-spaced penalties from 1e-5 to 1e3
    l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9, 1],  # L1/L2 mix; 1 is pure Lasso
    cv=10,
    tol=1e-5,
    max_iter=25000,
    random_state=42,
)

In [6]:
# Run the cross-validated search on the training split; the fitted estimator is the cell output.
elastic_net.fit(X=X_train, y=y_train)

0,1,2
,l1_ratio,"[0.1, 0.3, ...]"
,eps,0.001
,n_alphas,'deprecated'
,alphas,array([1.0000...00000000e+03])
,fit_intercept,True
,precompute,'auto'
,max_iter,25000
,tol,1e-05
,cv,10
,copy_X,True


In [7]:
# Elastic Net predictions for the held-out test split.
predicciones_en = elastic_net.predict(X=X_test)

In [8]:
# Test-set metrics for the Elastic Net, on the same scale as y_test.
r2 = r2_score(y_true=y_test, y_pred=predicciones_en)
rmse = root_mean_squared_error(y_true=y_test, y_pred=predicciones_en)

In [9]:
# Print the Elastic Net test metrics, one per line.
print(
    "✅ Desempeno Elastic Net:",
    f"Test RMSE: {rmse:.2f}",
    f"R² Score: {r2:.3f}",
    sep="\n",
)

✅ Desempeno Elastic Net:
Test RMSE: 0.13
R² Score: 0.882


In [10]:
# Map targets and predictions back through expm1 before measuring the error.
# NOTE(review): presumably the target was log1p-transformed upstream — confirm.
y_test_real = np.expm1(y_test)
predicciones_real = np.expm1(predicciones_en)

rmse_real = root_mean_squared_error(y_true=y_test_real, y_pred=predicciones_real)
print(rmse_real)

23153.147675118347


In [11]:
# RMSE expressed as a percentage of the mean back-transformed test target.
margen_error = 100 * (rmse_real / y_test_real.mean())
print(margen_error)

13.430107689648697


In [12]:
# Estimator's own score on the test split (matches the R² printed earlier).
score = elastic_net.score(X=X_test, y=y_test)
print(score)

0.8816675796968476


In [13]:
#-------------------Lasso----------------------------------------------------

In [14]:
from sklearn.linear_model import Lasso

In [15]:
# Baseline Lasso with a fixed penalty strength.
# NOTE(review): alpha is hard-coded here, while the Elastic Net was tuned by
# cross-validation, so the comparison may understate Lasso — consider LassoCV.
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X=X_train, y=y_train)

0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [16]:
# Lasso predictions and test metrics, on the same scale as y_test.
predicciones_lasso = lasso_model.predict(X=X_test)
r2_lasso = r2_score(y_true=y_test, y_pred=predicciones_lasso)
rmse_lasso = root_mean_squared_error(y_true=y_test, y_pred=predicciones_lasso)

In [17]:
# Report the Lasso test metrics. The heading previously said "Elastic Net"
# (copy-paste slip), which mislabelled these numbers in the output.
print("✅ Desempeno Lasso:")
print(f"Test RMSE: {rmse_lasso:.2f}")
print(f"R² Score: {r2_lasso:.3f}")

✅ Desempeno Elastic Net:
Test RMSE: 0.22
R² Score: 0.694


In [18]:
# Lasso predictions mapped back through expm1, compared against the
# back-transformed test target computed in an earlier cell (y_test_real).
predicciones_real_lasso = np.expm1(predicciones_lasso)

rmse_real_lasso = root_mean_squared_error(
    y_true=y_test_real,
    y_pred=predicciones_real_lasso,
)
print(rmse_real_lasso)

38985.720543229574


In [19]:
# Lasso RMSE as a percentage of the mean back-transformed test target.
margen_error_lasso = 100 * (rmse_real_lasso / y_test_real.mean())
print(margen_error_lasso)

22.613876635736798


In [20]:
#--------------------Ridge----------------------------

In [21]:
from sklearn.linear_model import Ridge

In [22]:
# Baseline Ridge with a fixed penalty strength.
# NOTE(review): alpha is hard-coded here, while the Elastic Net was tuned by
# cross-validation — consider RidgeCV for a like-for-like comparison.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X=X_train, y=y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [23]:
# Ridge predictions and test metrics, on the same scale as y_test.
predicciones_ridge = ridge_model.predict(X_test)
rmse_ridge = root_mean_squared_error(y_test, predicciones_ridge)
r2_ridge = r2_score(y_test, predicciones_ridge)

# The heading previously said "Elastic Net" (copy-paste slip), which
# mislabelled the Ridge numbers in the output.
print("✅ Desempeno Ridge:")
print(f"Test RMSE: {rmse_ridge:.2f}")
print(f"R² Score: {r2_ridge:.3f}")

✅ Desempeno Elastic Net:
Test RMSE: 0.13
R² Score: 0.882


In [24]:
# Ridge predictions mapped back through expm1, compared against the
# back-transformed test target computed in an earlier cell (y_test_real).
predicciones_real_ridge = np.expm1(predicciones_ridge)
rmse_real_ridge = root_mean_squared_error(
    y_true=y_test_real,
    y_pred=predicciones_real_ridge,
)
print(rmse_real_ridge)

23159.829858387977


In [25]:
# Ridge RMSE as a percentage of the mean back-transformed test target.
margen_error_ridge = 100 * (rmse_real_ridge / y_test_real.mean())
print(margen_error_ridge)

13.433983725951506


In [26]:
# Side-by-side summary of the three linear models.
# The error column holds the values returned by root_mean_squared_error, so it
# is labelled RMSE; the previous 'MSE Real' header misreported the metric.
# pandas is already imported in the notebook's first cell, so it is not re-imported here.
comparison_df = pd.DataFrame({
    'Modelo': ['Elastic Net', 'Lasso', 'Ridge'],
    'RMSE Real': [rmse_real, rmse_real_lasso, rmse_real_ridge],
    'R² Score': [r2, r2_lasso, r2_ridge],
    'Margen de error(%)': [margen_error, margen_error_lasso, margen_error_ridge]
})

# Rich display of the table under a plain-text heading.
print("📊 Comparasion de modelos lineales:")
display(comparison_df)



📊 Comparasion de modelos lineales:


Unnamed: 0,Modelo,MSE Real,R² Score,Margen de error(%)
0,Elastic Net,23153.147675,0.881668,13.430108
1,Lasso,38985.720543,0.694307,22.613877
2,Ridge,23159.829858,0.881665,13.433984


In [27]:
# Persist the fitted Elastic Net to the working directory for later reuse.
joblib.dump(value=elastic_net, filename='elastic_net.pkl')

['elastic_net.pkl']

In [28]:
# Feature order of the training matrix, for comparison with what the model recorded.
print(X_train.columns.tolist())

['LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'GrLivArea', 'GarageCars', 'Exterior1st_BrkFace', 'KitchenQual']


In [29]:
# Feature names stored on the fitted estimator; printed so they can be
# compared against the X_train columns listed in the previous cell.
print(elastic_net.feature_names_in_)


['LotArea' 'OverallQual' 'OverallCond' 'YearBuilt' 'GrLivArea'
 'GarageCars' 'Exterior1st_BrkFace' 'KitchenQual']
