In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression, ElasticNet
import numpy as np
from sklearn.metrics import r2_score 
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder 
from sklearn.compose import make_column_transformer 
from sklearn.compose import make_column_selector

In [2]:
housing = pd.read_csv("C:/Python/Datasets/Housing.csv")
housing.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
1,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
2,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
3,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
4,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [3]:
X = housing.drop('price', axis=1)
y = housing['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=24, test_size=0.3)
ohe = OneHotEncoder(sparse_output=False, drop='first').set_output(transform='pandas')
ct = make_column_transformer(('passthrough', make_column_selector(dtype_exclude=object)  ),
                             (ohe, make_column_selector(dtype_include=object) ),
                            verbose_feature_names_out=False).set_output(transform='pandas')
elastic = ElasticNet()
X_ohe_trn = ct.fit_transform(X_train)
X_ohe_tst = ct.transform(X_test)

Hyper-Parameter Optimization

In [5]:
alphas = np.linspace(0.0001, 10, 20)
l1 = np.linspace(0.0001, 1, 10)
scores = []
for a in alphas:
    for i in l1:
        elastic = ElasticNet(alpha=a, l1_ratio=i)
        elastic.fit(X_ohe_trn, y_train)
        y_pred = elastic.predict(X_ohe_tst)
        scores.append([a,i, r2_score(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['alpha','l1_ratio','score'])
df_scores.sort_values('score', ascending=False, inplace=True)
best_a = df_scores['alpha'].iloc[0]
best_sc = df_scores['score'].iloc[0]
best_l1 = df_scores['l1_ratio'].iloc[0]
print("Best Alpha:", best_a)
print("Best l1 ratio:", best_l1)
print("Best Score:", best_sc)

Best Alpha: 0.0001
Best l1 ratio: 1.0
Best Score: 0.6246856181760901


Building best model on best score:

In [7]:
elastic = ElasticNet(alpha=best_a, l1_ratio=best_l1)
elastic.fit(X_ohe_trn, y_train)

In [8]:
print(X_ohe_trn.columns)
print(elastic.coef_)

Index(['lotsize', 'bedrooms', 'bathrms', 'stories', 'garagepl', 'driveway_yes',
       'recroom_yes', 'fullbase_yes', 'gashw_yes', 'airco_yes',
       'prefarea_yes'],
      dtype='object')
[3.60793320e+00 1.90378380e+03 1.42583605e+04 6.50823112e+03
 5.15503410e+03 7.37970949e+03 3.60587126e+03 7.30633029e+03
 1.08877125e+04 1.23164438e+04 7.66079630e+03]
