In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

In [None]:
boston = pd.read_csv('Boston.csv')
boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [None]:
X = boston[['lstat']]
y = boston['medv']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

In [None]:
lr = LinearRegression()
lr.fit(X_train, y_train)

In [None]:
y_pred = lr.predict(X_test)
r2_score(y_test, y_pred)

0.5279301917445975

#### Polynomial Features

In [None]:
# Cubic polynomial expansion; include_bias=False leaves out the constant column.
poly = PolynomialFeatures(degree=3, include_bias=False)
poly.set_output(transform='pandas')  # transformed splits come back as DataFrames

# Fit the expansion on the training split only, then apply it to the test split.
X_train_poly, X_test_poly = poly.fit_transform(X_train), poly.transform(X_test)

# Refit the existing LinearRegression on the expanded features and score the test split.
y_pred = lr.fit(X_train_poly, y_train).predict(X_test_poly)
r2_score(y_test, y_pred)

0.6464637609380741

In [None]:
#taking columns 'rad' and 'lstat'
X=boston[["lstat",'rad']]
y=boston['medv']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

In [None]:
poly=PolynomialFeatures(degree=3).set_output(transform='pandas')
X_train_poly=poly.fit_transform(X_train)
X_test_poly=poly.transform(X_test)


In [None]:
lr.fit(X_train_poly,y_train)
y_pred=lr.predict(X_test_poly)
r2_score(y_test,y_pred)

0.6883404673191278

In [None]:
X_test_poly.columns

Index(['1', 'lstat', 'rad', 'lstat^2', 'lstat rad', 'rad^2', 'lstat^3',
       'lstat^2 rad', 'lstat rad^2', 'rad^3'],
      dtype='object')

In [None]:
y=boston['medv']
X=boston.drop('medv',axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

In [None]:
poly=PolynomialFeatures(degree=2).set_output(transform = "pandas")
X_Train_poly=poly.fit_transform(X_train)
X_Test_poly=poly.transform(X_test)

In [None]:
# Refit the shared LinearRegression on the degree-2 expansion and report R^2 on the
# held-out split. (The former `lr.score(...)` line computed the same R^2 but, not
# being the last expression of the cell, was never displayed.)
lr.fit(X_Train_poly, y_train)
ypred = lr.predict(X_Test_poly)
r2_score(y_test, ypred)

0.688155666277088

In [None]:
#Concrete dataset

In [None]:
cdf=pd.read_excel('Concrete_Data.xls')
cdf.head()
cdf.columns


Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [None]:
y=cdf["Concrete compressive strength(MPa, megapascals) "]
X=cdf.drop("Concrete compressive strength(MPa, megapascals) ",axis=1)

In [None]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=24)

In [None]:
poly=PolynomialFeatures(3).set_output(transform="pandas")
X_train_poly=poly.fit_transform(X_train)
X_test_poly=poly.transform(X_test)

In [None]:
lr.fit(X_train_poly,y_train)
ypred=lr.predict(X_test_poly)
r2_score(y_test,ypred)

0.8622911623298171

## Using Pipelines

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
poly = PolynomialFeatures(degree = 2).set_output(transform = 'pandas')
lr = LinearRegression()

In [None]:
pipe = Pipeline([('POLY', poly), ('LR', lr)])
pipe.fit(X_train, y_train)

In [None]:
y_pred = pipe.predict(X_test)
r2_score(y_test, y_pred)


0.7807279046870703

# Housing Dataset

In [None]:
housing=pd.read_csv('Housing.csv')
housing.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
1,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
2,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
3,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
4,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [None]:
y=housing['price']
X=housing[["driveway"]]
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
from sklearn.pipeline import Pipeline

**One Hot Encoding**

`.get_dummies()` from *pandas*

In [None]:
dum_X = pd.get_dummies(X, drop_first = True)
dum_X.head(3)

Unnamed: 0,driveway_yes
0,True
1,True
2,True


In [None]:
lr = LinearRegression()
lr.fit(dum_X, y)
lr.intercept_, lr.coef_

(48555.77922077924, array([22778.11630161]))

In [None]:
dum_X = pd.get_dummies(X)
dum_X.head(3)

Unnamed: 0,driveway_no,driveway_yes
0,False,True
1,False,True
2,False,True


In [None]:
# Keep only the 'driveway_no' indicator. Rebinding (instead of inplace=True) together
# with errors='ignore' lets this cell be re-run without raising KeyError.
dum_X = dum_X.drop(columns='driveway_yes', errors='ignore')
dum_X.head(3)

Unnamed: 0,driveway_no
0,False
1,False
2,False


In [None]:
lr = LinearRegression()
lr.fit(dum_X, y)
lr.intercept_, lr.coef_

(71333.89552238806, array([-22778.11630161]))

In [None]:
# Intercept + coefficient of the 'driveway_no' model, i.e. its prediction when
# driveway_no is True. Computed from the fitted model rather than from numbers
# pasted out of the previous output, so it stays correct if the data change.
lr.intercept_ + lr.coef_[0]

48555.77922077806

**OneHotEncoder**

In [None]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse_output = False, drop = 'first').set_output(transform = 'pandas')

In [None]:
dum_X = ohe.fit_transform(X)
lr = LinearRegression()
lr.fit(dum_X, y)
lr.intercept_, lr.coef_

(48555.77922077924, array([22778.11630161]))

#### Two Columns "driveway" and "airco"

In [None]:
X=housing[["driveway","airco"]]
y=housing['price']


In [None]:
ohe=OneHotEncoder(sparse_output=False,drop='first').set_output(transform="pandas")
dum_X=ohe.fit_transform(X)

In [None]:
lr=LinearRegression()
lr.fit(dum_X,y)
lr.intercept_,lr.coef_

(43790.670320318815, array([19302.6687155, 24460.8923557]))

**Column Transformers**

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector

In [None]:
# Encoder for the categorical columns: dense output, first level of each feature dropped.
ohe = OneHotEncoder(sparse_output=False, drop='first')
ohe.set_output(transform='pandas')

# Split the column names by dtype: object columns vs. everything else.
is_text = housing.dtypes == 'object'
str_cols = housing.columns[is_text].tolist()
num_cols = housing.columns[~is_text].tolist()

In [None]:
# ct = make_column_transformer(('passthrough', num_cols), (ohe, str_cols)).set_output(transform = 'pandas')

ct = make_column_transformer( ('passthrough', num_cols), (ohe, str_cols), verbose_feature_names_out = False).set_output(transform = 'pandas')

ct.fit_transform(housing).head(3)


Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,garagepl,driveway_yes,recroom_yes,fullbase_yes,gashw_yes,airco_yes,prefarea_yes
0,42000.0,5850,3,1,2,1,1.0,0.0,1.0,0.0,0.0,0.0
1,38500.0,4000,2,1,1,0,1.0,0.0,0.0,0.0,0.0,0.0
2,49500.0,3060,3,1,1,0,1.0,0.0,0.0,0.0,0.0,0.0


*Using `make_column_selector()`*

In [None]:
ct = make_column_transformer( ('passthrough', make_column_selector(dtype_exclude=object)), (ohe, make_column_selector(dtype_include=object)), verbose_feature_names_out = False).set_output(transform = 'pandas')

ct.fit_transform(housing).head(3)

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,garagepl,driveway_yes,recroom_yes,fullbase_yes,gashw_yes,airco_yes,prefarea_yes
0,42000.0,5850,3,1,2,1,1.0,0.0,1.0,0.0,0.0,0.0
1,38500.0,4000,2,1,1,0,1.0,0.0,0.0,0.0,0.0,0.0
2,49500.0,3060,3,1,1,0,1.0,0.0,0.0,0.0,0.0,0.0


**Use One Hot Encoding with `train_test_split`**

In [None]:
y = housing['price']
X = housing.drop('price', axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

In [None]:
# Fit the column transformer on the training split only, then reuse it on the test split.
X_ohe_train, X_ohe_test = ct.fit_transform(X_train), ct.transform(X_test)

# Fresh linear model on the encoded features; report R^2 on the test split.
lr = LinearRegression().fit(X_ohe_train, y_train)
y_pred = lr.predict(X_ohe_test)
r2_score(y_test, y_pred)

0.6246856191453718

In [None]:
from sklearn.pipeline import Pipeline
pipe=Pipeline([("CT",ct),("LR",lr)])

In [None]:
pipe.fit(X_train,y_train)

In [None]:
y_pred=pipe.predict(X_test)
r2_score(y_test,y_pred)

0.6246856191453718

***OHE*, *Polynomial Transformation*, and *LR* in one Pipe**

In [None]:
poly = PolynomialFeatures(degree = 2, include_bias=False).set_output(transform = 'pandas')

pipe = Pipeline([('CT', ct), ('POLY', poly), ('LR', lr)])
pipe.fit(X_train, y_train)

In [None]:
y_pred = pipe.predict(X_test)
r2_score(y_test, y_pred)

0.5558314098854267

# **Scaling**

In [None]:
# Toy data: x runs 10, 20, ..., 100 and y runs 1, 2, ..., 10.
x = np.arange(10, 101, 10)
y = np.arange(1, 11)

In [None]:
# Standardise by hand: subtract the mean, then divide by the standard deviation
# (ndarray.std() with its default ddof).
mx, my = x.mean(), y.mean()
sx, sy = x.std(), y.std()
scl_x, scl_y = (x - mx) / sx, (y - my) / sy

In [None]:
scl_x.mean()

-6.661338147750939e-17

In [None]:
scl_x.std()

1.0

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd


In [None]:
# Put the two toy arrays in one frame and let StandardScaler learn per-column statistics.
df = pd.DataFrame({'x': x, 'y': y})
ss = StandardScaler().fit(df)
df

Unnamed: 0,x,y
0,10,1
1,20,2
2,30,3
3,40,4
4,50,5
5,60,6
6,70,7
7,80,8
8,90,9
9,100,10


In [None]:
ss.mean_

2

In [None]:
ss.scale_

array([28.72281323,  2.87228132])

In [None]:
ss.transform(df)

array([[-1.5666989 , -1.5666989 ],
       [-1.21854359, -1.21854359],
       [-0.87038828, -0.87038828],
       [-0.52223297, -0.52223297],
       [-0.17407766, -0.17407766],
       [ 0.17407766,  0.17407766],
       [ 0.52223297,  0.52223297],
       [ 0.87038828,  0.87038828],
       [ 1.21854359,  1.21854359],
       [ 1.5666989 ,  1.5666989 ]])

### Concrete Cement

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
df=pd.read_excel("Concrete_Data.xls")

In [None]:
# Pipeline building blocks: standardise, expand to degree-2 terms (no bias column),
# then ordinary least squares.
ss = StandardScaler()
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.set_output(transform="pandas")
lr = LinearRegression()


In [None]:
X=df.drop("Concrete compressive strength(MPa, megapascals) ",axis=1)
y=df["Concrete compressive strength(MPa, megapascals) "]
X_Train,X_Test,y_Train,y_Test=train_test_split(X,y,test_size=0.3,random_state=24)

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
pipe=Pipeline([("SS",ss),("PL",poly),("LR",lr)])

In [None]:
pipe.fit(X_Train,y_Train)

In [None]:
y_pred = pipe.predict(X_Test)
r2_score(y_Test, y_pred)

0.780727904686751

# **Ridge Regression**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

In [None]:
df = pd.read_excel('Concrete_Data.xls')
df.head(1)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111


In [None]:
y = df['Concrete compressive strength(MPa, megapascals) ']
X = df.drop('Concrete compressive strength(MPa, megapascals) ', axis = 1)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

**Fit plain Linear Regression as a baseline for comparison**

In [None]:
lr = LinearRegression()
lr.fit(X_train, y_train)

In [None]:
y_pred = lr.predict(X_test)
r2_score(y_test, y_pred)

0.5770874758663003

**Now fit Ridge Regression on the same split for comparison**

In [None]:
ridge = Ridge()
ridge.fit(X_train, y_train)

In [None]:
y_pred = ridge.predict(X_test)
r2_score(y_test, y_pred)

0.5770871125455272

**Now, using Linear Regression on degree-3 polynomial features for comparison**

In [None]:
# Degree-3 polynomial expansion followed by ordinary least squares.
lr = LinearRegression()
poly = PolynomialFeatures(degree=3, include_bias=False)
poly.set_output(transform='pandas')

# Explicitly transformed copies of both splits (the pipeline below fits `poly` again).
X_poly_train, X_poly_test = poly.fit_transform(X_train), poly.transform(X_test)

# NOTE(review): `Pipeline` is not among this section's imports; it is expected to be
# in scope from an earlier `from sklearn.pipeline import Pipeline` cell.
pipe = Pipeline(steps=[('POLY', poly), ('LR', lr)])
r2_score(y_test, pipe.fit(X_train, y_train).predict(X_test))

0.8622903272709064

In [None]:
# Same polynomial expansion, with Ridge as the final estimator instead of plain OLS.
pipe = Pipeline(steps=[('POLY', poly), ('RIDGE', ridge)])
r2_score(y_test, pipe.fit(X_train, y_train).predict(X_test))

  return linalg.solve(A, Xy, assume_a="pos", overwrite_a=True).T


0.8695084843927098

**Filter the extracted features based on coefficient values**

In [None]:
# Pair each polynomial feature name with the coefficient held by the fitted Ridge.
df_coef = pd.DataFrame({'feature': X_poly_train.columns, 'coef': ridge.coef_})

print(df_coef.shape)

# Filter on magnitude: a signed test (coef > threshold) would silently discard every
# feature with a negative coefficient, however large its effect.
df_coef[df_coef['coef'].abs() > 0.0001]

(164, 2)


Unnamed: 0,feature,coef
1,Blast Furnace Slag (component 2)(kg in a m^3 m...,0.628844
5,Coarse Aggregate (component 6)(kg in a m^3 mi...,2.601218
6,Fine Aggregate (component 7)(kg in a m^3 mixture),0.911804
7,Age (day),0.12384
8,Cement (component 1)(kg in a m^3 mixture)^2,0.010499
9,Cement (component 1)(kg in a m^3 mixture) Blas...,0.031341
10,Cement (component 1)(kg in a m^3 mixture) Fly ...,0.008691
14,Cement (component 1)(kg in a m^3 mixture) Fine...,0.023291
16,Blast Furnace Slag (component 2)(kg in a m^3 m...,0.004981
17,Blast Furnace Slag (component 2)(kg in a m^3 m...,0.015336


**Considering different values of alpha**

In [None]:
ridge = Ridge(alpha = 0.22)
ridge.fit(X_train, y_train)
r2_score(y_test, ridge.predict(X_test))

0.577087395932842

**Tuning for alpha**

Method 1: alphas = [0.01, 0.1, 0.3, 0.6, 1, 1.5, 2, 4, 10]

In [None]:
alphas = [0.01, 0.1, 0.3, 0.6, 1, 1.5, 2, 4, 10]

# Collect one test-split R^2 per candidate alpha, in the same order as `alphas`.
scores = []
for a in alphas:
    ridge = Ridge(alpha=a).fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    scores.append(r2_score(y_test, y_pred))

print(scores)

# NOTE(review): alpha is chosen by its score on the test split, so "Best Score" is
# an optimistic estimate of out-of-sample performance.
i_max = np.argmax(scores)  # position of the highest R^2
print("Best alpha:", alphas[i_max])
print("Best Score:", scores[i_max])

[0.5770874722329257, 0.5770874395327079, 0.577087366866534, 0.577087257869797, 0.5770871125455272, 0.577086930897764, 0.5770867492584173, 0.5770860227852062, 0.5770838441740158]
Best alpha: 0.01
Best Score: 0.5770874722329257


Method 2: np.linspace()

In [None]:
# 20 evenly spaced candidate penalties between 0.0001 and 10.
alphas = np.linspace(0.0001, 10, 20)

# Collect one test-split R^2 per candidate alpha, in the same order as `alphas`.
scores = []
for a in alphas:
    ridge = Ridge(alpha=a).fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    scores.append(r2_score(y_test, y_pred))

print(scores)

# NOTE(review): alpha is chosen by its score on the test split, so "Best Score" is
# an optimistic estimate of out-of-sample performance.
i_max = np.argmax(scores)  # position of the highest R^2
print("Best alpha:", alphas[i_max])
print("Best Score:", scores[i_max])

[0.5770874758299661, 0.5770872846062249, 0.5770870933918076, 0.5770869021867151, 0.5770867109909483, 0.5770865198045079, 0.577086328627394, 0.5770861374596081, 0.5770859463011508, 0.5770857551520228, 0.5770855640122242, 0.5770853728817572, 0.5770851817606215, 0.5770849906488178, 0.5770847995463473, 0.5770846084532105, 0.5770844173694083, 0.577084226294941, 0.5770840352298099, 0.5770838441740158]
Best alpha: 0.0001
Best Score: 0.5770874758299661


# **Lasso Regression**

In [None]:
from sklearn.linear_model import Lasso
# 20 evenly spaced candidate penalties between 0.0001 and 10.
alphas = np.linspace(0.0001, 10, 20)

# Collect one test-split R^2 per candidate alpha, in the same order as `alphas`.
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)
    y_pred = lasso.predict(X_test)
    scores.append(r2_score(y_test, y_pred))

print(scores)

# NOTE(review): alpha is chosen by its score on the test split, so "Best Score" is
# an optimistic estimate of out-of-sample performance.
i_max = np.argmax(scores)  # position of the highest R^2
print("Best alpha:", alphas[i_max])
print("Best Score:", scores[i_max])

[0.5770874150304416, 0.5767041829082049, 0.5761952824755794, 0.575561233792488, 0.5748018623212912, 0.5739180100716871, 0.5729095588751245, 0.5720871198045379, 0.572248499866566, 0.5723960126659194, 0.5725307377583977, 0.5726525815997647, 0.5727622936244379, 0.5728589299237389, 0.5729423271328136, 0.5730131262584663, 0.5730713673166632, 0.5731168625438297, 0.5731412769633368, 0.5731470537980619]
Best alpha: 0.0001
Best Score: 0.5770874150304416


# **Hybrid Regression**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures

In [None]:
df = pd.read_excel('Concrete_Data.xls')
df.head(1)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111


In [None]:
y = df['Concrete compressive strength(MPa, megapascals) ']
X = df.drop('Concrete compressive strength(MPa, megapascals) ', axis = 1)

In [None]:
# 70/30 split with a fixed seed so the scores are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)

# ElasticNet with its default hyperparameters; report R^2 on the test split.
elastic = ElasticNet().fit(X_train, y_train)
y_pred = elastic.predict(X_test)
r2_score(y_test, y_pred)

0.5765974767906119

**Hyper Parameter Optimization**

In [None]:
# Candidate grid: 20 penalty strengths x 10 L1/L2 mixing ratios.
alphas = np.linspace(0.0001, 10, 20)
l1 = np.linspace(0.0001, 1, 10)

# One [alpha, l1_ratio, test R^2] row per grid point.
# NOTE(review): rows are ranked by test-split R^2, so the top score is optimistic.
scores = []
for a in alphas:
    for i in l1:
        elastic = ElasticNet(alpha=a, l1_ratio=i).fit(X_train, y_train)
        y_pred = elastic.predict(X_test)
        scores.append([a, i, r2_score(y_test, y_pred)])

df_scores = pd.DataFrame(scores, columns=['alpha', 'l1_ratio', 'score'])
df_scores.sort_values(by='score', ascending=False)

Unnamed: 0,alpha,l1_ratio,score
0,0.000100,0.0001,0.577087
1,0.000100,0.1112,0.577087
2,0.000100,0.2223,0.577087
3,0.000100,0.3334,0.577087
4,0.000100,0.4445,0.577087
...,...,...,...
164,8.421068,0.4445,0.572060
97,4.736895,0.7778,0.572048
184,9.473689,0.4445,0.572042
135,6.842137,0.5556,0.572009


# **Perform Hybrid Regression on Housing.csv**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector

In [None]:
housing = pd.read_csv('Housing.csv')
housing.head(1)

In [None]:
y = housing['price']
X = housing.drop('price', axis = 1)

In [None]:
# 70/30 split with a fixed seed so the scores are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)

# One-hot encode the object columns (dropping the first level of each) and pass the
# remaining columns through unchanged; the transformer returns a DataFrame.
ohe = OneHotEncoder(sparse_output=False, drop='first')
ct = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=object)),
    (ohe, make_column_selector(dtype_include=object)),
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Fit the encoder on the training split only, then apply it to the test split.
# (The former mid-cell `X_train.head()` was never displayed and has been removed.)
X_ohe_train = ct.fit_transform(X_train)
X_ohe_test = ct.transform(X_test)

In [None]:
from sklearn.model_selection import cross_val_score

# Candidate grid: 20 penalty strengths x 20 L1/L2 mixing ratios.
l1 = np.linspace(0.0001, 1, 20)
alphas = np.linspace(0.0001, 10, 20)
score = []

# Rank every (alpha, l1_ratio) pair by its mean 5-fold cross-validated R^2 on the
# TRAINING split. Picking hyperparameters by their test-split score would leak the
# test data into model selection and make the reported best score optimistic; the
# test split stays untouched here so it can give an honest final evaluation.
for a in alphas:
    for i in l1:
        elastic = ElasticNet(alpha=a, l1_ratio=i)
        cv_r2 = cross_val_score(elastic, X_ohe_train, y_train, cv=5, scoring='r2').mean()
        score.append([a, i, cv_r2])

# Best combination first; rebinding keeps the cell free of in-place mutation.
df_score = (
    pd.DataFrame(score, columns=['alpha', 'l1_ratio', 'score'])
    .sort_values('score', ascending=False)
)

Best_alpha = df_score['alpha'].iloc[0]
Best_score = df_score['score'].iloc[0]  # mean cross-validated R^2 of the winner
Best_l1    = df_score['l1_ratio'].iloc[0]

print("Best alpha:",    Best_alpha)
print("Best Score:",    Best_score)
print("Best l1_ratio:", Best_l1)

df_score.head()

Best alpha: 0.0001
Best Score: 0.6246856181760901
Best l1_ratio: 1.0


Unnamed: 0,alpha,l1_ratio,score
19,0.0001,1.0,0.624686
18,0.0001,0.947374,0.624685
17,0.0001,0.894747,0.624684
16,0.0001,0.842121,0.624683
15,0.0001,0.789495,0.624682


**Building the final model with the best hyperparameters**

In [None]:
# Refit ElasticNet on the encoded training split using the hyperparameters stored in
# Best_alpha and Best_l1 by the grid search above.
elastic = ElasticNet(alpha = Best_alpha, l1_ratio = Best_l1)
elastic.fit(X_ohe_train, y_train)