In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LinearRegression
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.metrics import r2_score, root_mean_squared_error, mean_absolute_error
import os
# Working directory for the relative CSV reads in this notebook ("Boston.csv", ...).
# The folder can be overridden per machine through the ML_DATASETS_DIR
# environment variable; when it is unset the original location is used, so
# existing setups keep working unchanged.
DATA_DIR = os.environ.get("ML_DATASETS_DIR", "D:/Training/Academy/ML(Python)/Datasets")
os.chdir(DATA_DIR)

In [2]:
# Boston dataset: `medv` is split off as the target, every other column is a feature.
boston = pd.read_csv("Boston.csv")
y = boston['medv']
X = boston.drop(columns='medv')
# Hold out 30% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

Hyper-parameter Tuning

In [3]:
# Manual sweep over the Lasso penalty strength: one model per candidate alpha,
# fitted on the training split and scored by MAE on the held-out split.
alphas = [0.001, 0.01, 0.1, 1, 1.5, 2.5, 5, 10]
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)  # fit() hands back the fitted estimator
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])
# Ascending sort on MAE puts the best-scoring alpha in the first row.
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
1,0.01,3.088221
0,0.001,3.1142
2,0.1,3.129219
3,1.0,3.373066
4,1.5,3.542921
5,2.5,3.632183
6,5.0,3.856294
7,10.0,3.94214


#### Housing

In [4]:
# Housing dataset: `price` is split off as the target, the remaining columns are features.
housing = pd.read_csv("Housing.csv")
X = housing.drop(columns='price')
y = housing['price']

In [5]:
# One-hot encode the object-dtype columns (first level of each dropped) and
# pass every other column through untouched; both steps emit pandas frames.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')
col_trnf = ColumnTransformer(
    transformers=[('OHE', ohe, make_column_selector(dtype_include=object))],
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')
# NOTE(review): the encoder is fitted on the full frame, i.e. before the
# train/test split below — confirm that is intended.
X = col_trnf.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

In [6]:
# Same coarse alpha grid as before, now on the encoded housing features:
# fit on the training split, score each alpha by MAE on the held-out split.
# NOTE(review): in the recorded output the lowest MAE is at alpha=10, the
# largest value tried, so the best setting may lie beyond this grid.
alphas = [0.001, 0.01, 0.1, 1, 1.5, 2.5, 5, 10]
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)  # fit() hands back the fitted estimator
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])
# Ascending sort on MAE puts the best-scoring alpha in the first row.
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
7,10.0,11594.596229
6,5.0,11597.777786
5,2.5,11599.368564
4,1.5,11599.989301
3,1.0,11600.307457
2,0.1,11600.878772
1,0.01,11600.935421
0,0.001,11600.941078


In [7]:
# Preview a fixed-step alpha grid: values from 0.001 in steps of 0.5, stopping before 15.
np.arange(start=0.001, stop=15, step=0.5)

array([1.0000e-03, 5.0100e-01, 1.0010e+00, 1.5010e+00, 2.0010e+00,
       2.5010e+00, 3.0010e+00, 3.5010e+00, 4.0010e+00, 4.5010e+00,
       5.0010e+00, 5.5010e+00, 6.0010e+00, 6.5010e+00, 7.0010e+00,
       7.5010e+00, 8.0010e+00, 8.5010e+00, 9.0010e+00, 9.5010e+00,
       1.0001e+01, 1.0501e+01, 1.1001e+01, 1.1501e+01, 1.2001e+01,
       1.2501e+01, 1.3001e+01, 1.3501e+01, 1.4001e+01, 1.4501e+01])

In [8]:
# Preview a fixed-count alpha grid: 20 evenly spaced values from 0.001 to 15 inclusive.
np.linspace(start=0.001, stop=15, num=20)

array([1.00000000e-03, 7.90421053e-01, 1.57984211e+00, 2.36926316e+00,
       3.15868421e+00, 3.94810526e+00, 4.73752632e+00, 5.52694737e+00,
       6.31636842e+00, 7.10578947e+00, 7.89521053e+00, 8.68463158e+00,
       9.47405263e+00, 1.02634737e+01, 1.10528947e+01, 1.18423158e+01,
       1.26317368e+01, 1.34211579e+01, 1.42105789e+01, 1.50000000e+01])

In [9]:
# Denser sweep on the housing split: 30 evenly spaced alphas from 0.001 to 15,
# each fitted on the training split and scored by MAE on the held-out split.
# NOTE(review): in the recorded output the lowest MAE is at alpha=15, the top
# of the grid, so the search has not bracketed a minimum yet.
alphas = np.linspace(start=0.001, stop=15, num=30)
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)  # fit() hands back the fitted estimator
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])
# Ascending sort on MAE puts the best-scoring alpha in the first row.
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
29,15.0,11591.414672
28,14.482793,11591.743777
27,13.965586,11592.072881
26,13.448379,11592.401986
25,12.931172,11592.731091
24,12.413966,11593.060195
23,11.896759,11593.3893
22,11.379552,11593.718404
21,10.862345,11594.047509
20,10.345138,11594.376614


#### Exp_Salaries

In [10]:
# Exp_Salaries dataset: `Salary` is split off as the target, the remaining columns are features.
sals = pd.read_csv("Exp_Salaries.csv")
X = sals.drop(columns='Salary')
y = sals['Salary']

In [11]:
# One-hot encode the object-dtype columns (first level of each dropped) and
# pass every other column through untouched; both steps emit pandas frames.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')
col_trnf = ColumnTransformer(
    transformers=[('OHE', ohe, make_column_selector(dtype_include=object))],
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')
# NOTE(review): the encoder is fitted on the full frame, i.e. before the
# train/test split below — confirm that is intended.
X = col_trnf.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

In [12]:
# Sweep 30 evenly spaced alphas from 0.001 to 15 on the salaries split:
# fit on the training split, score each alpha by MAE on the held-out split.
# NOTE(review): in the recorded output the lowest MAE is at alpha=15, the top
# of the grid, so the search has not bracketed a minimum yet.
alphas = np.linspace(start=0.001, stop=15, num=30)
scores = []
for a in alphas:
    lasso = Lasso(alpha=a).fit(X_train, y_train)  # fit() hands back the fitted estimator
    y_pred = lasso.predict(X_test)
    scores.append([a, mean_absolute_error(y_test, y_pred)])
# Ascending sort on MAE puts the best-scoring alpha in the first row.
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values(by='score')

Unnamed: 0,alpha,score
29,15.0,3028.469359
28,14.482793,3028.694647
27,13.965586,3028.919936
26,13.448379,3029.145224
25,12.931172,3029.370512
24,12.413966,3029.5958
23,11.896759,3029.821088
22,11.379552,3030.046376
21,10.862345,3030.271665
20,10.345138,3030.496953
