In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [32]:
data = pd.read_csv("C:\\boston.csv")

In [33]:
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)
X_train = train_data.drop(columns=['MEDV']) 
y_train = train_data['MEDV']
X_test = test_data.drop(columns=['MEDV'])
y_test = test_data['MEDV']

In [34]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [35]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train_scaled, y_train)
train_pred_reg = model.predict(X_train_scaled)
test_pred_reg = model.predict(X_test_scaled)

In [36]:
from sklearn.metrics import mean_squared_error
rmse_train_reg = mean_squared_error(y_train,train_pred_reg, squared=False)
rmse_validation_reg = mean_squared_error(y_test,test_pred_reg, squared=False)
print("RMSE for training data Reg:",rmse_train_reg)
print("RMSE for validation data Reg:",rmse_validation_reg)

RMSE for training data Reg: 4.748208239685937
RMSE for validation data Reg: 4.638689926172821


In [37]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
lasso = Lasso()
parameters = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search = GridSearchCV(lasso, parameters, cv=5)
grid_search.fit(X_train_scaled, y_train)
best_lambda_lasso = grid_search.best_params_['alpha']
lasso_best = Lasso(alpha=best_lambda_lasso)
lasso_best.fit(X_train_scaled, y_train)
train_pred_lasso = lasso_best.predict(X_train_scaled)
test_pred_lasso = lasso_best.predict(X_test_scaled)
rmse_train_lasso = mean_squared_error(y_train, train_pred_lasso, squared=False)
rmse_validation_lasso= mean_squared_error(y_test, test_pred_lasso, squared=False)
print("best lambda lasso:", best_lambda_lasso)
print("RMSE for training data Lasso:",rmse_train_lasso)
print("RMSE for validation data Lasso:",rmse_validation_lasso)

best lambda lasso: 0.001
RMSE for training data Lasso: 4.748215906554991
RMSE for validation data Lasso: 4.639128228287351


In [38]:
from sklearn.linear_model import Ridge
ridge = Ridge()
parameters = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search = GridSearchCV(ridge, parameters, cv=5)
grid_search.fit(X_train_scaled, y_train)
best_lambda_ridge = grid_search.best_params_['alpha']
ridge_best = Ridge(alpha=best_lambda_ridge)
ridge_best.fit(X_train_scaled, y_train)
train_pred_ridge = ridge_best.predict(X_train_scaled)
test_pred_ridge = ridge_best.predict(X_test_scaled)
rmse_train_ridge = mean_squared_error(y_train, train_pred_ridge)
rmse_validation_ridge = mean_squared_error(y_test, test_pred_ridge)
print("best lambda:",best_lambda_ridge)
print("RMSE for training data Ridge:",rmse_train_ridge)
print("RMSE for validation data Ridge:",rmse_validation_ridge)

best lambda: 10
RMSE for training data Ridge: 22.645075682504864
RMSE for validation data Ridge: 21.811243071491567


In [40]:
results = pd.DataFrame({
    'Model': ['Simple Linear Regression', 'Lasso Regression', 'Ridge Regression'],
    'RMSE Training': [rmse_train_reg, rmse_train_lasso, rmse_train_ridge],
    'RMSE Test': [rmse_validation_reg, rmse_validation_lasso, rmse_validation_ridge]
})
print(results)

                      Model  RMSE Training  RMSE Test
0  Simple Linear Regression       4.748208   4.638690
1          Lasso Regression       4.748216   4.639128
2          Ridge Regression      22.645076  21.811243
