In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
 
df = pd.read_csv("House Price India.csv" )
df

Unnamed: 0,id,Date,number of bedrooms,number of bathrooms,living area,lot area,number of floors,waterfront present,number of views,condition of the house,...,Built Year,Renovation Year,Postal Code,Lattitude,Longitude,living_area_renov,lot_area_renov,Number of schools nearby,Distance from the airport,Price
0,6762810635,42491,4,2.50,2920,4000,1.5,0,0,5,...,1909,0,122004,52.8878,-114.470,2470,4000,2,51,1400000
1,6762810998,42491,5,2.75,2910,9480,1.5,0,0,3,...,1939,0,122004,52.8852,-114.468,2940,6600,1,53,1200000
2,6762812605,42491,4,2.50,3310,42998,2.0,0,0,3,...,2001,0,122005,52.9532,-114.321,3350,42847,3,76,838000
3,6762812919,42491,3,2.00,2710,4500,1.5,0,0,4,...,1929,0,122006,52.9047,-114.485,2060,4500,1,51,805000
4,6762813105,42491,3,2.50,2600,4750,1.0,0,0,4,...,1951,0,122007,52.9133,-114.590,2380,4750,1,67,790000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14614,6762830250,42734,2,1.50,1556,20000,1.0,0,0,4,...,1957,0,122066,52.6191,-114.472,2250,17286,3,76,221700
14615,6762830339,42734,3,2.00,1680,7000,1.5,0,0,4,...,1968,0,122072,52.5075,-114.393,1540,7480,3,59,219200
14616,6762830618,42734,2,1.00,1070,6120,1.0,0,0,3,...,1962,0,122056,52.7289,-114.507,1130,6120,2,64,209000
14617,6762830709,42734,4,1.00,1030,6621,1.0,0,0,4,...,1955,0,122042,52.7157,-114.411,1420,6631,3,54,205000


In [3]:
df_clean = df.drop(columns=["id", "Date", "Postal Code"], errors="ignore")

X = df_clean.drop(columns=["Price"])
y = df_clean["Price"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [27]:
alpha_values = [0.01, 0.1, 1, 10, 100]

ridge = Ridge()
ridge_cv = GridSearchCV(ridge, param_grid={"alpha": alpha_values}, scoring="r2", cv=5)
ridge_cv.fit(X_train_scaled, y_train)
best_alpha_ridge = ridge_cv.best_params_["alpha"]

In [15]:
lasso = Lasso(max_iter=50000, tol=0.01) 
lasso_cv = GridSearchCV(lasso, param_grid={"alpha": [1, 10, 100, 500, 1000]}, scoring="r2", cv=5)
lasso_cv.fit(X_train_scaled, y_train)


In [17]:
ridge_best = Ridge(alpha=best_alpha_ridge).fit(X_train_scaled, y_train)
lasso_best = Lasso(alpha=best_alpha_lasso, max_iter=5000).fit(X_train_scaled, y_train)


In [23]:
y_pred_ridge = ridge_best.predict(X_test_scaled)
y_pred_lasso = lasso_best.predict(X_test_scaled)

ridge_mse = mean_squared_error(y_test, y_pred_ridge)
ridge_r2 = r2_score(y_test, y_pred_ridge)

lasso_mse = mean_squared_error(y_test, y_pred_lasso)
lasso_r2 = r2_score(y_test, y_pred_lasso)

In [29]:
print(f"Best Ridge Alpha: {best_alpha_ridge}")
print(f"Ridge Regression - MSE: {ridge_mse:.2f}, R² Score: {ridge_r2:.4f}")

print(f"\nBest Lasso Alpha: {best_alpha_lasso}")
print(f"Lasso Regression - MSE: {lasso_mse:.2f}, R² Score: {lasso_r2:.4f}")

Best Ridge Alpha: 100
Ridge Regression - MSE: 44207590353.57, R² Score: 0.7013

Best Lasso Alpha: 100
Lasso Regression - MSE: 44173416672.93, R² Score: 0.7015
