In [43]:
# Importing pandas for data manipulation and analysis
import pandas as pd

In [44]:
# Importing numpy for numerical operations
import numpy as np

In [45]:
# Importing model selection utilities:
# - train_test_split to split data into training and testing sets
# - cross_val_score to evaluate models with cross-validation
# - GridSearchCV for hyperparameter tuning using grid search with cross-validation
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

In [46]:
# Importing preprocessing tools:
# - StandardScaler to standardize numerical features (zero mean, unit variance)
# - OneHotEncoder to convert categorical features into one-hot encoded format
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [47]:
# ColumnTransformer allows applying different preprocessing steps to different columns
from sklearn.compose import ColumnTransformer

In [48]:
# Pipeline is used to chain preprocessing and modeling steps into one object
from sklearn.pipeline import Pipeline

In [49]:
# Importing regression models:
# - LinearRegression for simple linear modeling
# - Ridge for linear regression with L2 regularization
from sklearn.linear_model import LinearRegression, Ridge

In [50]:
# RandomForestRegressor is an ensemble model that builds multiple decision trees for regression
from sklearn.ensemble import RandomForestRegressor

In [51]:
# Importing performance metrics for regression models:
# - mean_absolute_error: average absolute difference between predicted and actual values
# - r2_score: proportion of variance explained by the model
# - mean_squared_error: average squared difference between predicted and actual values
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

In [52]:
# Importing XGBoost, a powerful gradient boosting library for regression/classification
import xgboost as xgb

In [53]:
# Plotly Express and Graph Objects for interactive visualizations
import plotly.express as px
import plotly.graph_objects as go

In [54]:
# Joblib is used to save and load Python objects (e.g., trained models)
import joblib

In [55]:
# Suppress all warnings to keep the notebook output clean.
# NOTE: this also hides deprecation and convergence warnings, so avoid blanket suppression outside exploratory notebooks.
import warnings
warnings.filterwarnings('ignore')

In [56]:
# Step 1: Load the house-price dataset; the cells below explore it
DATA_PATH = '/kaggle/input/house-price/house_prices.csv'
data = pd.read_csv(DATA_PATH)

In [57]:
# Column names, non-null counts and dtypes: a first look at how complete each column is
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187531 entries, 0 to 187530
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Index              187531 non-null  int64  
 1   Title              187531 non-null  object 
 2   Description        184508 non-null  object 
 3   Amount(in rupees)  187531 non-null  object 
 4   Price (in rupees)  169866 non-null  float64
 5   location           187531 non-null  object 
 6   Carpet Area        106858 non-null  object 
 7   Status             186916 non-null  object 
 8   Floor              180454 non-null  object 
 9   Transaction        187448 non-null  object 
 10  Furnishing         184634 non-null  object 
 11  facing             117298 non-null  object 
 12  overlooking        106095 non-null  object 
 13  Society            77853 non-null   object 
 14  Bathroom           186703 non-null  object 
 15  Balcony            138596 non-null  object 
 16  Ca

In [58]:
# Allow up to 50 columns in rendered frames, then preview the first five rows
pd.options.display.max_columns = 50
data.head()

Unnamed: 0,Index,Title,Description,Amount(in rupees),Price (in rupees),location,Carpet Area,Status,Floor,Transaction,Furnishing,facing,overlooking,Society,Bathroom,Balcony,Car Parking,Ownership,Super Area,Dimensions,Plot Area
0,0,1 BHK Ready to Occupy Flat for sale in Srushti...,"Bhiwandi, Thane has an attractive 1 BHK Flat f...",42 Lac,6000.0,thane,500 sqft,Ready to Move,10 out of 11,Resale,Unfurnished,,,Srushti Siddhi Mangal Murti Complex,1,2.0,,,,,
1,1,2 BHK Ready to Occupy Flat for sale in Dosti V...,One can find this stunning 2 BHK flat for sale...,98 Lac,13799.0,thane,473 sqft,Ready to Move,3 out of 22,Resale,Semi-Furnished,East,Garden/Park,Dosti Vihar,2,,1 Open,Freehold,,,
2,2,2 BHK Ready to Occupy Flat for sale in Sunrise...,Up for immediate sale is a 2 BHK apartment in ...,1.40 Cr,17500.0,thane,779 sqft,Ready to Move,10 out of 29,Resale,Unfurnished,East,Garden/Park,Sunrise by Kalpataru,2,,1 Covered,Freehold,,,
3,3,1 BHK Ready to Occupy Flat for sale Kasheli,This beautiful 1 BHK Flat is available for sal...,25 Lac,,thane,530 sqft,Ready to Move,1 out of 3,Resale,Unfurnished,,,,1,1.0,,,,,
4,4,2 BHK Ready to Occupy Flat for sale in TenX Ha...,"This lovely 2 BHK Flat in Pokhran Road, Thane ...",1.60 Cr,18824.0,thane,635 sqft,Ready to Move,20 out of 42,Resale,Unfurnished,West,"Garden/Park, Main Road",TenX Habitat Raymond Realty,2,,1 Covered,Co-operative Society,,,


In [59]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

Unnamed: 0,Index,Price (in rupees),Dimensions,Plot Area
count,187531.0,169866.0,0.0,0.0
mean,93765.0,7583.772,,
std,54135.681003,27241.71,,
min,0.0,0.0,,
25%,46882.5,4297.0,,
50%,93765.0,6034.0,,
75%,140647.5,9450.0,,
max,187530.0,6700000.0,,


In [60]:
# Number of missing values in each column
data.isna().sum()

Index                     0
Title                     0
Description            3023
Amount(in rupees)         0
Price (in rupees)     17665
location                  0
Carpet Area           80673
Status                  615
Floor                  7077
Transaction              83
Furnishing             2897
facing                70233
overlooking           81436
Society              109678
Bathroom                828
Balcony               48935
Car Parking          103357
Ownership             65517
Super Area           107685
Dimensions           187531
Plot Area            187531
dtype: int64

In [61]:
# Shape as (rows, columns) before any columns are dropped
data.shape

(187531, 21)

In [62]:
# 'Plot Area' and 'Dimensions' contain no non-null values (see the null counts above),
# so they carry no information and are removed.
# errors='ignore' keeps this cell idempotent: re-running it after the columns are
# already gone is a no-op instead of raising a KeyError.
data = data.drop(columns=['Plot Area', 'Dimensions'], errors='ignore')

In [63]:
# Confirm the drop: the column count should be two lower than before
data.shape

(187531, 19)

In [64]:
# Drop columns that are more than 50% missing ('Society', 'Car Parking', 'Super Area')
# together with columns judged not useful ('Description', 'Index', 'Title').
# errors='ignore' makes the cell safe to re-run once the columns are already gone,
# instead of raising a KeyError.
data = data.drop(
    columns=['Society', 'Description', 'Index', 'Title', 'Car Parking', 'Super Area'],
    errors='ignore',
)

In [65]:
# Check the shape again after removing the six columns above
data.shape

(187531, 13)

In [66]:
# Preview the first five rows of the remaining columns
data.head()

Unnamed: 0,Amount(in rupees),Price (in rupees),location,Carpet Area,Status,Floor,Transaction,Furnishing,facing,overlooking,Bathroom,Balcony,Ownership
0,42 Lac,6000.0,thane,500 sqft,Ready to Move,10 out of 11,Resale,Unfurnished,,,1,2.0,
1,98 Lac,13799.0,thane,473 sqft,Ready to Move,3 out of 22,Resale,Semi-Furnished,East,Garden/Park,2,,Freehold
2,1.40 Cr,17500.0,thane,779 sqft,Ready to Move,10 out of 29,Resale,Unfurnished,East,Garden/Park,2,,Freehold
3,25 Lac,,thane,530 sqft,Ready to Move,1 out of 3,Resale,Unfurnished,,,1,1.0,
4,1.60 Cr,18824.0,thane,635 sqft,Ready to Move,20 out of 42,Resale,Unfurnished,West,"Garden/Park, Main Road",2,,Co-operative Society
