<a href="https://colab.research.google.com/github/Yashb254/AI-Beginner-proj./blob/main/housing_price_prediction_linear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🏡 Housing Price Prediction using Linear Regression

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [None]:
# Mount Google Drive only when running inside Google Colab.
# `google.colab` is not installable in a regular Jupyter environment, so an
# unconditional import would abort "Run All" there. IN_COLAB records which
# environment was detected so later cells can branch on it if needed.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    print("Not running in Google Colab; skipping Google Drive mount.")
else:
    IN_COLAB = True
    drive.mount('/content/drive')

## 📥 Load Dataset

In [None]:
# Read the housing CSV (path is relative to the notebook's working directory)
# into a DataFrame, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer="nagpur_housing_dataset_5000.csv")
df.head(5)

Unnamed: 0,Area (sqft),Bedrooms,Bathrooms,BHK,Age,Location,Furnishing,Parking,price
0,3674,4,1,4,2,Trimurti Nagar,Semi-Furnished,Yes,281.94
1,1360,3,2,3,29,Wardhaman Nagar,Semi-Furnished,Yes,118.82
2,1794,2,3,3,4,Civil Lines,Furnished,Yes,215.52
3,1630,3,2,2,8,Trimurti Nagar,Unfurnished,Yes,148.8
4,1595,2,2,2,27,Dharampeth,Unfurnished,Yes,167.04


## 🛠️ Data Preprocessing

In [None]:
# Normalise the column headers: trim surrounding whitespace, turn spaces into
# underscores, then lowercase (e.g. "Area (sqft)" -> "area_(sqft)").
df.columns = (
    df.columns
    .str.strip()
    .str.replace(' ', '_')
    .str.lower()
)

# Target is the `price` column; every remaining column goes into X.
y = df['price']
X = df.drop(columns=['price'])

# Columns to scale vs. columns to one-hot encode (names after normalisation).
numeric_features = [
    'area_(sqft)',
    'bedrooms',
    'bathrooms',
    'bhk',
    'age',
]
categorical_features = [
    'location',
    'furnishing',
    'parking',
]

# Preprocessing step: standardise numeric columns and one-hot encode the
# categorical ones. Categories not seen during fit are ignored at transform
# time (handle_unknown='ignore') instead of raising.
preprocessor = ColumnTransformer(
    [
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

## 🔍 Train the Regression Model (Random Forest)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline: preprocessing followed by the regressor, so the same
# transformations are applied at fit time and at predict time.
# The regressor is a random forest; it is seeded so that a fresh
# "Restart & Run All" reproduces the same fitted model and metrics.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Fit on the training split only (last expression, so the fitted pipeline is displayed)
model.fit(X_train, y_train)

## 📊 Evaluate the Model

In [None]:
# Predict prices for the held-out split. `y_pred` was previously never
# defined, so this cell failed with NameError on a fresh kernel.
# (mean_squared_error, r2_score and np are imported in the first cell.)
y_pred = model.predict(X_test)

# Calculate RMSE (square root of the mean squared error)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Calculate R² Score
r2 = r2_score(y_test, y_pred)

# Print both
print("RMSE:", rmse)
print("R² Score:", r2)


RMSE: 7.474562870659393
R² Score: 0.9885451007981704


## 💾 Save the Model

In [None]:
# Serialise the fitted pipeline (preprocessing + regressor) to disk with joblib.
# Only load .pkl files from sources you trust: unpickling can execute code.
joblib.dump(value=model, filename="linear_regressor_model2.pkl")

['linear_regressor_model2.pkl']

## 📥 Download the Model

In [None]:
# Show a clickable link to the saved model file.
# The path must match the filename written by the save cell
# ("linear_regressor_model2.pkl"); the previous link pointed at a file
# that is never created.
from IPython.display import FileLink
FileLink(r'linear_regressor_model2.pkl')

✅ You're now ready to make predictions with your saved model!