In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [2]:
df = pd.read_csv("pune_rent_dataset.csv")
df.head()

Unnamed: 0,Locality,BHK,Area_sqft,Furnishing,Floor_Number,Total_Building_Floors,Age_Years,Balconies,Bathrooms,Parking,Lift,Availability,Tenant_Type,Security_Deposit,Rent_INR
0,Akurdi,1 BHK,318.0,Semi-Furnished,13.0,15.0,15.0,0.0,1.0,1.0,Yes,Immediate,Bachelor,25875.0,8625.0
1,Akurdi,2 BHK,757.0,Semi-Furnished,1.0,3.0,20.0,1.0,2.0,1.0,No,Immediate,Family,121575.0,40525.0
2,Akurdi,1 BHK,510.0,Fully Furnished,14.0,15.0,10.0,0.0,1.0,1.0,Yes,After 1 Month,Family,52444.0,26222.0
3,Akurdi,1 BHK,414.0,Semi-Furnished,1.0,3.0,3.0,0.0,1.0,1.0,No,Immediate,Family,51705.0,17235.0
4,Akurdi,2 BHK,954.0,Semi-Furnished,7.0,12.0,7.0,1.0,1.0,2.0,Yes,Not Sure,Family,58014.0,58014.0


In [3]:
# Remove missing values
df.dropna(inplace=True)

In [4]:
encoder = LabelEncoder()

categorical_cols = df.select_dtypes(include='object').columns

for col in categorical_cols:
    df[col] = encoder.fit_transform(df[col])

In [5]:
X = df.drop("Rent_INR", axis=1)
y = df["Rent_INR"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred_lr = lr.predict(X_test)

print("Linear Regression MAE:", mean_absolute_error(y_test, y_pred_lr))
print("Linear Regression R2 Score:", r2_score(y_test, y_pred_lr))

Linear Regression MAE: 6241.640800670041
Linear Regression R2 Score: 0.8293490452251768


In [8]:
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

print("Decision Tree MAE:", mean_absolute_error(y_test, y_pred_dt))
print("Decision Tree R2 Score:", r2_score(y_test, y_pred_dt))

Decision Tree MAE: 6286.316417910448
Decision Tree R2 Score: 0.691970228747548


In [9]:
sample_house = X.iloc[0].values.reshape(1, -1)

predicted_rent = dt.predict(sample_house)
print("Predicted Rent (INR):", predicted_rent[0])

Predicted Rent (INR): 8822.0


