In [94]:
# Import Libraries

import pandas as pd
import numpy as np
import pickle
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score


In [95]:
# Load Dataset
data = pd.read_csv("Hyderabad_House_Data.csv")

In [96]:
# Dta Understanding
data.head()


Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
0,0,3 BHK Builder Floor,2,Furnished,Bachelors/Family,1800 sqft,34000,"Bhagyalaxmi Nagar, Kavadiguda"
1,1,3 BHK Apartment,2,Semi-Furnished,Family,2500 sqft,45000,"Gachibowli, Outer Ring Road"
2,2,1 BHK Builder Floor,Immediately,Furnished,Bachelors/Family,read more,18000,Gachibowli
3,3,3 BHK Apartment,Immediately,Furnished,Bachelors/Family,2160 sqft,40000,"Moosapet, NH"
4,4,3 BHK Apartment,2,Semi-Furnished,Family,1580 sqft,23000,Raghavendra Colony kondapur


In [97]:
data.tail()


Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
1166,1166,2 BHK Apartment,1,Unfurnished,Family,900 sqft,15000,KPHB Road
1167,1167,1 BHK Apartment,1,Semi-Furnished,Bachelors/Family,East Facing Property,10000,"Miyapur, NH"
1168,1168,3 BHK Apartment,1,Unfurnished,Bachelors,1515 sqft,14000,"Pragathi Nagar, Kukatpally"
1169,1169,3 BHK Apartment,From Nov '19,Family,3,1500 sqft,16000,"Habsiguda, NH"
1170,1170,2 BHK Apartment,2,Semi-Furnished,Bachelors/Family,1100 sqft,12000,"Umas Aadya and Arha Mensions, Kukatpally, NH"


In [98]:
data.shape


(1171, 8)

In [99]:
df.columns


Index(['Bedrooms', 'Washrooms', 'Furnishing', 'Tennants', 'Area', 'Price',
       'Locality'],
      dtype='object')

In [100]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1171 entries, 0 to 1170
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  1171 non-null   int64 
 1   Bedrooms    1171 non-null   object
 2   Washrooms   1150 non-null   object
 3   Furnishing  1171 non-null   object
 4   Tennants    1170 non-null   object
 5   Area        1149 non-null   object
 6   Price       1171 non-null   object
 7   Locality    1171 non-null   object
dtypes: int64(1), object(7)
memory usage: 73.3+ KB


In [101]:
data.describe()


Unnamed: 0.1,Unnamed: 0
count,1171.0
mean,585.0
std,338.182889
min,0.0
25%,292.5
50%,585.0
75%,877.5
max,1170.0


In [102]:
data.isnull().sum()


Unnamed: 0     0
Bedrooms       0
Washrooms     21
Furnishing     0
Tennants       1
Area          22
Price          0
Locality       0
dtype: int64

In [103]:
# Remove unwanted column
if "Unnamed: 0" in data.columns:
    data.drop(columns=["Unnamed: 0"], inplace=True)

# Extract numeric values
data["Bedrooms"] = data["Bedrooms"].str.extract(r"(\d+)").astype(float)
data["Washrooms"] = pd.to_numeric(data["Washrooms"], errors="coerce")
data["Area"] = data["Area"].str.extract(r"(\d+)").astype(float)

# Clean Price (remove commas)
data["Price"] = data["Price"].str.replace(",", "")
data["Price"] = data["Price"].astype(float)

data.dropna(inplace=True)

print("After Cleaning:", data.shape)


After Cleaning: (792, 7)


In [104]:
data.isnull().sum()


Bedrooms      0
Washrooms     0
Furnishing    0
Tennants      0
Area          0
Price         0
Locality      0
dtype: int64

In [105]:
categorical_cols = ["Furnishing", "Tennants", "Locality"]

encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le


In [106]:
X = data.drop("Price", axis=1)
y = data["Price"]


In [107]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])


Training samples: 633
Testing samples: 159


In [108]:
model = RandomForestRegressor(n_estimators=300, random_state=42)

model.fit(X_train, y_train)


0,1,2
,n_estimators,300
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [110]:
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Model Testing Results")
print("R2 Score:", round(r2*100,2), "%")
print("Mean Absolute Error:", round(mae,2))


Model Testing Results
R2 Score: 67.87 %
Mean Absolute Error: 5072.31


In [111]:
print("Example Test Prediction:")


actual_price = y_test.iloc[0]
predicted_price = y_pred[0]

print("Actual Rent:", actual_price)
print("Predicted Rent:", round(predicted_price,2))


Example Test Prediction:
Actual Rent: 45000.0
Predicted Rent: 33880.0


In [113]:
# Safe locality (first available from dataset)
locality_value = encoders["Locality"].classes_[0]

furnishing = encoders["Furnishing"].transform(["Semi-Furnished"])[0]
tennants = encoders["Tennants"].transform(["Family"])[0]
locality = encoders["Locality"].transform([locality_value])[0]

new_house = np.array([[2, 2, furnishing, tennants, 1200, locality]])

rent_prediction = model.predict(new_house)

print("\nNew House Rent Prediction:")

print("Predicted Monthly Rent: ₹", round(rent_prediction[0],2))



New House Rent Prediction:
Predicted Monthly Rent: ₹ 14361.67


In [114]:
with open("house_rent_model.pkl", "wb") as f:
    pickle.dump((model, encoders), f)

print("Model Saved Successfully!")


Model Saved Successfully!
