# HOUSE RENT PREDICTION


### IMPORT LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#### READ THE DATA

In [113]:
data = pd.read_csv("Hyderabad_House_Data.csv")

In [114]:
data.columns

Index(['Unnamed: 0', 'Bedrooms', 'Washrooms', 'Furnishing', 'Tennants', 'Area',
       'Price', 'Locality'],
      dtype='object')

In [115]:
data.head()

Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
0,0,3 BHK Builder Floor,2,Furnished,Bachelors/Family,1800 sqft,34000,"Bhagyalaxmi Nagar, Kavadiguda"
1,1,3 BHK Apartment,2,Semi-Furnished,Family,2500 sqft,45000,"Gachibowli, Outer Ring Road"
2,2,1 BHK Builder Floor,Immediately,Furnished,Bachelors/Family,read more,18000,Gachibowli
3,3,3 BHK Apartment,Immediately,Furnished,Bachelors/Family,2160 sqft,40000,"Moosapet, NH"
4,4,3 BHK Apartment,2,Semi-Furnished,Family,1580 sqft,23000,Raghavendra Colony kondapur


In [116]:
data.tail()

Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
1166,1166,2 BHK Apartment,1,Unfurnished,Family,900 sqft,15000,KPHB Road
1167,1167,1 BHK Apartment,1,Semi-Furnished,Bachelors/Family,East Facing Property,10000,"Miyapur, NH"
1168,1168,3 BHK Apartment,1,Unfurnished,Bachelors,1515 sqft,14000,"Pragathi Nagar, Kukatpally"
1169,1169,3 BHK Apartment,From Nov '19,Family,3,1500 sqft,16000,"Habsiguda, NH"
1170,1170,2 BHK Apartment,2,Semi-Furnished,Bachelors/Family,1100 sqft,12000,"Umas Aadya and Arha Mensions, Kukatpally, NH"


##### SHAPE WILL SHOW THE HOW MANY ROWS & COLUMNS 

In [None]:
data.shape

(1171, 8)

In [118]:
data.describe()

Unnamed: 0.1,Unnamed: 0
count,1171.0
mean,585.0
std,338.182889
min,0.0
25%,292.5
50%,585.0
75%,877.5
max,1170.0


In [143]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1171 entries, 0 to 1170
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  1171 non-null   int64  
 1   Bedrooms    1171 non-null   object 
 2   Washrooms   1171 non-null   float64
 3   Furnishing  1171 non-null   object 
 4   Tennants    1171 non-null   float64
 5   Area        1171 non-null   float64
 6   Price       1171 non-null   float64
 7   Locality    1171 non-null   object 
dtypes: float64(4), int64(1), object(3)
memory usage: 73.3+ KB


### RETURNS

In [119]:
data.isnull().sum()

Unnamed: 0     0
Bedrooms       0
Washrooms     21
Furnishing     0
Tennants       1
Area          22
Price          0
Locality       0
dtype: int64

In [120]:
data.head()

Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
0,0,3 BHK Builder Floor,2,Furnished,Bachelors/Family,1800 sqft,34000,"Bhagyalaxmi Nagar, Kavadiguda"
1,1,3 BHK Apartment,2,Semi-Furnished,Family,2500 sqft,45000,"Gachibowli, Outer Ring Road"
2,2,1 BHK Builder Floor,Immediately,Furnished,Bachelors/Family,read more,18000,Gachibowli
3,3,3 BHK Apartment,Immediately,Furnished,Bachelors/Family,2160 sqft,40000,"Moosapet, NH"
4,4,3 BHK Apartment,2,Semi-Furnished,Family,1580 sqft,23000,Raghavendra Colony kondapur


In [121]:
data.tail()

Unnamed: 0.1,Unnamed: 0,Bedrooms,Washrooms,Furnishing,Tennants,Area,Price,Locality
1166,1166,2 BHK Apartment,1,Unfurnished,Family,900 sqft,15000,KPHB Road
1167,1167,1 BHK Apartment,1,Semi-Furnished,Bachelors/Family,East Facing Property,10000,"Miyapur, NH"
1168,1168,3 BHK Apartment,1,Unfurnished,Bachelors,1515 sqft,14000,"Pragathi Nagar, Kukatpally"
1169,1169,3 BHK Apartment,From Nov '19,Family,3,1500 sqft,16000,"Habsiguda, NH"
1170,1170,2 BHK Apartment,2,Semi-Furnished,Bachelors/Family,1100 sqft,12000,"Umas Aadya and Arha Mensions, Kukatpally, NH"


##### IT WILL REMOVE THE NULL VALUES IN WASHROOMS ROW

In [122]:
data['Washrooms'] = pd.to_numeric(data['Washrooms'],errors='coerce')

In [123]:
data["Washrooms"] = data['Washrooms'].fillna(data['Washrooms'].median())

In [124]:
data.isnull().sum()

Unnamed: 0     0
Bedrooms       0
Washrooms      0
Furnishing     0
Tennants       1
Area          22
Price          0
Locality       0
dtype: int64

In [125]:
data['Tennants'] = pd.to_numeric(data['Tennants'],errors='coerce')

In [126]:
data["Tennants"] = data['Tennants'].fillna(data['Tennants'].median())

In [127]:
data.isnull().sum()

Unnamed: 0     0
Bedrooms       0
Washrooms      0
Furnishing     0
Tennants       0
Area          22
Price          0
Locality       0
dtype: int64

In [128]:
data['Area'] = data['Area'].astype(str).str.extract('(\d+)')

In [129]:
data['Area'] = pd.to_numeric(data['Area'],errors='coerce')

In [130]:
data["Area"] = data['Area'].fillna(data['Area'].median())

In [131]:
data.isnull().sum()

Unnamed: 0    0
Bedrooms      0
Washrooms     0
Furnishing    0
Tennants      0
Area          0
Price         0
Locality      0
dtype: int64

In [132]:
data['Price'] = (
    data['Price']
    .astype(str)
    .str.replace(r'[^\d]', '', regex=True)
)

In [133]:
data['Price'] = data['Price'].astype(float)

In [134]:
data['Price'].dtype

dtype('float64')

In [135]:
data['Price'].apply(type).value_counts()

Price
<class 'float'>    1171
Name: count, dtype: int64

In [None]:
x = data.drop('Price', axis=1)
y = data['Price']

In [137]:
x = pd.get_dummies(x, drop_first=True)

In [138]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)


In [139]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

In [140]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x_train, y_train)

In [141]:
print(x_train.dtype)
print(y_train.dtype)

float64
float64


In [142]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MAE :", mae)
print("MSE :", mse)
print("RMSE :", rmse)
print("R2 Score :", r2)

NameError: name 'mean_absolute_error' is not defined

In [None]:
new_house = pd.DataFrame({
    'Area' : [1400],
    'Bedrooms' : [6],
    'Washrooms' : [3]
})

new_house = pd.get_dummies(new_house)
new_house = new_house.reindex(columns=x.columns, fill_value=0)

new_scaled = scaler.transform(new_house)

predicted_price = model.predict(new_scaled)
print("predicted House Price:",predicted_price[0])

predicted House Price: 22059.542898483334


In [None]:
plt.scatter(y_test, y_pred)
plt.xlabel("actual Price")
plt.ylabel('Predicted Price')
plt.title('Actual vs Predicted House Prices')
plt.show()

NameError: name 'y_pred' is not defined