In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import r2_score, mean_absolute_error

In [17]:
# Load the house-rent dataset from a CSV in the working directory and
# preview the first rows to check that the columns parsed as expected.
df = pd.read_csv("house_rent.csv")
df.head()

Unnamed: 0,LocationQuality,Furnishing,FloorLevel,Bedrooms,Size (sqft),MonthlyRent ($)
0,Average,Furnished,Mid,2,2310,13539.8
1,Excellent,Unfurnished,Mid,3,2166,12351.73
2,Good,Unfurnished,Low,1,1134,6903.35
3,Average,Furnished,Low,4,1503,9108.23
4,Poor,Semi-furnished,Low,2,2025,11084.26


In [18]:
# Count missing values per column (`isna` is the canonical name that
# `isnull` aliases).
df.isna().sum()

Unnamed: 0,0
LocationQuality,0
Furnishing,0
FloorLevel,0
Bedrooms,0
Size (sqft),0
MonthlyRent ($),0


In [19]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(438, 6)

In [20]:
# Per-column dtype and non-null count, plus the frame's memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 438 entries, 0 to 437
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   LocationQuality  438 non-null    object 
 1   Furnishing       438 non-null    object 
 2   FloorLevel       438 non-null    object 
 3   Bedrooms         438 non-null    int64  
 4   Size (sqft)      438 non-null    int64  
 5   MonthlyRent ($)  438 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 20.7+ KB


In [21]:
# Explicit category orderings: a label's position in its list is the value
# it is encoded to (e.g. 'Poor' -> 0.0, 'Excellent' -> 3.0).
location_order = ['Poor', 'Average', 'Good', 'Excellent']
furnishing_order = ['Unfurnished', 'Semi-furnished', 'Furnished']
floor_order = ['Low', 'Mid', 'High']

# Column name -> ordering, listed in the order the encoder receives the
# columns, so the column list and the category list cannot drift apart.
ordinal_categories = {
    'LocationQuality': location_order,
    'Furnishing': furnishing_order,
    'FloorLevel': floor_order,
}
ordinal_columns = list(ordinal_categories)

encoder = OrdinalEncoder(categories=list(ordinal_categories.values()))

# Work on a copy so the raw frame `df` keeps its original string labels.
df_encoded = df.copy()
df_encoded[ordinal_columns] = encoder.fit_transform(df[ordinal_columns])

# Preview the encoded frame
print(df_encoded.head())

   LocationQuality  Furnishing  FloorLevel  Bedrooms  Size (sqft)  \
0              1.0         2.0         1.0         2         2310   
1              3.0         0.0         1.0         3         2166   
2              2.0         0.0         0.0         1         1134   
3              1.0         2.0         0.0         4         1503   
4              0.0         1.0         0.0         2         2025   

   MonthlyRent ($)  
0         13539.80  
1         12351.73  
2          6903.35  
3          9108.23  
4         11084.26  


In [22]:
# Separate the target (monthly rent) from the remaining feature columns.
y = df_encoded['MonthlyRent ($)']
x = df_encoded.drop('MonthlyRent ($)', axis=1)

In [23]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
print(x_train.shape, x_test.shape, sep="\n")

(350, 5)
(88, 5)


In [24]:
# Fit a linear regression on the training split. `fit` returns the
# estimator itself, so the trailing bare `model` keeps the cell's displayed
# output the same as calling `model.fit(...)` as the last expression.
model = LinearRegression().fit(x_train, y_train)
model

In [25]:
# Predict monthly rent for the held-out test rows.
y_pred = model.predict(x_test)

In [26]:
# Score the held-out predictions against the true rents: coefficient of
# determination (R2) and mean absolute error, the latter in the target's units.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(
    f"R2 score is {r2}",
    f"Mean absolute error is {mae}",
    sep="\n",
)

R2 score is 0.9913137171847313
Mean absolute error is 246.5372384714481
