In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
### Load the dataset
# Read the house-rent CSV; the relative path means the file must sit in the
# notebook's working directory.
data = pd.read_csv("House_Rent_Dataset.csv")

In [2]:
### Pre-processing
# Keep only the four predictors used below plus the Rent target; every other
# column of the raw file is discarded from here on.
data = data.loc[:, ['Size', 'Area Type', 'Furnishing Status', 'City', 'Rent']]

In [3]:
# Preview the reduced frame (selected predictors plus the Rent target).
data

Unnamed: 0,Size,Area Type,Furnishing Status,City,Rent
0,1100,Super Area,Unfurnished,Kolkata,10000
1,800,Super Area,Semi-Furnished,Kolkata,20000
2,1000,Super Area,Semi-Furnished,Kolkata,17000
3,800,Super Area,Unfurnished,Kolkata,10000
4,850,Carpet Area,Unfurnished,Kolkata,7500
...,...,...,...,...,...
4741,1000,Carpet Area,Semi-Furnished,Hyderabad,15000
4742,2000,Super Area,Semi-Furnished,Hyderabad,29000
4743,1750,Carpet Area,Semi-Furnished,Hyderabad,35000
4744,1500,Carpet Area,Semi-Furnished,Hyderabad,45000


In [4]:
def one_hot_encode(data, column):
    """Replace a categorical column with its one-hot indicator columns.

    Args:
        data: DataFrame containing ``column``. It is not modified; a new
            frame is returned.
        column: Name of the categorical column to encode.

    Returns:
        A new DataFrame without ``column`` and with one indicator column per
        category level appended on the right. The first level is dropped
        (``drop_first=True``), so k levels yield k-1 indicator columns, each
        named after its level.
    """
    indicator_columns = pd.get_dummies(data[column], drop_first=True)
    # Indicators share the original row index, so an index join lines them up.
    return data.drop(columns=column).join(indicator_columns)
# Encode each categorical column in turn; the order is kept so the resulting
# column layout is unchanged.
for categorical_column in ('Furnishing Status', 'Area Type', 'City'):
    data = one_hot_encode(data, categorical_column)

In [5]:
# Preview the frame after one-hot encoding the three categorical columns.
data

Unnamed: 0,Size,Rent,Semi-Furnished,Unfurnished,Carpet Area,Super Area,Chennai,Delhi,Hyderabad,Kolkata,Mumbai
0,1100,10000,0,1,0,1,0,0,0,1,0
1,800,20000,1,0,0,1,0,0,0,1,0
2,1000,17000,1,0,0,1,0,0,0,1,0
3,800,10000,0,1,0,1,0,0,0,1,0
4,850,7500,0,1,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4741,1000,15000,1,0,1,0,0,0,1,0,0
4742,2000,29000,1,0,0,1,0,0,1,0,0
4743,1750,35000,1,0,1,0,0,0,1,0,0
4744,1500,45000,1,0,1,0,0,0,1,0,0


In [6]:
## Separate the prediction target (y = Rent) from the predictors (X = everything else)
y = data['Rent']
X = data.drop(columns='Rent')

In [7]:
## Split the dataset into training and test data
# 20% of the rows are held out for testing; the fixed random_state makes the
# split reproducible across runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
## Standardize the features
# The scaler learns its statistics from the training split only and the same
# fitted scaler is then applied to both splits, so no test-set statistics
# influence the scaling.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)


In [9]:
## Fit an ordinary least squares linear regression
# Note: scikit-learn's LinearRegression solves the least-squares problem
# directly; it does not run gradient descent (SGDRegressor would be the
# iterative, gradient-based alternative).
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X_train, y_train)
# Displayed as the cell output: one coefficient per standardized feature column.
model.coef_

array([33690.08774849, -4123.93319747, -4165.29811375,  -247.54933627,
       -3926.88766155, -2347.04721331,  4592.76948669, -5920.23808736,
       -1207.04009356, 24133.11329198])

In [10]:
### Quantitative Evaluation
from sklearn.metrics import mean_squared_error, r2_score
# Score the model on the held-out (already standardized) test features.
y_preds = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_preds)
test_r2 = r2_score(y_test, y_preds)
# Fitted coefficients of the linear model
print("Coefficients: \n", model.coef_)
# Mean squared error, in squared rent units
print("Mean squared error: %.2f" % test_mse)
# Coefficient of determination (R^2): 1 is perfect prediction
print("Coefficient of determination: %.2f" % test_r2)

Coefficients: 
 [33690.08774849 -4123.93319747 -4165.29811375  -247.54933627
 -3926.88766155 -2347.04721331  4592.76948669 -5920.23808736
 -1207.04009356 24133.11329198]
Mean squared error: 1899976829.91
Coefficient of determination: 0.52


In [11]:
### Qualitative Evaluation
# Compare the model's forecast with the recorded rent for one concrete listing.
# NOTE: row 0 comes from the full frame X, so it may have been part of the
# training split; treat this as a sanity check, not a held-out test.
sample_data = X.iloc[0]
# Double brackets keep the sample as a one-row DataFrame, so the scaler (fitted
# on a DataFrame) can match the column names it was fitted with. Passing a bare
# ndarray skips that check and makes scikit-learn emit
# "X does not have valid feature names".
sample_frame = X.iloc[[0]]
sample_data_standardized = sc.transform(sample_frame)
model_rent_forecast = model.predict(sample_data_standardized)[0]
# Only the last expression of a cell is rendered: the actual rent of row 0.
# The forecast itself is displayed by a later cell.
y.iloc[0]



10000

In [13]:
# Show the encoded first row, i.e. the listing used for the forecast comparison.
data.head(1)

Unnamed: 0,Size,Rent,Semi-Furnished,Unfurnished,Carpet Area,Super Area,Chennai,Delhi,Hyderabad,Kolkata,Mumbai
0,1100,10000,0,1,0,1,0,0,0,1,0


In [14]:
# Model's predicted rent for the first row (computed in the qualitative-evaluation cell).
model_rent_forecast

23726.04067963545

In [15]:
# Actual recorded rent for the first row, for comparison with the forecast above.
y.iloc[0]


10000