In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



### Import dataset

In [2]:
# Load the housing dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('Housing.csv')

In [3]:
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
# Count missing values per column; any non-zero count would need handling
# before the columns are encoded and scaled.
df.isnull().sum()

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64

In [5]:
# List each column's dtype. Columns reported as object hold text labels and
# have to be converted to numbers before a regression can be fitted.
print(df.dtypes)

price                int64
area                 int64
bedrooms             int64
bathrooms            int64
stories              int64
mainroad            object
guestroom           object
basement            object
hotwaterheating     object
airconditioning     object
parking              int64
prefarea            object
furnishingstatus    object
dtype: object


In [6]:
# Replace categorical values with integers

In [7]:
# Inspect the distinct labels in 'mainroad' before encoding them.
df['mainroad'].unique()

array(['yes', 'no'], dtype=object)

In [8]:
# Encode the 'mainroad' flag as 1/0 and assign the result back to the column.
# Calling replace(..., inplace=True) on df['mainroad'] is chained assignment:
# it warns on recent pandas and leaves df unchanged under Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['mainroad'] = df['mainroad'].map({'yes': 1, 'no': 0})

In [9]:
# Inspect the distinct labels in 'guestroom' before encoding them.
df['guestroom'].unique()

array(['no', 'yes'], dtype=object)

In [10]:
# Encode the 'guestroom' flag as 1/0 and assign the result back to the column.
# Calling replace(..., inplace=True) on df['guestroom'] is chained assignment:
# it warns on recent pandas and leaves df unchanged under Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['guestroom'] = df['guestroom'].map({'yes': 1, 'no': 0})

In [11]:
# Inspect the distinct labels in 'basement' before encoding them.
df['basement'].unique()

array(['no', 'yes'], dtype=object)

In [12]:
# Encode the 'basement' flag as 1/0 and assign the result back to the column.
# Calling replace(..., inplace=True) on df['basement'] is chained assignment:
# it warns on recent pandas and leaves df unchanged under Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['basement'] = df['basement'].map({'yes': 1, 'no': 0})

In [13]:
# Inspect the distinct labels in 'hotwaterheating' before encoding them.
df['hotwaterheating'].unique()

array(['no', 'yes'], dtype=object)

In [14]:
# Encode the 'hotwaterheating' flag as 1/0 and assign the result back to the
# column. Calling replace(..., inplace=True) on df['hotwaterheating'] is
# chained assignment: it warns on recent pandas and leaves df unchanged under
# Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['hotwaterheating'] = df['hotwaterheating'].map({'yes': 1, 'no': 0})

In [15]:
# Inspect the distinct labels in 'airconditioning' before encoding them.
df['airconditioning'].unique()

array(['yes', 'no'], dtype=object)

In [16]:
# Encode the 'airconditioning' flag as 1/0 and assign the result back to the
# column. Calling replace(..., inplace=True) on df['airconditioning'] is
# chained assignment: it warns on recent pandas and leaves df unchanged under
# Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['airconditioning'] = df['airconditioning'].map({'yes': 1, 'no': 0})

In [17]:
# Inspect the distinct labels in 'prefarea' before encoding them.
df['prefarea'].unique()

array(['yes', 'no'], dtype=object)

In [18]:
# Encode the 'prefarea' flag as 1/0 and assign the result back to the column.
# Calling replace(..., inplace=True) on df['prefarea'] is chained assignment:
# it warns on recent pandas and leaves df unchanged under Copy-on-Write.
# Any label other than 'yes'/'no' would become NaN rather than pass through.
df['prefarea'] = df['prefarea'].map({'yes': 1, 'no': 0})

In [19]:
# Inspect the distinct labels in 'furnishingstatus'; this column is one-hot
# encoded with pd.get_dummies further down instead of being mapped to 1/0.
df['furnishingstatus'].unique()

array(['furnished', 'semi-furnished', 'unfurnished'], dtype=object)

In [20]:
# Confirm the yes/no columns now hold 1/0; 'furnishingstatus' is still text here.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished


In [21]:
# Preview the full one-hot encoding of 'furnishingstatus' (one column per label).
# This frame is for inspection only: dummy_col is rebuilt with drop_first=True
# in the following cell.
dummy_col = pd.get_dummies(df['furnishingstatus'])
dummy_col.head()

Unnamed: 0,furnished,semi-furnished,unfurnished
0,1,0,0
1,1,0,0
2,0,1,0
3,1,0,0
4,1,0,0


In [22]:
# One-hot encode 'furnishingstatus', dropping the first level ('furnished') so
# it serves as the baseline and the dummy columns are not perfectly collinear.
# dtype is pinned because the get_dummies default changed from uint8 to bool in
# pandas 2.0; uint8 keeps these columns as 0/1 integers like the other flags.
dummy_col = pd.get_dummies(df['furnishingstatus'], drop_first=True, dtype='uint8')
dummy_col.head()

Unnamed: 0,semi-furnished,unfurnished
0,0,0
1,0,0
2,1,0
3,0,0
4,0,0


In [23]:
# Append the dummy columns to the right of the existing columns; both frames
# share the same row index, so rows line up one-to-one.
df = pd.concat((df, dummy_col), axis='columns')
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,semi-furnished,unfurnished
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,furnished,0,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,furnished,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,semi-furnished,1,0
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,furnished,0,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,furnished,0,0


In [24]:
# The original text column is redundant now that its dummy columns are in df.
df = df.drop(columns='furnishingstatus')

In [25]:
# Confirm 'furnishingstatus' is gone and only its dummy columns remain.
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,semi-furnished,unfurnished
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1,0
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0,0


In [26]:
# Re-check dtypes after encoding: every column should now be numeric.
print(df.dtypes)

price              int64
area               int64
bedrooms           int64
bathrooms          int64
stories            int64
mainroad           int64
guestroom          int64
basement           int64
hotwaterheating    int64
airconditioning    int64
parking            int64
prefarea           int64
semi-furnished     uint8
unfurnished        uint8
dtype: object


In [27]:
# Scaling: min-max scale the multi-valued numeric columns (and the target
# 'price') to the scaler's default 0-1 range; the 0/1 encoded columns are not
# in col_to_scale and are left as they are.
# NOTE(review): the scaler is fitted on the whole frame before the train/test
# split, so the test rows' minima/maxima influence the training features.
# Consider splitting first and fitting the scaler on the training rows only.
# NOTE(review): 'price' is scaled as well, so the predictions and MAE/MSE
# computed later are in scaled units and must be inverted to read as prices.
scaler = MinMaxScaler()
col_to_scale = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'price']
df[col_to_scale] = scaler.fit_transform(df[col_to_scale])

### Split the Dataset into Training and Testing Sets

In [28]:
# Target is the 'price' column; the features are every remaining column.
y = df.loc[:, 'price']
X = df.drop(columns='price')

In [29]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

### Train the Model

In [30]:
# Linear regression estimator with scikit-learn's default settings.
model = LinearRegression()

In [31]:
# Fit the regression on the training split only.
model.fit(X_train, y_train)

LinearRegression()

In [32]:
# Predict the target for the held-out test rows (values are on the scaled
# 'price' axis, because the target was min-max scaled before the split).
predictions = model.predict(X_test)

In [33]:
# Side-by-side table of true vs. predicted values, keeping y_test's row index.
results = y_test.to_frame(name='Actual').assign(Predicted=predictions)
results.head()

Unnamed: 0,Actual,Predicted
443,0.127273,0.073672
128,0.35697,0.356383
79,0.424242,0.430756
348,0.178788,0.170344
335,0.187879,0.264794


### Evaluate the Model

In [34]:
# Score the test-set predictions with all three regression metrics in one pass.
MAE, MSE, r2 = (
    metric(y_test, predictions)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [35]:
# Mean absolute error on the test split, in min-max scaled price units.
MAE

0.06552787668695689

In [36]:
# Mean squared error on the test split, in squared min-max scaled price units.
MSE

0.007673803856015366

In [37]:
# R^2 (coefficient of determination) on the test split.
r2

0.6988292561548909

In [38]:
# Pair each training column with its fitted weight, in column order.
coefficients = pd.DataFrame(
    list(zip(X_train.columns, model.coef_)),
    columns=['feature', 'coefficient'],
)
print(coefficients)

            feature  coefficient
0              area     0.279090
1          bedrooms     0.051574
2         bathrooms     0.266963
3           stories     0.129899
4          mainroad     0.048473
5         guestroom     0.019589
6          basement     0.033009
7   hotwaterheating     0.077018
8   airconditioning     0.070551
9           parking     0.072617
10         prefarea     0.054569
11   semi-furnished    -0.003226
12      unfurnished    -0.034864
