## Multi Variate Linear Regression
- The notebook implements a simple linear regression model to predict the prices of houses based on its feature set.
- We use a data set with the following features;
    - House Age
    - Distance from Market
    - Number of nearby stores
    - Price

In [199]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error

In [200]:
path = "RealEstate.csv"
df = pd.read_csv(path)

In [201]:
df.columns

Index(['House_Age', 'Dist_MRT', 'Num_Store', 'Price'], dtype='object')

In [202]:
df['House_Age2'] = df["House_Age"]**2
df['Dist_MRT2'] = df["Dist_MRT"]**2
df['Num_Store2'] = df["Num_Store"]**2
df = df[['House_Age', 'Dist_MRT', 'Num_Store', 'House_Age2', 'Dist_MRT2', 'Num_Store2', 'Price']]

In [203]:
df.head()

Unnamed: 0,House_Age,Dist_MRT,Num_Store,House_Age2,Dist_MRT2,Num_Store2,Price
0,32.0,84.87882,10,1024.0,7204.414085,100,37.9
1,19.5,306.5947,9,380.25,94000.310068,81,42.2
2,13.3,561.9845,5,176.89,315826.57824,25,47.3
3,13.3,561.9845,5,176.89,315826.57824,25,54.8
4,5.0,390.5684,5,25.0,152543.675079,25,43.1


In [204]:
X= df.iloc[:, :-1]
Y= df.iloc[:,6]

In [205]:
X

Unnamed: 0,House_Age,Dist_MRT,Num_Store,House_Age2,Dist_MRT2,Num_Store2
0,32.0,84.87882,10,1024.00,7.204414e+03,100
1,19.5,306.59470,9,380.25,9.400031e+04,81
2,13.3,561.98450,5,176.89,3.158266e+05,25
3,13.3,561.98450,5,176.89,3.158266e+05,25
4,5.0,390.56840,5,25.00,1.525437e+05,25
5,7.1,2175.03000,3,50.41,4.730756e+06,9
6,34.5,623.47310,7,1190.25,3.887187e+05,49
7,20.3,287.60250,6,412.09,8.271520e+04,36
8,31.7,5512.03800,1,1004.89,3.038256e+07,1
9,17.9,1783.18000,3,320.41,3.179731e+06,9


In [206]:
Y

0      37.9
1      42.2
2      47.3
3      54.8
4      43.1
5      32.1
6      40.3
7      46.7
8      18.8
9      22.1
10     41.4
11     58.1
12     39.3
13     23.8
14     34.3
15     50.5
16     70.1
17     37.4
18     42.3
19     47.7
20     29.3
21     51.6
22     24.6
23     47.9
24     38.8
25     27.0
26     56.2
27     33.6
28     47.0
29     57.1
       ... 
384    12.9
385    46.6
386    55.3
387    25.6
388    27.3
389    67.7
390    38.6
391    31.3
392    35.3
393    40.3
394    24.7
395    42.5
396    31.9
397    32.2
398    23.0
399    37.3
400    35.5
401    27.7
402    28.5
403    39.7
404    41.2
405    37.2
406    40.5
407    22.3
408    28.1
409    15.4
410    50.0
411    40.6
412    52.5
413    63.9
Name: Price, Length: 414, dtype: float64

In [207]:
X= preprocessing.StandardScaler().fit_transform(X)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [208]:
X

array([[ 1.25562833, -0.79249504,  2.00740743,  1.23525621, -0.45425169,
         2.79028482],
       [ 0.15708622, -0.61661163,  1.66750286, -0.1339068 , -0.43994926,
         2.07945215],
       [-0.38779067, -0.41401527,  0.30788458, -0.56642407, -0.4033962 ,
        -0.01563362],
       ...,
       [ 0.09556786, -0.54967846,  0.98769372, -0.19092779, -0.43025068,
         0.88226028],
       [-0.84478419, -0.77668389,  0.30788458, -0.80310051, -0.45362869,
        -0.01563362],
       [-0.98539758, -0.78807071,  1.66750286, -0.85278385, -0.45409055,
         2.07945215]])

In [209]:
X_train = X[:300,:]
Y_train = Y[:300]

In [210]:
X_test = X[300:, :]
Y_test = Y[300:]

In [211]:
print(len(X_train))
print(len(Y_train))
print(len(X_test))
print(len(Y_test))

300
300
114
114


In [212]:
regressor = LinearRegression()
regressor.fit(X_train,Y_train)

print(regressor.intercept_)
print(regressor.coef_)

38.06894307300537
[-10.10220033 -17.20908233   1.63949345   6.89635716  10.34659022
   0.15860544]


In [213]:
Y_pred= regressor.predict(X_test)

In [214]:
mean_squared_error(Y_test, Y_pred)

62.374534166243066