### Linear regression formula with multiple variables

$y = m_1 * x_1 + m_2 * x_2 + m_3 * x_3 + b$

m -> Slope\
b -> Y-Intercept

### We will be predicting the price of a house based on the area, bedrooms and age, so the formula becomes

$y = m_1 * area + m_2 * bedrooms + m_3 * age + b$

In [6]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn import linear_model

In [7]:
# Load the house-price training data (columns: area, bedrooms, age, price).
homeprices_csv = '../python/csv_files/homeprices.csv'
df = pd.read_csv(homeprices_csv)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [8]:
# The 'bedrooms' column contains a missing value.
# Impute it with the column median, rounded down so the result is a whole
# number of bedrooms.
bedrooms_median = df['bedrooms'].median()
median_bedrooms = math.floor(bedrooms_median)
df['bedrooms'] = df['bedrooms'].fillna(median_bedrooms)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,3.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


In [9]:
# Fit a multivariate linear regression: price ~ area + bedrooms + age.
house_features = ['area', 'bedrooms', 'age']
reg = linear_model.LinearRegression()
reg.fit(df[house_features], df['price'])

In [None]:
# Coefficients m1, m2, m3 (one per feature, in the order area, bedrooms, age)
print(reg.coef_)

# Intercept b
print(reg.intercept_)

# With these values the price can be computed by hand from
#   y = m1*x1 + m2*x2 + m3*x3 + b
# For a 3000 sq ft house with 2 bedrooms that is 40 years old:
#   y = 137.25*3000 + (-26025)*2 + (-6825)*40 + 383725
#   y = 470425

# The model was fitted on a DataFrame, so query it with a DataFrame that
# carries the same column names. Passing a bare nested list makes
# scikit-learn (>= 1.0) warn that X does not have valid feature names.
house_query = pd.DataFrame([[3000, 2, 40]], columns=['area', 'bedrooms', 'age'])
print(reg.predict(house_query))

[   137.25 -26025.    -6825.  ]
383724.9999999998
[470425.]




# Exercise
### Predict the salary for a candidate with 2 years of experience, a test score of 9 and an interview score of 6

In [13]:
# Load the hiring data set used for the exercise.
hiring_csv = '../python/csv_files/hiring.csv'
data = pd.read_csv(hiring_csv)
data

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,,7,72000
7,11.0,7.0,8,80000


In [18]:
# Impute the missing entries in 'experience' and 'test_score(out of 10)'
# with the median of their respective columns.
experience_median = data['experience'].median()
data['experience'] = data['experience'].fillna(experience_median)

test_score_median = data['test_score(out of 10)'].median()
data['test_score(out of 10)'] = data['test_score(out of 10)'].fillna(test_score_median)
data

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,6.0,8.0,9,50000
1,6.0,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,8.0,7,72000
7,11.0,7.0,8,80000


In [19]:
# Feature columns, in the order used for both training and prediction.
hiring_features = ['experience', 'test_score(out of 10)', 'interview_score(out of 10)']

# Fit salary ~ experience + test score + interview score.
model = linear_model.LinearRegression()
model.fit(data[hiring_features], data['salary($)'])

# Query with a DataFrame that carries the training column names. Passing a
# bare nested list makes scikit-learn (>= 1.0) warn that X does not have
# valid feature names.
# Candidate: 2 years of experience, test score 9, interview score 6.
model.predict(pd.DataFrame([[2, 9, 6]], columns=hiring_features))



array([47056.91056911])

In [20]:
# Candidate: 12 years of experience, test score 10, interview score 10.
# The query is a DataFrame with the training column names so scikit-learn
# (>= 1.0) does not warn about missing feature names.
model.predict(
    pd.DataFrame(
        [[12, 10, 10]],
        columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
    )
)



array([88227.64227642])