## Predicting Home Prices

In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [3]:
# Load the home-price table from the CSV that sits next to this notebook and display it.
homeprices_path = 'homeprices.csv'
df = pd.read_csv(homeprices_path)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


However, there is a NaN value in the bedrooms column in the row with index 2 (the third row). It must be filled in before a model can be fitted.

In [6]:
import math

# Median number of bedrooms, floored so the fill value is a whole bedroom count.
median_bedrooms = math.floor(df.bedrooms.median())
# Alias kept for cells that still use the original, misleading "bathrooms" name.
median_bathrooms = median_bedrooms
median_bedrooms

4

In [8]:
# Preprocessing: the data must be cleaned before any machine learning model is applied.
# Replace the missing bedroom count with the floored median computed earlier.
df['bedrooms'] = df['bedrooms'].fillna(median_bathrooms)
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [10]:
# Create an unfitted linear regression estimator object.
reg = linear_model.LinearRegression()

In [11]:
# Fit the model: the first argument holds the independent variables, the second the target.
# The target is passed as a one-column DataFrame, so coef_ comes back 2-D (indexed as
# reg.coef_[0, k] further down).
feature_columns = ['area', 'bedrooms', 'age']
reg.fit(df[feature_columns], df[['price']])

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [12]:
# Fitted slopes m1, m2, m3 in price = m1*area + m2*bedrooms + m3*age + b,
# where b is reg.intercept_.
reg.coef_

array([[  112.06244194, 23388.88007794, -3231.71790863]])

In [14]:
# Intercept term b of the fitted model (the constant added to the weighted feature sum).
reg.intercept_

array([221323.0018654])

In [15]:
# Predict the price of a home with area = 3000 sq. ft., bedrooms = 3, and age = 40 years.
# The query is a DataFrame carrying the same column names used for fitting, so
# scikit-learn versions that track feature names do not warn about unnamed input.
reg.predict(pd.DataFrame([[3000, 3, 40]], columns=['area', 'bedrooms', 'age']))

array([[498408.25158031]])

In [20]:
# Reproduce the prediction for (area=3000, bedrooms=3, age=40) by hand:
# weighted sum of the features plus the intercept.
m_area, m_bedrooms, m_age = reg.coef_[0]
m_area*3000 + m_bedrooms*3 + m_age*40 + reg.intercept_

array([498408.25158031])

In [21]:
# Predict the price of a home with area = 2500 sq. ft., bedrooms = 4, and age = 5 years.
# The query is a DataFrame carrying the same column names used for fitting, so
# scikit-learn versions that track feature names do not warn about unnamed input.
reg.predict(pd.DataFrame([[2500, 4, 5]], columns=['area', 'bedrooms', 'age']))

array([[578876.03748933]])

## Hiring Prediction

In [2]:
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression

In [5]:
# Load the hiring table from the CSV that sits next to this notebook and display it.
hiring_path = 'hiring.csv'
hiredf = pd.read_csv(hiring_path)
hiredf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [16]:
# Missing experience is filled with the word 'zero', keeping the column as number words
# until it is converted to integers later on.
hiredf['experience'] = hiredf['experience'].fillna('zero')
hiredf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [13]:
# Median test score; used as the fill value for the missing test score.
med = hiredf['test_score(out of 10)'].median()

In [15]:
# Replace the missing test score with the median test score stored in `med`.
test_score_column = 'test_score(out of 10)'
hiredf[test_score_column] = hiredf[test_score_column].fillna(med)
hiredf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [18]:
import sys
!{sys.executable} -m pip install word2number

Collecting word2number
  Downloading https://files.pythonhosted.org/packages/4a/29/a31940c848521f0725f0df6b25dca8917f13a2025b0e8fcbe5d0457e45e6/word2number-1.1.zip
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py) ... done
  Stored in directory: /Users/catherineng/Library/Caches/pip/wheels/46/2f/53/5f5c1d275492f2fce1cdab9a9bb12d49286dead829a4078e0e
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1


In [36]:
from word2number import w2n

In [33]:
# Convert each experience word ('five', 'two', ...) to its integer value.
# Iterating over the column's values, rather than looking rows up by a positional
# counter, keeps this correct even if the frame's index is not the default 0..n-1.
numbers = [w2n.word_to_num(word) for word in hiredf.experience]

In [35]:
# Overwrite the experience words with their integer equivalents and show the result.
hiredf['experience'] = numbers
hiredf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [37]:
# Create an unfitted linear regression estimator for the salary model.
hirereg = LinearRegression()

In [38]:
# Fit the salary model: every column except the last is a feature, and the salary
# column (kept as a one-column DataFrame) is the target.
feature_frame = hiredf[hiredf.columns[:-1]]
target_frame = hiredf[['salary($)']]
hirereg.fit(feature_frame, target_frame)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [39]:
# Fitted coefficients, one per feature column in the order the columns were passed to fit.
hirereg.coef_

array([[2812.95487627, 1845.70596798, 2205.24017467]])

In [40]:
# Intercept term of the fitted salary model.
hirereg.intercept_

array([17737.26346434])

In [41]:
# Predict the salary of a candidate with 2 years of experience, a test score of 9,
# and an interview score of 6.
# The query is a DataFrame carrying the same column names used for fitting, so
# scikit-learn versions that track feature names do not warn about unnamed input.
hirereg.predict(pd.DataFrame(
    [[2, 9, 6]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([[53205.96797671]])

In [42]:
# Predict the salary of a candidate with 12 years of experience, a test score of 10,
# and an interview score of 10.
# The query is a DataFrame carrying the same column names used for fitting, so
# scikit-learn versions that track feature names do not warn about unnamed input.
hirereg.predict(pd.DataFrame(
    [[12, 10, 10]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'],
))

array([[92002.18340611]])