In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [2]:
# Load the home-price data; the displayed frame has area, bedrooms, age and
# price columns, with one missing bedrooms value.
df = pd.read_csv("Data/home_price.csv")
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


### Fill NaN Values

In [3]:
import math  # NOTE(review): not referenced in the visible cells — confirm before removing
# Median of the non-missing bedrooms values; the next cell fills the gap with
# this same statistic.
df.bedrooms.median()

4.0

In [4]:
# Impute missing values in the bedrooms column with the column median.

df["bedrooms"] = df["bedrooms"].fillna(value=df["bedrooms"].median())
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [5]:
# Ordinary least squares: every column except price is a feature and
# price is the target variable.
model = linear_model.LinearRegression()
model.fit(X=df.drop(columns="price"), y=df["price"])

In [6]:
# Predict the price of one home (area=3000, bedrooms=4, age=5).
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names in the same order; a bare nested list makes
# scikit-learn warn that the input lacks the feature names seen during fit.
model.predict(pd.DataFrame([[3000, 4, 5]], columns=["area", "bedrooms", "age"]))



array([634907.2584604])

In [7]:
# Fitted slopes, one per feature in fit order (area, bedrooms, age).
model.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [8]:
# Fitted intercept term of the house-price model.
model.intercept_

221323.00186540408

In [9]:
# Load the hiring data; experience is spelled out in words, and both
# experience and test_score(out of 10) contain missing values.
hr_df = pd.read_csv("Data/hiring.csv")
hr_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [10]:
# Missing experience becomes the word "Zero", matching the spelled-out
# form of the other entries in the column.
hr_df["experience"] = hr_df["experience"].fillna(value="Zero")
hr_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,Zero,8.0,9,50000
1,Zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [11]:
from word2number import w2n

# Convert each spelled-out experience value ("Zero", "five", ...) to a number.
hr_df["experience"] = hr_df["experience"].map(w2n.word_to_num)
hr_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [12]:
# Mean of the available test scores (the one missing value is skipped).
m_test_score = hr_df['test_score(out of 10)'].mean()
m_test_score

7.857142857142857

In [13]:
# Impute the missing test score with the previously computed mean (m_test_score).
hr_df["test_score(out of 10)"] = hr_df["test_score(out of 10)"].fillna(value=m_test_score)
hr_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.857143,7,72000
7,11,7.0,8,80000


In [14]:
# Regress salary on experience, test score and interview score.
reg = linear_model.LinearRegression()
reg.fit(
    X=hr_df[["experience", "test_score(out of 10)", "interview_score(out of 10)"]],
    y=hr_df["salary($)"],
)

In [15]:
# Predict the salary for one candidate (experience=1, test score=5,
# interview score=5). The model was fitted on a DataFrame, so the query is
# passed with the same column names in the same order; a bare nested list
# makes scikit-learn warn that the input lacks the feature names seen during fit.
reg.predict(
    pd.DataFrame(
        [[1, 5, 5]],
        columns=["experience", "test_score(out of 10)", "interview_score(out of 10)"],
    )
)



array([40614.53108006])

## Save Trained Model

In [16]:
import pickle

In [17]:
# Serialize the fitted house-price model to disk.
with open('Data/model_house', mode='wb') as model_file:
    pickle.dump(model, model_file)

In [18]:
# Serialize the fitted hiring model to disk.
with open('Data/model_hiring', mode='wb') as model_file:
    pickle.dump(reg, model_file)

## Open Saved Model

### House Prediction

In [19]:
# Restore the house-price model from the pickle file written by this notebook.
# pickle.load can execute arbitrary code, so only use it on trusted files.
with open('Data/model_house', 'rb') as f:
    m_house = pickle.load(f)

In [20]:
# Coefficients of the restored model; they match the in-memory model's
# coef_ shown earlier, confirming the save/load round trip.
m_house.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [21]:
# Predict with the restored model (area=1700, bedrooms=1, age=5).
# The model was fitted on a DataFrame, so the query is passed with the same
# column names in the same order; a bare nested list makes scikit-learn warn
# that the input lacks the feature names seen during fit.
m_house.predict(pd.DataFrame([[1700, 1, 5]], columns=["area", "bedrooms", "age"]))



array([419059.44370181])

### Hiring Prediction

In [22]:
# Restore the hiring model from the pickle file written by this notebook.
# pickle.load can execute arbitrary code, so only use it on trusted files.
with open('Data/model_hiring', 'rb') as f:
    m_hiring = pickle.load(f)

In [23]:
# Coefficients of the restored hiring model, one per feature in fit order
# (experience, test score, interview score).
m_hiring.coef_

array([2827.63404314, 1912.93803053, 2196.9753141 ])

In [30]:
# Predict with the restored hiring model (experience=1, test score=8,
# interview score=7). The model was fitted on a DataFrame, so the query is
# passed with the same column names in the same order; a bare nested list
# makes scikit-learn warn that the input lacks the feature names seen during fit.
m_hiring.predict(
    pd.DataFrame(
        [[1, 8, 7]],
        columns=["experience", "test_score(out of 10)", "interview_score(out of 10)"],
    )
)



array([50747.29579988])