### Importing packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from word2number import w2n
%matplotlib inline

### Importing Data

In [2]:
# Read the hiring dataset from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer="hiring.csv")
df.head(n=5)

Unnamed: 0,Experience,Test_score,Interview_score,Salary
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000



### Cleaning data

In [3]:
# Missing Experience entries are treated as the word "zero"; every number word
# (e.g. "five") is then converted to its integer value by word2number.
df["Experience"] = df["Experience"].fillna("zero").apply(w2n.word_to_num)
df

Unnamed: 0,Experience,Test_score,Interview_score,Salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [4]:
# Fill missing Test_score values with the mean score rounded down to a whole
# number. The fill value is floor(mean), not the median, so it is named
# accordingly.
import math

test_score_fill = math.floor(df["Test_score"].mean())
df["Test_score"] = df["Test_score"].fillna(test_score_fill)
df

Unnamed: 0,Experience,Test_score,Interview_score,Salary
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


### Modeling

In [5]:
from sklearn import linear_model

# Features are the three candidate attributes; the target is kept as a
# one-column DataFrame, which is why coef_ is printed as a nested array.
x = df.loc[:, ['Experience', 'Test_score', 'Interview_score']]
y = df.loc[:, ['Salary']]

# fit() returns the fitted estimator, so construction and training are chained.
reg = linear_model.LinearRegression().fit(x, y)

print("coefficient:", reg.coef_)
print("intercept:", reg.intercept_)

coefficient: [[2922.26901502 2221.30909959 2147.48256637]]
intercept: [14992.65144669]


### Prediction

In [6]:
# Candidate profiles to score with the fitted model: each list holds one value
# per candidate, and the keys match the feature columns used for training.
dict1 = {
    'Experience': [2, 12, 2, 5],
    'Test_score': [9, 10, 6, 3],
    'Interview_score': [6, 10, 7, 6],
}
prediction = pd.DataFrame(data=dict1)
prediction

Unnamed: 0,Experience,Test_score,Interview_score
0,2,9,6
1,12,10,10
2,2,6,7
3,5,3,6


In [7]:
# Score every candidate with the fitted model and store the result in a Salary
# column. Only the three feature columns are handed to predict(), so re-running
# the cell after Salary has been added still passes the right inputs.
sal = reg.predict(
    prediction.loc[:, ['Experience', 'Test_score', 'Interview_score']]
)
prediction['Salary'] = sal
prediction

Unnamed: 0,Experience,Test_score,Interview_score,Salary
0,2,9,6,53713.866771
1,12,10,10,93747.796287
2,2,6,7,49197.422039
3,5,3,6,49152.819219


### Accuracy (R² score on the training data)

In [8]:
from sklearn.metrics import r2_score

# Evaluate the fit on the same rows the model was trained on.
x = df[['Experience', 'Test_score', 'Interview_score']]
y = df[['Salary']]
y_p = reg.predict(x)

# r2_score expects (y_true, y_pred) and is not symmetric: the observed salaries
# go first and the model's predictions second.
print("R2-score: %.2f" % r2_score(y, y_p))

R2-score: 0.98


In [9]:
# Predict for one candidate with Experience=2, Test_score=10, Interview_score=10.
# The input is a DataFrame carrying the same column names the model was fitted
# on, so the feature order is explicit rather than implied by list position.
reg.predict(
    pd.DataFrame(
        [[2, 10, 10]],
        columns=['Experience', 'Test_score', 'Interview_score'],
    )
)

array([[64525.1061363]])

In [10]:
# Actual salary stored in the data for the row labelled 3.
df.loc[3, 'Salary']

65000