In [1]:
import pandas as pd

In [2]:
# Importing Data from CSV file
salary_data = pd.read_csv("salaries-dataset.csv")

In [27]:
# Data
salary_data.head()

Unnamed: 0,company,job,degree,salary_more_then_100k
0,google,sales executive,bachelors,0
1,google,sales executive,masters,0
2,google,business manager,bachelors,1
3,google,business manager,masters,1
4,google,computer programmer,bachelors,0


In [33]:
# Independent Variables
inputs = salary_data.drop(["salary_more_then_100k"],axis="columns")
inputs.head()

Unnamed: 0,company,job,degree
0,google,sales executive,bachelors
1,google,sales executive,masters
2,google,business manager,bachelors
3,google,business manager,masters
4,google,computer programmer,bachelors


In [34]:
# Dependent Variables
target = salary_data.drop(["company","job","degree"],axis="columns")
target.head()

Unnamed: 0,salary_more_then_100k
0,0
1,0
2,1
3,1
4,0


In [30]:
# Data Prep
from sklearn.preprocessing import LabelEncoder
le_company = LabelEncoder()
le_job = LabelEncoder()
le_degree = LabelEncoder()

In [35]:
inputs["company"] = le_company.fit_transform(inputs["company"])
inputs['job'] = le_company.fit_transform(inputs['job'])
inputs['degree'] = le_company.fit_transform(inputs['degree'])
inputs.head()

Unnamed: 0,company,job,degree
0,2,2,0
1,2,2,1
2,2,0,0
3,2,0,1
4,2,1,0


In [36]:
# Cresting Model Object
from sklearn import tree
salary_model = tree.DecisionTreeClassifier()

In [37]:
# Training Model
salary_model.fit(inputs,target)

DecisionTreeClassifier()

In [38]:
# Model Score
print("Model Score : {:.4f}".format(salary_model.score(inputs,target)))

Model Score : 1.0000


In [42]:
# Splitting Data for testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.5)

In [43]:
# Testing
salary_predicted = salary_model.predict(X_test)
salary_predicted

array([1, 1, 0, 1, 1, 0, 1, 1], dtype=int64)

In [47]:
# Results
result = pd.concat([X_test,y_test],axis="columns")
result['predicted_salary_above_100k'] = salary_predicted
result

Unnamed: 0,company,job,degree,salary_more_then_100k,predicted_salary_above_100k
15,1,1,1,1,1
14,1,1,0,1,1
8,0,0,0,0,0
5,2,1,1,1,1
3,2,0,1,1,1
6,0,2,1,0,0
12,1,0,0,1,1
9,0,0,1,1,1


In [50]:
# Exporting Results to CSV file
result.to_csv("result.csv",index=False)

In [51]:
# Saving Salary Model to binary file
import joblib
joblib.dump(salary_model,"salary-prediction-model")

['salary-prediction-model']