In [1]:
# Importing Libraries

import pandas as pd

import numpy as np

In [2]:
# Importing Dataset: read the raw CSV into a DataFrame
DATA_FILE = 'hirable.csv'
dataset = pd.read_csv(DATA_FILE)

In [3]:
# Cleaning up dataset: discard the columns that the rest of the notebook does not use
unused_columns = [
    "sl_no", "ssc_p", "ssc_b", "hsc_p", "hsc_b", "hsc_s",
    "specialisation", "salary", "degree_t",
]
dataset = dataset.drop(columns=unused_columns)



In [4]:
# Preview the first rows of the frame that remains after the column drop.
dataset.head()

Unnamed: 0,gender,degree_p,workex,etest_p,mba_p,status
0,M,58.0,No,55.0,58.8,Placed
1,M,77.48,Yes,86.5,66.28,Placed
2,M,64.0,No,75.0,57.8,Placed
3,M,52.0,No,66.0,59.43,Not Placed
4,M,73.3,No,96.8,55.5,Placed


In [5]:
# Give the two degree-percentage columns shorter names.
column_renames = {'degree_p': 'bsc', 'mba_p': 'msc'}
dataset = dataset.rename(column_renames, axis='columns')

In [6]:
# Encode gender as integers: M -> 1, F -> 2.
# The explicit astype pins the column to int64 instead of relying on replace()
# to downcast the object column implicitly (deprecated in recent pandas).
# It also fails loudly if any value other than 'M'/'F' (or 1/2) is left over.
dataset['gender'] = dataset['gender'].replace({'M': 1, 'F': 2}).astype('int64')

In [7]:
# Encode work experience as integers: Yes -> 1, No -> 0.
# The explicit astype pins the column to int64 instead of relying on replace()
# to downcast the object column implicitly (deprecated in recent pandas).
# It also fails loudly if any value other than 'Yes'/'No' (or 1/0) is left over.
dataset['workex'] = dataset['workex'].replace({'Yes': 1, 'No': 0}).astype('int64')

In [8]:
# Encode the target as integers: Placed -> 1, Not Placed -> 0.
# The explicit astype guarantees an int64 target. Without it the dtype depends
# on replace() downcasting the object column implicitly (deprecated in recent
# pandas), and an object-dtype target is not a valid label array for the classifier.
# It also fails loudly if any value other than the two known labels is left over.
dataset['status'] = dataset['status'].replace({'Placed': 1, 'Not Placed': 0}).astype('int64')

In [9]:
# Show the frame after gender, workex and status have been recoded to integers.
dataset

Unnamed: 0,gender,bsc,workex,etest_p,msc,status
0,1,58.00,0,55.0,58.80,1
1,1,77.48,1,86.5,66.28,1
2,1,64.00,0,75.0,57.80,1
3,1,52.00,0,66.0,59.43,0
4,1,73.30,0,96.8,55.50,1
...,...,...,...,...,...,...
210,1,77.60,0,91.0,74.49,1
211,1,72.00,0,74.0,53.62,1
212,1,73.00,1,59.0,69.72,1
213,2,58.00,0,70.0,60.23,1


In [10]:
# Downscaling helper for the BSc & MSc grades

def downscale(score):
    """Return ``score`` divided by 10 and then by 2 (a factor of 20 overall).

    ``score`` may be a plain number or any object that supports ``/``
    element-wise, such as a pandas Series. For example, 58.0 becomes 2.9.
    """
    tenth = score / 10
    return tenth / 2

degrees = ['bsc', 'msc']

# Rescale both grade columns in a single column-wise pass.
# Note: the result overwrites the same columns, so running this cell again
# rescales the already-rescaled values.
dataset[degrees] = dataset[degrees].apply(downscale)

In [11]:
# Separating into dependent and independent variables
TARGET = 'status'

# X: every column except the target; y: the target column itself.
X = dataset.drop(columns=[TARGET])
y = dataset[TARGET]

In [12]:
# Inspect the target Series (the `status` column).
y

0      1
1      1
2      1
3      0
4      1
      ..
210    1
211    1
212    1
213    1
214    0
Name: status, Length: 215, dtype: int64

In [13]:
# Inspect the feature frame (every column of `dataset` except `status`).
X

Unnamed: 0,gender,bsc,workex,etest_p,msc
0,1,2.900,0,55.0,2.9400
1,1,3.874,1,86.5,3.3140
2,1,3.200,0,75.0,2.8900
3,1,2.600,0,66.0,2.9715
4,1,3.665,0,96.8,2.7750
...,...,...,...,...,...
210,1,3.880,0,91.0,3.7245
211,1,3.600,0,74.0,2.6810
212,1,3.650,1,59.0,3.4860
213,2,2.900,0,70.0,3.0115


In [14]:
pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [15]:
import sklearn

In [16]:
# Splitting dataset into training and testing

from sklearn.model_selection import train_test_split

# 80% of the rows go to training; random_state fixes the shuffle.
split_parts = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)
X_train, X_test, y_train, y_test = split_parts

In [23]:
# Fitting with random forest model

from sklearn.ensemble import RandomForestClassifier

# Seed the forest so that a fresh Restart & Run All rebuilds the same trees
# and therefore the same accuracy and sample predictions. Without a
# random_state every run trains a different forest.
model = RandomForestClassifier(n_estimators=100, random_state=1)

# Fit on the bare NumPy array (no column names): the later sample predictions
# pass plain arrays, so the model is trained on the same kind of input.
model.fit(X_train.values, y_train)

In [24]:
# Prediction and testing

# The model was fitted on X_train.values (a plain array without column names),
# so predict on X_test.values as well. Passing the DataFrame makes scikit-learn
# warn about feature names that were not seen during fit, and the reloaded
# model is likewise scored on X_test.values.
y_pred = model.predict(X_test.values)



In [25]:
# Report and Accuracy Score

from sklearn import metrics
from sklearn.metrics import classification_report

# Compute both summaries first, then print them.
accuracy = metrics.accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report RF:\n", report)

Accuracy: 0.8837209302325582
Classification Report RF:
               precision    recall  f1-score   support

           0       0.81      0.87      0.84        15
           1       0.93      0.89      0.91        28

    accuracy                           0.88        43
   macro avg       0.87      0.88      0.87        43
weighted avg       0.89      0.88      0.88        43



In [26]:
# Model testing on new data

# Feature order: [[gender, bsc, workex, etest_p, msc]]

# Sample 1
# gender must use the training encoding (1 = M, 2 = F). The earlier value 0 is
# not a valid code; being below both codes, threshold splits on gender should
# treat it like 1, so 1 is written explicitly here.
# NOTE(review): change to 2 if a female candidate was intended.
sample = np.array([[1, 2.9, 1, 78.50, 3.7]])

model.predict(sample)

array([1])

In [29]:
# Sample 2
# Feature order: gender, bsc, workex, etest_p, msc (here with a bsc grade of 0).
sample_features = [1, 0, 1, 78.50, 3.7]
sample = np.array(sample_features).reshape(1, -1)  # a single row, as predict() expects

model.predict(sample)

array([0])

In [31]:
# Saving model

import pickle

# The context manager flushes and closes the file even if pickling fails;
# a bare open() passed straight to pickle.dump is never closed explicitly.
with open('hireable.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)



In [33]:
# Reload the saved model and check that it scores the same on the test split.
# NOTE: pickle.load can execute arbitrary code, so only load files you created
# yourself or otherwise trust.
with open('hireable.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Mean accuracy of the reloaded model on the held-out data.
result = loaded_model.score(X_test.values, y_test)

print(result)

0.8837209302325582
