## Importing the dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Data Collection and Analysis

In [2]:
# Load the candidate evaluation dataset from CSV into a DataFrame
recruitment_dataset = pd.read_csv('cleaned_dataset.csv')

In [3]:
# Preview the leading five rows to sanity-check the columns and their values
recruitment_dataset.head(n=5)

Unnamed: 0,UID,Email address,OOPS-21,Javascript-14,Cordova-4,Database-4,Testing-12,Analytical-5,Comprehensive-5,Total,Status,Reference,Experience,Role (QA/DEV/DM/Intern),Date of test,Date of eval
0,1,1@xyz.com,10,6,0,3,3,0,3,24/65,Reject,HAD,3.3yrs,Dev,11/18/2016,11/21/0206
1,3,3@xyz.com,9,5,2,2,6,0,0,24/65,Reject,HAD,4.2yrs,Dev,11/18/2016,1/21/2016
2,4,4@xyz.com,12,7,0,2,8,3,2,33/65,Reject,OSC,1yr,Dev,11/17/2016,11/17/2016
3,5,5@xyz.com,10,1,1,3,11,2,1,29/65,Reject,OSC,2.4yrs,QA,11/17/2016,11/17/2016
4,7,7@xyz.com,6,9,0,1,7,2,2,26/65,Reject,HAD,4yrs,Dev,11/18/2016,11/18/2016


In [4]:
# Dimensions of the dataset as a (rows, columns) tuple
recruitment_dataset.shape

(119, 16)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
recruitment_dataset.describe()

Unnamed: 0,UID,OOPS-21,Javascript-14,Cordova-4,Database-4,Testing-12,Analytical-5,Comprehensive-5
count,119.0,119.0,119.0,119.0,119.0,119.0,119.0,119.0
mean,110.016807,11.067227,6.991597,1.680672,2.537815,6.689076,1.823529,1.478992
std,75.837134,2.146069,2.260553,1.352542,0.899999,2.134256,1.046763,1.27458
min,1.0,6.0,0.0,0.0,0.0,2.0,0.0,0.0
25%,38.5,10.0,6.0,0.0,2.0,5.0,1.0,1.0
50%,132.0,11.0,7.0,2.0,3.0,7.0,2.0,1.0
75%,166.0,12.0,8.0,3.0,3.0,8.0,3.0,2.0
max,254.0,17.0,13.0,4.0,4.0,11.0,4.0,5.0


In [6]:
# Count how many candidates fall into each Status class (the label predicted below)
recruitment_dataset["Status"].value_counts()

Reject     111
Offered      8
Name: Status, dtype: int64

In [7]:
# Split the frame into model inputs and target.
# X keeps only the seven per-skill score columns; every column listed here
# (identifiers, label, aggregate total, free-text metadata, dates) is removed.
X = recruitment_dataset.drop(
    columns=[
        'UID',
        'Status',
        'Total',
        'Email address',
        'Reference',
        'Experience',
        'Role (QA/DEV/DM/Intern)',
        'Date of test',
        'Date of eval',
    ],
)
# Y is the label column on its own
Y = recruitment_dataset["Status"]

In [8]:
# Show the feature matrix, then the label vector, each on its own line block
print(X, Y, sep='\n')

     OOPS-21  Javascript-14  Cordova-4  Database-4  Testing-12  Analytical-5  \
0         10              6          0           3           3             0   
1          9              5          2           2           6             0   
2         12              7          0           2           8             3   
3         10              1          1           3          11             2   
4          6              9          0           1           7             2   
..       ...            ...        ...         ...         ...           ...   
114       11              8          3           3           4             0   
115       13              7          2           1          10             1   
116       12              8          2           1           5             1   
117       10             10          4           3          10             1   
118       13             10          4           3           8             3   

     Comprehensive-5  
0               

## Train-Test split

In [9]:
# Hold out 10% of the rows for testing. Stratifying on Y keeps the class
# proportions similar in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=1,
)

In [10]:
# Shapes of the full, training and test feature sets, in that order
print(*(frame.shape for frame in (X, X_train, X_test)))

(119, 7) (107, 7) (12, 7)


In [11]:
# Show the training features, then the matching training labels
print(X_train, Y_train, sep='\n')

     OOPS-21  Javascript-14  Cordova-4  Database-4  Testing-12  Analytical-5  \
81        11              4          2           3           8             1   
40        11             11          0           4           2             3   
70        11              6          0           4           9             2   
10        10              3          0           3           6             2   
100        7              7          4           2           3             3   
..       ...            ...        ...         ...         ...           ...   
75        10              9          3           2           8             3   
94         9              9          3           4           8             1   
46        10             13          0           3           3             3   
20        11              8          2           2           7             2   
72        12              5          2           1           8             2   

     Comprehensive-5  
81              

## Training the model

In [12]:
# Logistic regression classifier created with scikit-learn's default hyperparameters
model = LogisticRegression()

In [13]:
# Fit the logistic regression model on the training features and labels
model.fit(X_train, Y_train)

LogisticRegression()

## Model Evaluation

In [14]:
# Accuracy on the training data.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the true
# labels are passed first and the model's predictions second.
# NOTE(review): Status is heavily imbalanced (111 Reject vs 8 Offered in the
# value_counts output), so always predicting 'Reject' already scores about
# 0.93; accuracy on its own says little about the 'Offered' class.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [15]:
# Report the fraction of training rows whose predicted Status matches the label
print('Accuracy on training data : ', training_data_accuracy)

Accuracy on training data :  0.9439252336448598


In [16]:
# Accuracy on the held-out test data.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the true
# labels are passed first and the model's predictions second.
# NOTE(review): the test split has only 12 rows (see the shapes printed after
# the split), so a single prediction moves this figure by about 0.083.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [17]:
# Report the fraction of held-out test rows whose predicted Status matches the label
print('Accuracy on test data : ', test_data_accuracy)

Accuracy on test data :  0.9166666666666666


## Making a prediction system

In [65]:
# Scores for a single candidate, given in the same column order as X:
# OOPS-21, Javascript-14, Cordova-4, Database-4, Testing-12, Analytical-5,
# Comprehensive-5
input_data = (15,10,3,3,5,3,5)

# changing the input_data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape to a single row (1, n_features) as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so attach the training column names.
# Predicting on a bare array drops them, and recent scikit-learn versions warn
# that "X does not have valid feature names".
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_data_frame)
print(prediction)

# predict returns an array with one label per input row; only one row was given
if prediction[0] == 'Reject':
    print('The candidates application is rejected')
else:
    print('The candidate has been offered a job')

['Offered']
The candidate has been offered a job
