In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [43]:
data = pd.read_excel("Student-Employability-Datasets.xlsx")
data.head()

Unnamed: 0,Name of Student,GENERAL APPEARANCE,MANNER OF SPEAKING,PHYSICAL CONDITION,MENTAL ALERTNESS,SELF-CONFIDENCE,ABILITY TO PRESENT IDEAS,COMMUNICATION SKILLS,Student Performance Rating,CLASS
0,Student 1,4,5,4,5,5,5,5,5,Employable
1,Student 2,4,4,4,4,4,4,3,5,Employable
2,Student 3,4,3,3,3,3,3,2,5,LessEmployable
3,Student 4,3,3,3,2,3,3,3,5,LessEmployable
4,Student 5,4,4,3,3,4,4,3,5,Employable


In [44]:
# check is there any missing data
missing_values_count = data.isnull().sum()
missing_values_count


Name of Student               0
GENERAL APPEARANCE            0
MANNER OF SPEAKING            0
PHYSICAL CONDITION            0
MENTAL ALERTNESS              0
SELF-CONFIDENCE               0
ABILITY TO PRESENT IDEAS      0
COMMUNICATION SKILLS          0
Student Performance Rating    0
CLASS                         0
dtype: int64

In [45]:
# drop the Name of student column
data = data.drop(["Name of Student"], axis = 1)
data.head()

Unnamed: 0,GENERAL APPEARANCE,MANNER OF SPEAKING,PHYSICAL CONDITION,MENTAL ALERTNESS,SELF-CONFIDENCE,ABILITY TO PRESENT IDEAS,COMMUNICATION SKILLS,Student Performance Rating,CLASS
0,4,5,4,5,5,5,5,5,Employable
1,4,4,4,4,4,4,3,5,Employable
2,4,3,3,3,3,3,2,5,LessEmployable
3,3,3,3,2,3,3,3,5,LessEmployable
4,4,4,3,3,4,4,3,5,Employable


In [46]:
# categorical to numeric
numeric_classes = pd.get_dummies(data.iloc[:,-1])
numeric_class = pd.DataFrame(numeric_classes.iloc[:,0])
numeric_class.head(3)

Unnamed: 0,Employable
0,1
1,1
2,0


In [47]:
x = data.drop(["CLASS"], axis = 1).values
y = numeric_class.values
print(type(y), type(x))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [48]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.15, random_state = 0)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(2534, 8) (448, 8) (2534, 1) (448, 1)


## Classification with Neural Network

In [65]:
from keras import layers
from keras import models
from keras import optimizers
from keras import losses
from keras import metrics

model=models.Sequential()
model.add(layers.Dense(512,activation='relu',input_shape=(x_train.shape[1], )))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dense(8, activation = 'relu'))
model.add(layers.Dense(1,activation='sigmoid'))

model.summary()

model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])

history = model.fit(x_train,y_train, epochs=80, batch_size = 128, validation_data=(x_test, y_test))

Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_96 (Dense)            (None, 512)               4608      
                                                                 
 dropout_31 (Dropout)        (None, 512)               0         
                                                                 
 dense_97 (Dense)            (None, 256)               131328    
                                                                 
 dropout_32 (Dropout)        (None, 256)               0         
                                                                 
 dense_98 (Dense)            (None, 256)               65792     
                                                                 
 dense_99 (Dense)            (None, 8)                 2056      
                                                                 
 dense_100 (Dense)           (None, 1)               

In [66]:
print("score on test: " + str(model.evaluate(x_test,y_test)[1]))
print("score on train: "+ str(model.evaluate(x_train,y_train)[1]))

score on test: 0.9129464030265808
score on train: 0.9100236892700195


In [67]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

y_pred_nn = model.predict(x_test)

confusion_matrix_nn = confusion_matrix(y_test, y_pred_nn.round())
print(confusion_matrix_nn)

print("Accuracy Score : ", accuracy_score(y_test, y_pred_nn.round(), normalize=False))

[[156  31]
 [  8 253]]
Accuracy Score :  409


## Classification with Naive Bayes

In [68]:
from sklearn.naive_bayes import MultinomialNB

mnb = MultinomialNB().fit(x_train, y_train.ravel())

print("score on test: " + str(mnb.score(x_test, y_test)))
print("score on train: "+ str(mnb.score(x_train, y_train)))

score on test: 0.5825892857142857
score on train: 0.579321231254933


In [69]:
y_pred_mnb = mnb.predict(x_test)

confusion_matrix_mnb = confusion_matrix(y_test, y_pred_mnb.round())
print(confusion_matrix_mnb)

print("Accuracy Score : ", accuracy_score(y_test, y_pred_mnb.round(), normalize = False))

[[  0 187]
 [  0 261]]
Accuracy Score :  261


## Classification with Logistic Regression

In [70]:
from sklearn.linear_model import LogisticRegression

lr=LogisticRegression()

lr.fit(x_train, y_train.ravel())

print("score on test: " + str(lr.score(x_test, y_test)))
print("score on train: "+ str(lr.score(x_train, y_train)))

score on test: 0.5825892857142857
score on train: 0.6191791633780585


In [71]:
y_pred_lr = lr.predict(x_test)

confusion_matrix_lr = confusion_matrix(y_test, y_pred_lr.round())
print(confusion_matrix_lr)

print("Accuracy Score : ", accuracy_score(y_test, y_pred_lr.round(), normalize = False))

[[ 67 120]
 [ 67 194]]
Accuracy Score :  261


## Classification with K-Nearest Neighbours

In [72]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(x_train, y_train.ravel())

print("score on test: " + str(knn.score(x_test, y_test)))
print("score on train: "+ str(knn.score(x_train, y_train)))

score on test: 0.8995535714285714
score on train: 0.893843725335438


In [73]:
y_pred_knn = knn.predict(x_test)

confusion_matrix_knn = confusion_matrix(y_test, y_pred_knn.round())
print(confusion_matrix_knn)

print("Accuracy Score : ", accuracy_score(y_test, y_pred_knn.round(), normalize = False))

[[158  29]
 [ 16 245]]
Accuracy Score :  403


## Support Vector Machine

In [74]:
from sklearn.svm import LinearSVC

svm=LinearSVC(C = 0.0001)
svm.fit(x_train, y_train.ravel())

print("score on test: " + str(svm.score(x_test, y_test)))
print("score on train: "+ str(svm.score(x_train, y_train)))

score on test: 0.5825892857142857
score on train: 0.579321231254933


In [75]:
y_pred_svm = svm.predict(x_test)

confusion_matrix_svm = confusion_matrix(y_test, y_pred_svm.round())
print(confusion_matrix_svm)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_svm.round(), normalize = False))

[[  0 187]
 [  0 261]]
Accuracy Score:  261


## Classification with Decision Tree

In [76]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier()
dt.fit(x_train, y_train.ravel())

print("score on test: "  + str(dt.score(x_test, y_test)))
print("score on train: " + str(dt.score(x_train, y_train)))

score on test: 0.90625
score on train: 0.9119968429360694


In [77]:
y_pred_dt = dt.predict(x_test)

confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt.round())
print(confusion_matrix_dt)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_dt.round(), normalize = False))

[[167  20]
 [ 22 239]]
Accuracy Score:  406


## Classification with Bagging Decision Tree (Ensemble Learning)

In [78]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bg = BaggingClassifier(DecisionTreeClassifier(),n_estimators=10)
bg.fit(x_train, y_train.ravel())
print("score on test: " + str(bg.score(x_test, y_test)))
print("score on train: "+ str(bg.score(x_train, y_train)))

score on test: 0.90625
score on train: 0.9119968429360694


In [79]:
y_pred_bg = dt.predict(x_test)

confusion_matrix_bg = confusion_matrix(y_test, y_pred_bg)
print(confusion_matrix_bg)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_bg, normalize = False))

[[167  20]
 [ 22 239]]
Accuracy Score:  406


## Classification with Boosting Decision Tree (Ensemble Learning)

In [80]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

adb = AdaBoostClassifier(DecisionTreeClassifier())
adb.fit(x_train, y_train.ravel())

print("score on test: " + str(adb.score(x_test, y_test)))
print("score on train: "+ str(adb.score(x_train, y_train)))

score on test: 0.90625
score on train: 0.9119968429360694


In [81]:
y_pred_adb = dt.predict(x_test)

confusion_matrix_adb = confusion_matrix(y_test, y_pred_adb)
print(confusion_matrix_adb)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_adb, normalize = False))

[[167  20]
 [ 22 239]]
Accuracy Score:  406


## Classification with Random Forest (Ensemble Learning)

In [82]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=30, max_depth=9)
rf.fit(x_train, y_train.ravel())

print("score on test: " + str(rf.score(x_test, y_test)))
print("score on train: "+ str(rf.score(x_train, y_train)))

score on test: 0.8973214285714286
score on train: 0.9013417521704814


In [83]:
y_pred_rf = dt.predict(x_test)

confusion_matrix_rf = confusion_matrix(y_test, y_pred_rf)
print(confusion_matrix_rf)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_rf, normalize = False))

[[167  20]
 [ 22 239]]
Accuracy Score:  406


## Classification with Voting Classifier

In [84]:
from sklearn.ensemble import VotingClassifier

# 1) naive bias = mnb
# 2) logistic regression =lr
# 3) random forest =rf
# 4) support vector machine = svm

evc=VotingClassifier(estimators=[('mnb',mnb),('lr',lr),('rf',rf),('svm',svm)],voting='hard')
evc.fit(x_train, y_train.ravel())

print("score on test: " + str(evc.score(x_test, y_test)))
print("score on train: "+ str(evc.score(x_train, y_train)))

score on test: 0.7165178571428571
score on train: 0.7237569060773481


In [85]:
y_pred_evc = dt.predict(x_test)

confusion_matrix_evc = confusion_matrix(y_test, y_pred_evc)
print(confusion_matrix_evc)

print("Accuracy Score: ", accuracy_score(y_test, y_pred_evc, normalize = False))

[[167  20]
 [ 22 239]]
Accuracy Score:  406


## Accuracy Scores for Algorithms

In [86]:
print("Accuracy Score Neural Network: ", accuracy_score(y_test, y_pred_nn.round(), normalize=False))
print("Accuracy Score Naive Bayes : ", accuracy_score(y_test, y_pred_mnb, normalize = False))
print("Accuracy Score Logistic Regression: ", accuracy_score(y_test, y_pred_lr, normalize = False))
print("Accuracy Score K-Nearest Neighbors: ", accuracy_score(y_test, y_pred_knn, normalize = False))
print("Accuracy Score Support Vector Machine: ", accuracy_score(y_test, y_pred_svm, normalize = False))
print("Accuracy Score Decision Tree: ", accuracy_score(y_test, y_pred_dt, normalize = False))
print("Accuracy Score Bagging Decision Tree: ", accuracy_score(y_test, y_pred_bg, normalize = False))
print("Accuracy Score Boosting Decision Tree: ", accuracy_score(y_test, y_pred_adb, normalize = False))
print("Accuracy Score Random Forest: ", accuracy_score(y_test, y_pred_rf, normalize = False))
print("Accuracy Score Voting Classifier: ", accuracy_score(y_test, y_pred_evc, normalize = False))

Accuracy Score Neural Network:  409
Accuracy Score Naive Bayes :  261
Accuracy Score Logistic Regression:  261
Accuracy Score K-Nearest Neighbors:  403
Accuracy Score Support Vector Machine:  261
Accuracy Score Decision Tree:  406
Accuracy Score Bagging Decision Tree:  406
Accuracy Score Boosting Decision Tree:  406
Accuracy Score Random Forest:  406
Accuracy Score Voting Classifier:  406
