<h1>Import libraries</h1>

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

<h1>Import dataset</h1>

In [21]:
df = pd.read_csv('student.csv')

In [22]:
df.head()

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend,Unnamed: 6
0,Henry,A,Y,90,85,Yes,
1,John,C,N,85,51,Yes,
2,David,F,N,10,17,No,
3,Holmes,B,Y,75,71,No,
4,Marvin,E,N,20,30,No,


<h1>Data preparation</h1>

In [25]:
features_name = ['OverallGrade','Obedient','ResearchScore','ProjectScore']
training_feature= df[features_name]
outcomes_names = ['Recommend']
outcome_label = df[outcomes_names]

In [26]:
training_feature

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,90,85
1,C,N,85,51
2,F,N,10,17
3,B,Y,75,71
4,E,N,20,30
5,A,Y,92,79
6,B,Y,60,59
7,C,Y,75,33


In [27]:
outcome_label

Unnamed: 0,Recommend
0,Yes
1,Yes
2,No
3,No
4,No
5,Yes
6,No
7,No


<h1>List down the data based on type</h1>

In [28]:
numeric_columns_name= ['ResearchScore','ProjectScore']
categorical_column_name = ['OverallGrade','Obedient']

In [30]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()

In [31]:
ss.fit(training_feature[numeric_columns_name])
training_feature [numeric_columns_name] = ss.transform(training_feature[numeric_columns_name])
training_feature

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,0.899583,1.37665
1,C,N,0.730648,-0.091777
2,F,N,-1.80339,-1.560203
3,B,Y,0.392776,0.772004
4,E,N,-1.465519,-0.998746
5,A,Y,0.967158,1.117516
6,B,Y,-0.114032,0.253735
7,C,Y,0.392776,-0.869179


In [32]:
training_feature = pd.get_dummies(training_feature,columns=categorical_column_name)
training_feature

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,OverallGrade_E,OverallGrade_F,Obedient_N,Obedient_Y
0,0.899583,1.37665,1,0,0,0,0,0,1
1,0.730648,-0.091777,0,0,1,0,0,1,0
2,-1.80339,-1.560203,0,0,0,0,1,1,0
3,0.392776,0.772004,0,1,0,0,0,0,1
4,-1.465519,-0.998746,0,0,0,1,0,1,0
5,0.967158,1.117516,1,0,0,0,0,0,1
6,-0.114032,0.253735,0,1,0,0,0,0,1
7,0.392776,-0.869179,0,0,1,0,0,0,1


In [33]:
categorical_engineerd_feature = list(set(training_feature.columns) - set(numeric_columns_name))

<h1>Modeling</h1>

In [34]:
from sklearn.linear_model import LogisticRegression

In [35]:
lr = LogisticRegression()
model = lr.fit(training_feature, np.array(outcome_label['Recommend']))
model



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

<h1>Model Evaluation</h1>

In [37]:
pred_labels = model.predict(training_feature)
actual_labels= np.array(outcome_label['Recommend'])


In [39]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

print ('Accuracy: ' , float(accuracy_score(actual_labels,pred_labels))*100, '%')
print('Classification Stats: ')
print (classification_report(actual_labels,pred_labels))

Accuracy:  100.0 %
Classification Stats: 
              precision    recall  f1-score   support

          No       1.00      1.00      1.00         5
         Yes       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



<h1>Model Deployment</h1>

In [40]:
from sklearn.externals import joblib
import os



In [41]:
if not os.path.exists('Model'):
    os.mkdir('Model')
if not os.path.exists ('Scaler'):
    os.mkdir('Scaler')

In [42]:
joblib.dump(model, r'Model/model.pickle')
joblib.dump(ss, r'Scaler/scaler.pickle')

['Scaler/scaler.pickle']

<h1>Prediction in Action</h1>

In [43]:
model = joblib.load(r'Model/model.pickle')
scaler = joblib.load(r'Scaler/scaler.pickle')

In [44]:
new_data = pd.DataFrame([{'Name':'Muhaimin', 'OverallGrade': 'B', 'Obedient': 'N', 'ResearchScore': '30', 'ProjectScore': '20'},
                         {'Name':'Nouman', 'OverallGrade': 'A', 'Obedient': 'Y', 'ResearchScore': '40', 'ProjectScore': '50'},
                         {'Name':'Hassan', 'OverallGrade': 'C', 'Obedient': 'N', 'ResearchScore': '20', 'ProjectScore': '60'}])

In [45]:
new_data

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore
0,Muhaimin,B,N,30,20
1,Nouman,A,Y,40,50
2,Hassan,C,N,20,60


In [46]:
prediction_feature = new_data[features_name]
prediction_feature[numeric_columns_name] = scaler.transform(prediction_feature[numeric_columns_name])

In [47]:
prediction_feature = pd.get_dummies(prediction_feature,columns=categorical_column_name)
prediction_feature

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,Obedient_N,Obedient_Y
0,-1.12765,-1.43064,0,1,0,1,0
1,-0.789775,-0.134966,1,0,0,0,1
2,-1.46552,0.296924,0,0,1,1,0


In [48]:
current_categorical_engineered_feature = set(prediction_feature.columns) - set(numeric_columns_name)
missing_feature= set(categorical_engineerd_feature) - set(current_categorical_engineered_feature)
for feature in missing_feature:
    prediction_feature[feature] = [0] * len(prediction_feature)
    
prediction_feature

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,Obedient_N,Obedient_Y,OverallGrade_F,OverallGrade_E
0,-1.12765,-1.43064,0,1,0,1,0,0,0
1,-0.789775,-0.134966,1,0,0,0,1,0,0
2,-1.46552,0.296924,0,0,1,1,0,0,0


In [49]:
prediction = model.predict(prediction_feature)
new_data['Recommend'] = prediction
new_data

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Muhaimin,B,N,30,20,No
1,Nouman,A,Y,40,50,No
2,Hassan,C,N,20,60,No
