# Student Recommender Pipeline Example

In [2]:
# Data retrieval: load the student records into a DataFrame
import pandas as pd

# Turn off pandas' chained-assignment warning for the session; the feature
# scaling cell assigns into a frame that was selected out of `df`.
pd.set_option('mode.chained_assignment', None)

df = pd.read_csv('../../xdata/student_records.csv')
df

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Henry,A,Y,90,85,Yes
1,John,C,N,85,51,Yes
2,David,F,N,10,17,No
3,Holmes,B,Y,75,71,No
4,Marvin,E,N,20,30,No
5,Simon,A,Y,92,79,Yes
6,Robert,B,Y,60,59,No
7,Trent,C,Y,75,33,No


Data preparation

In [2]:
# Extract features
feature_names = ['OverallGrade', 'Obedient', 'ResearchScore','ProjectScore']
# Take an explicit copy: the numeric columns of `training_features` are
# overwritten during feature scaling, and an independent copy guarantees that
# `df` keeps the raw values and that the assignment is not a chained one.
training_features = df[feature_names].copy()

# Kept as a one-column DataFrame (list selector); later cells read the labels
# back out with outcome_labels['Recommend'].
outcome_name = ['Recommend']
outcome_labels = df[outcome_name]

In [3]:
# View features: display the feature columns selected from df, as they are
# before any scaling or encoding has been applied
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,90,85
1,C,N,85,51
2,F,N,10,17
3,B,Y,75,71
4,E,N,20,30
5,A,Y,92,79
6,B,Y,60,59
7,C,Y,75,33


In [4]:
# Outcome labels: display the 'Recommend' column, held as a one-column DataFrame
outcome_labels

Unnamed: 0,Recommend
0,Yes
1,Yes
2,No
3,No
4,No
5,Yes
6,No
7,No


In [5]:
# Group the feature columns by type: the numeric columns are standardised and
# the categorical columns are one-hot encoded in the cells that follow.
categorical_feature_names = [
    'OverallGrade',
    'Obedient',
]
numeric_feature_names = [
    'ResearchScore',
    'ProjectScore',
]

Feature Scaling

In [6]:
# Numeric feature scaling: standardise the numeric columns
from sklearn.preprocessing import StandardScaler

# fit() returns the scaler itself, so construction and fitting are chained;
# `ss` is the fitted scaler.
ss = StandardScaler().fit(training_features[numeric_feature_names])

# Overwrite the raw numeric columns with their standardised values
training_features[numeric_feature_names] = ss.transform(
    training_features[numeric_feature_names]
)

# Display the updated feature set
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,0.899583,1.37665
1,C,N,0.730648,-0.091777
2,F,N,-1.80339,-1.560203
3,B,Y,0.392776,0.772004
4,E,N,-1.465519,-0.998746
5,A,Y,0.967158,1.117516
6,B,Y,-0.114032,0.253735
7,C,Y,0.392776,-0.869179


In [7]:
# Engineer categorical features: one-hot encode every categorical column.
# dtype=int pins the indicator columns to 0/1 integers; without it,
# pandas >= 2.0 produces boolean (True/False) columns by default.
training_features = pd.get_dummies(
    training_features,
    columns=categorical_feature_names,
    dtype=int,
)

# View the newly engineered features
training_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,OverallGrade_E,OverallGrade_F,Obedient_N,Obedient_Y
0,0.899583,1.37665,1,0,0,0,0,0,1
1,0.730648,-0.091777,0,0,1,0,0,1,0
2,-1.80339,-1.560203,0,0,0,0,1,1,0
3,0.392776,0.772004,0,1,0,0,0,0,1
4,-1.465519,-0.998746,0,0,0,1,0,1,0
5,0.967158,1.117516,1,0,0,0,0,0,1
6,-0.114032,0.253735,0,1,0,0,0,0,1
7,0.392776,-0.869179,0,0,1,0,0,0,1


In [8]:
# List categorical features: every column that is not one of the numeric ones.
# Filtering the columns directly keeps them in DataFrame column order; a set
# difference would return them in an arbitrary order that can differ from one
# run to the next.
categorical_engineered_features = [
    column
    for column in training_features.columns
    if column not in numeric_feature_names
]


Modelling

In [9]:
from sklearn.linear_model import LogisticRegression
import numpy as np

# Train a logistic-regression classifier on the engineered features, using the
# 'Recommend' column (converted to a NumPy array) as the target.
lr = LogisticRegression()
model = lr.fit(
    X=training_features,
    y=np.array(outcome_labels['Recommend']),
)

# Display the fitted estimator and its parameters
model

LogisticRegression()

Model Evaluation

In [10]:
# Evaluate on the training data. The predictions are made for the same rows
# the model was fitted on, so these scores describe how well the model fits
# the training set rather than how it performs on unseen data.
from sklearn.metrics import accuracy_score, classification_report

actual_labels = np.array(outcome_labels['Recommend'])
pred_labels = model.predict(training_features)

# Report overall accuracy as a percentage, then the per-class statistics
print(
    'Acuracy: ',
    float(accuracy_score(actual_labels, pred_labels))*100,
    '%',
)
print('Classification Stats: ')
print(classification_report(actual_labels, pred_labels))

Acuracy:  100.0 %
Classification Stats: 
              precision    recall  f1-score   support

          No       1.00      1.00      1.00         5
         Yes       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



Model Deployment

In [11]:
# Deploy model
import sklearn.externals
import os