In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
import joblib
import os

In [2]:
data = pd.read_json("Student_Record.json")
data.head(10)

Unnamed: 0,name,overallgrade,obedient,researchscore,projectscore,recommend
0,John,A,,85.0,92,yes
1,Alice,B,N,75.0,88,
2,Bob,C,Y,,78,no
3,Eva,B,Y,79.0,85,yes
4,David,A,Y,92.0,96,yes
5,Sophia,C,N,70.0,82,no
6,Michael,A,Y,88.0,94,yes
7,Olivia,B,Y,81.0,90,yes
8,William,D,N,62.0,72,no
9,Mia,A,Y,91.0,98,yes


In [3]:
data = data.dropna()
data.head(7)

Unnamed: 0,name,overallgrade,obedient,researchscore,projectscore,recommend
3,Eva,B,Y,79.0,85,yes
4,David,A,Y,92.0,96,yes
5,Sophia,C,N,70.0,82,no
6,Michael,A,Y,88.0,94,yes
7,Olivia,B,Y,81.0,90,yes
8,William,D,N,62.0,72,no
9,Mia,A,Y,91.0,98,yes


In [4]:
X = data[[
 'overallgrade',
 'obedient',
 'researchscore',
 'projectscore']]
X

Unnamed: 0,overallgrade,obedient,researchscore,projectscore
3,B,Y,79.0,85
4,A,Y,92.0,96
5,C,N,70.0,82
6,A,Y,88.0,94
7,B,Y,81.0,90
8,D,N,62.0,72
9,A,Y,91.0,98


In [5]:
Y = data["recommend"]

In [6]:
ss = StandardScaler()
ss.fit(data[['researchscore','projectscore']])
data[['researchscore','projectscore']] = ss.transform(data[['researchscore','projectscore']])
data[['researchscore','projectscore']]

Unnamed: 0,researchscore,projectscore
3,-0.137673,-0.370075
4,1.115148,0.925187
5,-1.00501,-0.723328
6,0.729665,0.689685
7,0.055069,0.218681
8,-1.775977,-1.900838
9,1.018777,1.160689


In [7]:
features = pd.get_dummies(X, 
                          columns=['overallgrade','obedient'])
features

Unnamed: 0,researchscore,projectscore,overallgrade_A,overallgrade_B,overallgrade_C,overallgrade_D,obedient_N,obedient_Y
3,79.0,85,0,1,0,0,0,1
4,92.0,96,1,0,0,0,0,1
5,70.0,82,0,0,1,0,1,0
6,88.0,94,1,0,0,0,0,1
7,81.0,90,0,1,0,0,0,1
8,62.0,72,0,0,0,1,1,0
9,91.0,98,1,0,0,0,0,1


In [8]:
lr = LogisticRegression()

lr.fit(features,Y)

In [9]:
predictedLabels = lr.predict(features)

In [10]:
predictedLabels

array(['yes', 'yes', 'no', 'yes', 'yes', 'no', 'yes'], dtype=object)

In [11]:
Y

3    yes
4    yes
5     no
6    yes
7    yes
8     no
9    yes
Name: recommend, dtype: object

In [12]:
acc_score = accuracy_score(Y, predictedLabels)*100
print(f'Accuracy Score: {acc_score} %')

print('Classification Report:')
print(classification_report(Y, predictedLabels))

Accuracy Score: 100.0 %
Classification Report:
              precision    recall  f1-score   support

          no       1.00      1.00      1.00         2
         yes       1.00      1.00      1.00         5

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7



In [13]:
if not os.path.exists('Models'):
    os.mkdir('Models')

if not os.path.exists('Scalers'):
    os.mkdir('Scalers')
    
joblib.dump(lr, r'Models/model.pickle')
joblib.dump(ss, r'Scalers/scaler.pickle')


['Scalers/scaler.pickle']