In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
import joblib
import os

In [2]:
data = pd.read_xml("Student_Record.xml")
data.head(10)

Unnamed: 0,name,overallgrade,obedient,researchscore,projectscore,recommend
0,John,A,Y,85.0,92,yes
1,Alice,B,N,75.0,88,no
2,Bob,C,Y,,78,no
3,Eva,B,,79.0,85,yes
4,David,A,Y,92.0,96,yes


In [3]:
data = data.dropna()
data.head(7)

Unnamed: 0,name,overallgrade,obedient,researchscore,projectscore,recommend
0,John,A,Y,85.0,92,yes
1,Alice,B,N,75.0,88,no
4,David,A,Y,92.0,96,yes


In [4]:
X = data[[
 'overallgrade',
 'obedient',
 'researchscore',
 'projectscore']]
X

Unnamed: 0,overallgrade,obedient,researchscore,projectscore
0,A,Y,85.0,92
1,B,N,75.0,88
4,A,Y,92.0,96


In [5]:
Y = data["recommend"]

In [6]:
ss = StandardScaler()
ss.fit(data[['researchscore','projectscore']])
data[['researchscore','projectscore']] = ss.transform(data[['researchscore','projectscore']])
data[['researchscore','projectscore']]

Unnamed: 0,researchscore,projectscore
0,0.143346,0.0
1,-1.29011,-1.224745
4,1.146764,1.224745


In [7]:
features = pd.get_dummies(X, 
                          columns=['overallgrade','obedient'])
features

Unnamed: 0,researchscore,projectscore,overallgrade_A,overallgrade_B,obedient_N,obedient_Y
0,85.0,92,1,0,0,1
1,75.0,88,0,1,1,0
4,92.0,96,1,0,0,1


In [8]:
lr = LogisticRegression()

lr.fit(features,Y)

In [9]:
predictedLabels = lr.predict(features)

In [10]:
predictedLabels

array(['yes', 'no', 'yes'], dtype=object)

In [11]:
Y

0    yes
1     no
4    yes
Name: recommend, dtype: object

In [12]:
acc_score = accuracy_score(Y, predictedLabels)*100
print(f'Accuracy Score: {acc_score} %')

print('Classification Report:')
print(classification_report(Y, predictedLabels))

Accuracy Score: 100.0 %
Classification Report:
              precision    recall  f1-score   support

          no       1.00      1.00      1.00         1
         yes       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [13]:
if not os.path.exists('Models'):
    os.mkdir('Models')

if not os.path.exists('Scalers'):
    os.mkdir('Scalers')
    
joblib.dump(lr, r'Models/model.pickle')
joblib.dump(ss, r'Scalers/scaler.pickle')


['Scalers/scaler.pickle']