In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
import joblib
import os

In [2]:
data = pd.read_csv('Students_Records.csv', encoding='utf-8')
data.head()

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ahmed,A,Y,90,85,Yes
1,Zahid,C,N,85,51,Yes
2,Amjad,F,N,10,17,No
3,Tamoor,B,Y,75,71,No
4,Zahoor,E,N,20,30,No


In [3]:
encoder = preprocessing.LabelEncoder() 
data['OverAllGrade']= encoder.fit_transform(data['OverAllGrade'])
data

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ahmed,0,Y,90,85,Yes
1,Zahid,2,N,85,51,Yes
2,Amjad,4,N,10,17,No
3,Tamoor,1,Y,75,71,No
4,Zahoor,3,N,20,30,No
5,Afzal,0,Y,92,79,Yes
6,Akmal,1,Y,60,59,No
7,Sajid,2,Y,75,33,No


In [4]:
data['Obedient']= encoder.fit_transform(data['Obedient'])
data

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ahmed,0,1,90,85,Yes
1,Zahid,2,0,85,51,Yes
2,Amjad,4,0,10,17,No
3,Tamoor,1,1,75,71,No
4,Zahoor,3,0,20,30,No
5,Afzal,0,1,92,79,Yes
6,Akmal,1,1,60,59,No
7,Sajid,2,1,75,33,No


In [5]:
Features = data[['OverAllGrade','Obedient','ProjectScore','ResearchScore']]
Features

Unnamed: 0,OverAllGrade,Obedient,ProjectScore,ResearchScore
0,0,1,85,90
1,2,0,51,85
2,4,0,17,10
3,1,1,71,75
4,3,0,30,20
5,0,1,79,92
6,1,1,59,60
7,2,1,33,75


In [6]:
Labels = data['Recommend']
Labels

0    Yes
1    Yes
2     No
3     No
4     No
5    Yes
6     No
7     No
Name: Recommend, dtype: object

In [7]:
ss = StandardScaler()
ss.fit(Features[['ResearchScore','ProjectScore']])
Features[['ResearchScore','ProjectScore']] = ss.transform(Features[['ResearchScore','ProjectScore']])
Features[['ResearchScore','ProjectScore']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Features[['ResearchScore','ProjectScore']] = ss.transform(Features[['ResearchScore','ProjectScore']])


Unnamed: 0,ResearchScore,ProjectScore
0,0.899583,1.37665
1,0.730648,-0.091777
2,-1.80339,-1.560203
3,0.392776,0.772004
4,-1.465519,-0.998746
5,0.967158,1.117516
6,-0.114032,0.253735
7,0.392776,-0.869179


In [8]:
Features

Unnamed: 0,OverAllGrade,Obedient,ProjectScore,ResearchScore
0,0,1,1.37665,0.899583
1,2,0,-0.091777,0.730648
2,4,0,-1.560203,-1.80339
3,1,1,0.772004,0.392776
4,3,0,-0.998746,-1.465519
5,0,1,1.117516,0.967158
6,1,1,0.253735,-0.114032
7,2,1,-0.869179,0.392776


In [9]:
# PHase 5: Modeling 
lr = LogisticRegression()

# Training / Learning of ML Method 

lr.fit(Features, Labels)

0    Yes
1    Yes
2     No
3     No
4     No
5    Yes
6     No
7     No
Name: Recommend, dtype: object

In [10]:
predictedLabels = lr.predict(Features)
predictedLabels

array(['Yes', 'No', 'No', 'No', 'No', 'Yes', 'No', 'No'], dtype=object)

In [11]:
Labels

0    Yes
1    Yes
2     No
3     No
4     No
5    Yes
6     No
7     No
Name: Recommend, dtype: object

In [12]:
# Phase 6: Model Evaluation 


acc_score = accuracy_score(Labels, predictedLabels)*100
acc_score




87.5

In [13]:
print(classification_report(Labels, predictedLabels))

              precision    recall  f1-score   support

          No       0.83      1.00      0.91         5
         Yes       1.00      0.67      0.80         3

    accuracy                           0.88         8
   macro avg       0.92      0.83      0.85         8
weighted avg       0.90      0.88      0.87         8



In [14]:
if not os.path.exists('Models'):
    os.mkdir('Models')

if not os.path.exists('Scalers'):
    os.mkdir('Scalers')
    
joblib.dump(lr, r'Models/model.pickle')
joblib.dump(ss, r'Scalers/scaler.pickle')

['Scalers/scaler.pickle']