In [16]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
import joblib
import os

In [17]:
data = pd.read_csv("Students_Records.csv")
data.head()

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ahmed,A,Y,,85,Yes
1,Zahid,C,N,85.0,51,Yes
2,Amjad,F,N,10.0,17,No
3,Tamoor,,Y,75.0,71,No
4,Zahoor,E,N,20.0,30,No


In [18]:
data.shape

(8, 6)

In [19]:
data_columns = data.columns.values.tolist()

In [20]:
data.dtypes

Name              object
OverAllGrade      object
Obedient          object
ResearchScore    float64
ProjectScore       int64
Recommend         object
dtype: object

In [21]:
data.columns[data.isnull().any()].to_list()

['OverAllGrade', 'ResearchScore']

In [22]:
data.loc[:,data.isnull().any()]


Unnamed: 0,OverAllGrade,ResearchScore
0,A,
1,C,85.0
2,F,10.0
3,,75.0
4,E,20.0
5,A,92.0
6,B,60.0
7,C,75.0


In [23]:
len(pd.isnull(data).any(1)==True)

  len(pd.isnull(data).any(1)==True)


8

In [24]:
row_index = data.loc[data.isnull().any(axis=1)]
row_index

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Ahmed,A,Y,,85,Yes
3,Tamoor,,Y,75.0,71,No


In [25]:
data.info

<bound method DataFrame.info of      Name OverAllGrade Obedient  ResearchScore  ProjectScore Recommend
0   Ahmed            A        Y            NaN            85       Yes
1   Zahid            C        N           85.0            51       Yes
2   Amjad            F        N           10.0            17        No
3  Tamoor          NaN        Y           75.0            71        No
4  Zahoor            E        N           20.0            30        No
5   Afzal            A        Y           92.0            79       Yes
6   Akmal            B        Y           60.0            59        No
7   Sajid            C        Y           75.0            33        No>

In [26]:
data.describe

<bound method NDFrame.describe of      Name OverAllGrade Obedient  ResearchScore  ProjectScore Recommend
0   Ahmed            A        Y            NaN            85       Yes
1   Zahid            C        N           85.0            51       Yes
2   Amjad            F        N           10.0            17        No
3  Tamoor          NaN        Y           75.0            71        No
4  Zahoor            E        N           20.0            30        No
5   Afzal            A        Y           92.0            79       Yes
6   Akmal            B        Y           60.0            59        No
7   Sajid            C        Y           75.0            33        No>

In [27]:
data = data.dropna()
data.head()

Unnamed: 0,Name,OverAllGrade,Obedient,ResearchScore,ProjectScore,Recommend
1,Zahid,C,N,85.0,51,Yes
2,Amjad,F,N,10.0,17,No
4,Zahoor,E,N,20.0,30,No
5,Afzal,A,Y,92.0,79,Yes
6,Akmal,B,Y,60.0,59,No


In [28]:
data_columns = data.columns.values.tolist()
data_columns

['Name',
 'OverAllGrade',
 'Obedient',
 'ResearchScore',
 'ProjectScore',
 'Recommend']

In [29]:
column_names = {col: col.lower().replace(" ","_") for col in data_columns}
column_names

{'Name': 'name',
 'OverAllGrade': 'overallgrade',
 'Obedient': 'obedient',
 'ResearchScore': 'researchscore',
 'ProjectScore': 'projectscore',
 'Recommend': 'recommend'}

In [30]:
data.rename(columns= column_names, inplace= True)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns= column_names, inplace= True)


Unnamed: 0,name,overallgrade,obedient,researchscore,projectscore,recommend
1,Zahid,C,N,85.0,51,Yes
2,Amjad,F,N,10.0,17,No
4,Zahoor,E,N,20.0,30,No
5,Afzal,A,Y,92.0,79,Yes
6,Akmal,B,Y,60.0,59,No


In [31]:
data_columns = data.columns.values.tolist()
data_columns

['name',
 'overallgrade',
 'obedient',
 'researchscore',
 'projectscore',
 'recommend']

In [33]:
X = data[[
 'overallgrade',
 'obedient',
 'researchscore',
 'projectscore']]
X

Unnamed: 0,overallgrade,obedient,researchscore,projectscore
1,C,N,85.0,51
2,F,N,10.0,17
4,E,N,20.0,30
5,A,Y,92.0,79
6,B,Y,60.0,59
7,C,Y,75.0,33


In [34]:
Y = data["recommend"]

In [35]:
ss = StandardScaler()
ss.fit(data[['researchscore','projectscore']])
data[['researchscore','projectscore']] = ss.transform(data[['researchscore','projectscore']])
data[['researchscore','projectscore']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['researchscore','projectscore']] = ss.transform(data[['researchscore','projectscore']])


Unnamed: 0,researchscore,projectscore
1,0.8914,0.299666
2,-1.496279,-1.352548
4,-1.177922,-0.720819
5,1.11425,1.660314
6,0.095507,0.688423
7,0.573043,-0.575035


In [36]:
features = pd.get_dummies(X, 
                          columns=['overallgrade','obedient'])
features

Unnamed: 0,researchscore,projectscore,overallgrade_A,overallgrade_B,overallgrade_C,overallgrade_E,overallgrade_F,obedient_N,obedient_Y
1,85.0,51,0,0,1,0,0,1,0
2,10.0,17,0,0,0,0,1,1,0
4,20.0,30,0,0,0,1,0,1,0
5,92.0,79,1,0,0,0,0,0,1
6,60.0,59,0,1,0,0,0,0,1
7,75.0,33,0,0,1,0,0,0,1


In [37]:
lr = LogisticRegression()

lr.fit(features,Y)

In [38]:
predictedLabels = lr.predict(features)

In [39]:
predictedLabels

array(['Yes', 'No', 'No', 'Yes', 'No', 'No'], dtype=object)

In [40]:
Y

1    Yes
2     No
4     No
5    Yes
6     No
7     No
Name: recommend, dtype: object

In [41]:
acc_score = accuracy_score(Y, predictedLabels)*100
print(f'Accuracy Score: {acc_score} %')

print('Classification Report:')
print(classification_report(Y, predictedLabels))

Accuracy Score: 100.0 %
Classification Report:
              precision    recall  f1-score   support

          No       1.00      1.00      1.00         4
         Yes       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6



In [42]:
if not os.path.exists('Models'):
    os.mkdir('Models')

if not os.path.exists('Scalers'):
    os.mkdir('Scalers')
    
joblib.dump(lr, r'Models/model.pickle')
joblib.dump(ss, r'Scalers/scaler.pickle')


['Scalers/scaler.pickle']