In [18]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 

# import machine learning libraries 
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, f1_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline 
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [7]:
# Load the student exam dataset from the local CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="student performance dataset/student_exam_data.csv")

In [10]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Study Hours,Previous Exam Score,Pass/Fail
0,4.370861,81.889703,0
1,9.556429,72.165782,1
2,7.587945,58.571657,0
3,6.387926,88.827701,1
4,2.404168,81.08387,0


In [9]:
# Let's train a machine learning algorithm for this problem.


Unnamed: 0,Study Hours,Previous Exam Score,Pass/Fail
0,4.370861,81.889703,0
1,9.556429,72.165782,1
2,7.587945,58.571657,0
3,6.387926,88.827701,1
4,2.404168,81.083870,0
...,...,...,...
495,4.180170,45.494924,0
496,6.252905,95.038815,1
497,1.699612,48.209118,0
498,9.769553,97.014241,1


In [14]:
# Split the data into train / test sets.
target = "Pass/Fail"
# Select features by name (every column except the target) instead of by
# position, so the target can never leak into the features -- and no real
# feature is dropped -- if the column order of the CSV changes.
features = [col for col in df.columns if col != target]

# Hold out 30% of the rows for testing, stratify on the target so both splits
# keep approximately the same class proportions, and fix the seed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df[target],
    test_size=0.3,
    stratify=df[target],
    random_state=42,
)

In [28]:
# Build the training pipeline: preprocessing followed by a classifier.

# Numeric preprocessing: mean-impute missing values, then min-max scale.
numeric_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", MinMaxScaler()),
    ]
)

# Apply the numeric preprocessing to every column listed in `features`.
preprocessor = ColumnTransformer([("num", numeric_pipeline, features)])

# Full model: preprocessing feeding a logistic regression with a fixed seed.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression(random_state=42)),
    ]
)

In [29]:
# Fit the preprocessing steps and the classifier on the training split only.
model.fit(X=X_train, y=y_train)

In [30]:
# Predict labels for both splits with the fitted pipeline.
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

# Test-set accuracy. scikit-learn metrics take (y_true, y_pred) in that order.
# Accuracy happens to be symmetric, so the value is unchanged, but keeping the
# documented order avoids silently wrong results if this call is later swapped
# for an order-sensitive metric such as recall_score.
accuracy_score(y_test, test_preds)

0.9066666666666666

array([[0.01, 0.99],
       [1.  , 0.  ],
       [0.04, 0.96],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.03, 0.97],
       [0.  , 1.  ],
       [0.01, 0.99],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.01, 0.99],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.92, 0.08],
       [0.99, 0.01],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.02, 0.98],
       [0.03, 0.97],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.99, 0.01],
       [0.01, 0.99],
       [0.98, 0.02],
       [1.  , 0.  ],
       [0.02, 0.98],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.99, 0.01],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [1.  , 0.  ],
       [0.97, 0.03],
       [1.  , 0.  ],
       [1.  ,