# Logistic Regression

# *1. Import Libraries*

In [10]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

# *2. Load the Dataset*

In [11]:
df = pd.read_csv('StudentsPerformance.csv')

# *3. Convert Categorical Variables*

In [12]:
df_dummies = pd.get_dummies(df, drop_first = True)

# *4. Define the Target Variable*

In [13]:
df_dummies['passed_math'] = (df['math score'] > 50).astype(int)

# *5. Select Features*

In [17]:
x = df_dummies.drop(['math score', 'reading score', 'writing score', 'passed_math'], axis = 1)
y = df_dummies['passed_math']

# *6. Split the Data*

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

# *7. Standardize the Features*

In [23]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# *8. Train the Logistic Regression Model*

In [24]:
model = LogisticRegression()
model.fit(x_train_scaled, y_train)

# *9. Prediction on the Test Set*

In [26]:
y_pred = model.predict(x_test_scaled)

# *10. Evaluate the Model*

In [30]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

report = f"""
Accuracy: {accuracy}

Confusion Matrix:
{conf_matrix}

Classification Report:
{class_report}

"""

print(report)


Accuracy: 0.82

Confusion Matrix: 
[[  3  33]
 [  3 161]]

Classification Report: 
              precision    recall  f1-score   support

           0       0.50      0.08      0.14        36
           1       0.83      0.98      0.90       164

    accuracy                           0.82       200
   macro avg       0.66      0.53      0.52       200
weighted avg       0.77      0.82      0.76       200



