In [34]:
import pandas as pd

# Read the raw student-performance CSV, then show its (rows, columns) shape
# followed by the first five records as a quick sanity check.
df = pd.read_csv('/StudentPerformanceFactors.csv')
print(df.shape, df.head(), sep='\n')

(6607, 20)
   Hours_Studied  Attendance Parental_Involvement Access_to_Resources  \
0             23          84                  Low                High   
1             19          64                  Low              Medium   
2             24          98               Medium              Medium   
3             29          89                  Low              Medium   
4             19          92               Medium              Medium   

  Extracurricular_Activities  Sleep_Hours  Previous_Scores Motivation_Level  \
0                         No            7               73              Low   
1                         No            8               59              Low   
2                        Yes            7               91           Medium   
3                        Yes            8               98           Medium   
4                        Yes            6               65           Medium   

  Internet_Access  Tutoring_Sessions Family_Income Teacher_Quality  \
0    

In [37]:
# =========================
# AI PROJECT: Student Pass/Fail Prediction
# =========================

# 1. Import Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# 2. Load Dataset
# IMPORTANT: Check the filename in the Files panel (left side)
DATA_PATH = '/StudentPerformanceFactors.csv'
TARGET_COL = 'Exam_Score'  # If the score column is named differently, CHANGE it here
PASS_THRESHOLD = 70        # A score >= this value is labelled as a pass (1)
TEST_SIZE = 0.2            # Fraction of rows held out for evaluation
RANDOM_STATE = 42          # Fixed seed so the split is reproducible

df = pd.read_csv(DATA_PATH)

print("Dataset loaded successfully")
print(df.head())

# 3. Dataset info
print("\nDataset Info:")
# info() writes its report to stdout itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
df.info()

# 4. CHECK that the score column exists
print("\nColumns are:")
print(df.columns)
if TARGET_COL not in df.columns:
    raise KeyError(
        f"Column '{TARGET_COL}' not found; set TARGET_COL to one of: {list(df.columns)}"
    )

# 5. Create Pass/Fail column
# Vectorised comparison instead of a row-wise apply. A missing score compares
# as False and is therefore labelled 0 (fail).
df['Pass'] = (df[TARGET_COL] >= PASS_THRESHOLD).astype(int)

# 6. Separate Features and Target
features_raw = df.drop(columns=[TARGET_COL, 'Pass'])
y = df['Pass']

# 7. Train-test split
# Row positions are split (rather than the frames themselves) so that the
# imputation statistics in step 8 can be computed from training rows only.
# stratify=y keeps both classes represented in the train and test sets.
train_pos, test_pos = train_test_split(
    np.arange(len(df)), test_size=TEST_SIZE, random_state=RANDOM_STATE,
    stratify=y
)

# 8. Handle Missing Values (statistics come from the training rows only, so
#    nothing about the test rows leaks into preprocessing)
train_rows = features_raw.iloc[train_pos]
# Numeric columns -> training mean (all-NaN columns have no mean and are skipped).
fill_values = train_rows.mean(numeric_only=True).dropna().to_dict()
# Non-numeric columns -> most frequent training value. Without this,
# get_dummies would encode a missing category as all zeros, which is
# indistinguishable from the baseline category removed by drop_first=True.
for col in train_rows.select_dtypes(exclude='number').columns:
    most_common = train_rows[col].mode()
    if not most_common.empty:
        fill_values[col] = most_common.iloc[0]
# fillna returns a new frame; the loaded df itself is left untouched.
features_filled = features_raw.fillna(fill_values)

# 9. Convert categorical columns to numeric
# Encoding the full frame once guarantees train and test share the same columns.
X = pd.get_dummies(features_filled, drop_first=True)
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

# 10. Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# 11. Predictions
y_pred = model.predict(X_test)

# 12. Evaluation (all metrics are computed on the held-out test rows)
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("\nConfusion Matrix:\n", cm)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Dataset loaded successfully
   Hours_Studied  Attendance Parental_Involvement Access_to_Resources  \
0             23          84                  Low                High   
1             19          64                  Low              Medium   
2             24          98               Medium              Medium   
3             29          89                  Low              Medium   
4             19          92               Medium              Medium   

  Extracurricular_Activities  Sleep_Hours  Previous_Scores Motivation_Level  \
0                         No            7               73              Low   
1                         No            8               59              Low   
2                        Yes            7               91           Medium   
3                        Yes            8               98           Medium   
4                        Yes            6               65           Medium   

  Internet_Access  Tutoring_Sessions Family_Income Teacher