In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("student_performance_dataset.csv")

# Drop Student_ID since it's not a predictor
df.drop(columns=["Student_ID"], inplace=True)

# Encode categorical features
label_encoders = {}
categorical_columns = ["Gender", "Parental_Education_Level", "Extra_Curricular_Activities", "Internet_Access_at_Home", "Socioeconomic_Status", "Health_Issues"]

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variable
le_target = LabelEncoder()
df["Final_Performance"] = le_target.fit_transform(df["Final_Performance"])

# Split features and target
X = df.drop(columns=["Final_Performance"])
y = df["Final_Performance"]

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.92         6
           1       1.00      1.00      1.00        70
           2       1.00      0.96      0.98        24

    accuracy                           0.99       100
   macro avg       0.95      0.99      0.97       100
weighted avg       0.99      0.99      0.99       100



In [52]:
# Simple Prediction Function
def predict_performance(input_data):
    input_df = pd.DataFrame([input_data], columns=X.columns)
    for col in categorical_columns:
        input_df[col] = label_encoders[col].transform(input_df[col])
    input_df = scaler.transform(input_df)
    prediction = model.predict(input_df)
    return le_target.inverse_transform(prediction)[0]

# Example Usage
example_input = {
    "Age": 20,
    "Gender": "Male",
    "Study_Hours_per_Week": 0,
    "Previous_Scores": 75,
    "Class_Participation": 60,
    "Attendance_Rate": 95,
    "Parental_Education_Level": "Higher",
    "Extra_Curricular_Activities": "Yes",
    "Internet_Access_at_Home": "Yes",
    "Socioeconomic_Status": "Medium",
    "Health_Issues": "No"
}

predicted_performance = predict_performance(example_input)
print(f"Predicted Performance: {predicted_performance}")


Predicted Performance: Low


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("student_performance_datasets.csv")

# Drop Student_ID since it's not a predictor
df.drop(columns=["Student_ID"], inplace=True)

# Encode categorical features
label_encoders = {}
categorical_columns = [
    "Gender", "Parental_Education_Level", "Extra_Curricular_Activities", "Internet_Access_at_Home", 
    "Socioeconomic_Status", "Health_Issues", "Motivation_Level", "Peer_Influence", "Time_Management_Skills",
    "Family_Support", "Stress_Level", "Learning_Style"
]

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variable
le_target = LabelEncoder()
df["Final_Performance"] = le_target.fit_transform(df["Final_Performance"])

# Split features and target
X = df.drop(columns=["Final_Performance"])
y = df["Final_Performance"]

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Simple Prediction Function
def predict_performance(input_data):
    input_df = pd.DataFrame([input_data], columns=X.columns)
    for col in categorical_columns:
        input_df[col] = label_encoders[col].transform(input_df[col])
    input_df = scaler.transform(input_df)
    prediction = model.predict(input_df)
    return le_target.inverse_transform(prediction)[0]

# Example Usage
example_input = {
    "Age": 20,
    "Gender": "Male",
    "Study_Hours_per_Week": 25,
    "Previous_Scores": 85,
    "Class_Participation": 90,
    "Attendance_Rate": 95,
    "Parental_Education_Level": "Higher",
    "Extra_Curricular_Activities": "Yes",
    "Internet_Access_at_Home": "Yes",
    "Socioeconomic_Status": "Medium",
    "Health_Issues": "No",
    "Motivation_Level": "High",
    "Peer_Influence": "Positive",
    "Time_Management_Skills": "Good",
    "Family_Support": "Strong",
    "Stress_Level": "Low",
    "Learning_Style": "Visual"
}

predicted_performance = predict_performance(example_input)
print(f"Predicted Performance: {predicted_performance}")


Accuracy: 0.24
Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.23      0.22        30
           1       0.36      0.15      0.21        27
           2       0.17      0.32      0.22        22
           3       0.43      0.29      0.34        21

    accuracy                           0.24       100
   macro avg       0.29      0.25      0.25       100
weighted avg       0.29      0.24      0.24       100

Predicted Performance: Good


In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("student_performance_datasetdf.csv")

# Drop Student_ID since it's not a predictor

df.drop(columns=["Student_ID"], inplace=True)

# Encode categorical features
label_encoders = {}
categorical_columns = ["Gender", "Parental_Education_Level", 
                       "Extra_Curricular_Activities", "Internet_Access_at_Home", 
                       "Socioeconomic_Status", "Health_Issues", 
                      "Previous_GPA", "Peer_Influence","Time_Management_Skills",
                       "Family_Support", "Stress_Level"]

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode target variable
le_target = LabelEncoder()
df["Final_Performance"] = le_target.fit_transform(df["Final_Performance"])

# Split features and target
X = df.drop(columns=["Final_Performance"])
y = df["Final_Performance"]


# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Example Usage
example_input = {
    "Age": 20,
    "Gender": "Male",
    "Study_Hours_per_Week": 25,
    "Previous_Scores": 15,
    "Class_Participation": 90,
    "Attendance_Rate": 15,
    "Parental_Education_Level": "Higher",
    "Extra_Curricular_Activities": "Yes",
    "Internet_Access_at_Home": "Yes",
    "Socioeconomic_Status": "Medium",
    "Health_Issues": "No",
    "Previous_GPA": 3.5,
    "Motivation_Level": "High",
    "Peer_Influence": "Positive",
    "Time_Management_Skills": "Good",
    "Family_Support": "Strong"
    
}

predicted_performance = predict_performance(example_input)
print(f"Predicted Performance: {predicted_performance}")


Accuracy: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.80      0.89        10
           1       0.94      1.00      0.97       103
           2       0.94      0.84      0.89        37

    accuracy                           0.95       150
   macro avg       0.96      0.88      0.92       150
weighted avg       0.95      0.95      0.94       150



ValueError: y contains previously unseen labels: [3.7704777574151302]