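The dataset used in this notebook can be downloaded from Kaggle: https://www.kaggle.com/datasets/amrmaree/student-performance-prediction?resource=download. The code below expects it as `student_performance_dataset.csv` in the working directory.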

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def main():
    """Train a random forest on the student performance CSV and print its accuracy.

    Steps, in order:
      1. Read ``student_performance_dataset.csv`` from the working directory.
      2. Replace five columns with their ``LabelEncoder`` integer codes.
      3. Hold out 20% of the rows as a test set (``random_state=42``).
      4. Fit a ``RandomForestClassifier`` (``random_state=42``).
      5. Print the test accuracy and the first rows of an actual/predicted table.

    Returns nothing; all results are written to stdout.
    """
    frame = pd.read_csv("student_performance_dataset.csv")

    # One encoder instance is refit for every column, so after the loop it
    # only remembers the classes of the last column ('Pass_Fail').
    encoder = LabelEncoder()
    for column in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        frame[column] = encoder.fit_transform(frame[column])

    # 'Pass_Fail' is the target; 'Student_ID' is left out of the features.
    # NOTE(review): the recorded run reports 100.00% accuracy. A perfect score
    # may mean one of the remaining feature columns determines Pass_Fail --
    # confirm against the dataset schema.
    y = frame['Pass_Fail']
    X = frame.drop(columns=['Student_ID', 'Pass_Fail'])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)

    y_pred = forest.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # y_test is a Series, so the table keeps the original row labels.
    comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(comparison.head())

# Entry-point guard: calls main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()

Accuracy: 100.00%
     Actual  Predicted
120       0          0
247       1          1
324       1          1
204       1          1
603       1          1


In [2]:
import pandas as pd
import cupy as cp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def main():
    """Run the random-forest pipeline with the data passed through cuPy arrays.

    The CSV is read and label-encoded with pandas/scikit-learn, the feature
    matrix and target are wrapped as cuPy arrays before the train/test split,
    and every split is converted back to NumPy with ``cp.asnumpy`` before it
    reaches the scikit-learn model and metric.

    Prints the test accuracy and the first rows of an actual/predicted table.
    Returns nothing.
    """
    frame = pd.read_csv("student_performance_dataset.csv")

    # One encoder instance is refit for every column in turn.
    encoder = LabelEncoder()
    for column in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        frame[column] = encoder.fit_transform(frame[column])

    # DataFrame -> NumPy -> cuPy. 'Student_ID' is dropped from the features,
    # 'Pass_Fail' is the target.
    X = cp.asarray(frame.drop(columns=['Student_ID', 'Pass_Fail']).to_numpy())
    y = cp.asarray(frame['Pass_Fail'].to_numpy())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The forest below is the CPU-based scikit-learn estimator, so each split
    # is converted back to NumPy once and reused.
    # NOTE(review): since fitting, prediction and scoring all receive NumPy
    # arrays, the cuPy round trip does not appear to accelerate anything --
    # confirm whether it is intentional.
    X_train_np = cp.asnumpy(X_train)
    X_test_np = cp.asnumpy(X_test)
    y_train_np = cp.asnumpy(y_train)
    y_test_np = cp.asnumpy(y_test)

    forest = RandomForestClassifier(random_state=42).fit(X_train_np, y_train_np)

    y_pred = forest.predict(X_test_np)
    accuracy = accuracy_score(y_test_np, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Both columns are plain arrays here, so the table gets a fresh 0..n-1 index.
    comparison = pd.DataFrame({'Actual': y_test_np, 'Predicted': y_pred})
    print(comparison.head())

# Entry-point guard: calls main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()


Accuracy: 100.00%
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [None]:
import pandas as pd
from sklearnex import patch_sklearn
patch_sklearn()  # Apply the patch to scikit-learn
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def main():
    """Run the random-forest pipeline with ``FutureWarning`` messages silenced.

    Steps, in order:
      1. Install an "ignore" filter for ``FutureWarning``.
      2. Read ``student_performance_dataset.csv`` from the working directory.
      3. Replace five columns with their ``LabelEncoder`` integer codes.
      4. Hold out 20% of the rows as a test set (``random_state=42``).
      5. Fit a ``RandomForestClassifier`` (``random_state=42``).
      6. Print the test accuracy and the first rows of an actual/predicted table.

    Returns nothing; all results are written to stdout.
    """
    # The filter is installed globally and is not removed when main() returns.
    warnings.filterwarnings("ignore", category=FutureWarning)

    frame = pd.read_csv("student_performance_dataset.csv")

    # One encoder instance is refit for every column, so after the loop it
    # only remembers the classes of the last column ('Pass_Fail').
    encoder = LabelEncoder()
    for column in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        frame[column] = encoder.fit_transform(frame[column])

    # 'Pass_Fail' is the target; 'Student_ID' is left out of the features.
    y = frame['Pass_Fail']
    X = frame.drop(columns=['Student_ID', 'Pass_Fail'])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so construction and training chain.
    forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)

    y_pred = forest.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # y_test is a Series, so the table keeps the original row labels.
    comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(comparison.head())

# Entry-point guard: calls main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()
