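The dataset can be downloaded from Kaggle: <https://www.kaggle.com/datasets/amrmaree/student-performance-prediction?resource=download>. Save the CSV next to this notebook as `student_performance_dataset.csv`, which is the path the code cells below read.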

In [5]:
import warnings
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Return a memory-optimised copy of ``data``.

    The five known categorical columns are converted to pandas ``category``
    dtype, then every column selected by ``select_dtypes`` for ``'int'`` and
    ``'float'`` is downcast with ``pd.to_numeric``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column listed in ``categorical_columns``;
        a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame. The caller's frame is left unmodified, so the function
        can be re-run on the same raw input and give the same result.
    """
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']

    # astype() returns a new DataFrame, so all later assignments touch only
    # the result and never the frame that was passed in.
    optimized = data.astype({col: 'category' for col in categorical_columns})

    # Downcast numerical columns (category columns are not selected here)
    for col in optimized.select_dtypes(include='int').columns:
        optimized[col] = pd.to_numeric(optimized[col], downcast='integer')
    for col in optimized.select_dtypes(include='float').columns:
        optimized[col] = pd.to_numeric(optimized[col], downcast='float')

    return optimized

def mainBareBones():
    """Baseline run: load the CSV, encode it, train XGBoost and report accuracy.

    Steps, in order:
      1. silence ``FutureWarning`` (process-wide filter),
      2. read ``student_performance_dataset.csv`` and pass it through
         ``optimize_datatypes``,
      3. replace the categorical columns with their integer category codes,
      4. hold out 20% of the rows (``random_state=42``),
      5. train 100 boosting rounds with ``device='cuda'``,
      6. print the accuracy and the first five predictions.

    NOTE(review): the recorded run reports 100.00% accuracy. A perfect score
    often means one of the remaining feature columns determines ``Pass_Fail``
    directly -- confirm against the dataset's columns before trusting it.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)

    frame = optimize_datatypes(pd.read_csv("student_performance_dataset.csv"))

    # Swap every categorical column for its integer codes.
    encoded_cols = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']
    frame[encoded_cols] = frame[encoded_cols].apply(lambda series: series.cat.codes)

    # Plain arrays: features are everything except the id and the target.
    features = frame.drop(columns=['Student_ID', 'Pass_Fail']).values
    target = frame['Pass_Fail'].values

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    train_matrix = xgb.DMatrix(X_train, label=y_train)
    eval_matrix = xgb.DMatrix(X_test, label=y_test)

    booster = xgb.train(
        {
            'objective': 'binary:logistic',
            'tree_method': 'hist',  # histogram-based tree construction
            'device': 'cuda',       # run on the GPU
            'random_state': 42,
        },
        train_matrix,
        num_boost_round=100,
    )

    # binary:logistic yields probabilities; threshold at 0.5 for class labels.
    probabilities = booster.predict(eval_matrix)
    y_pred = (probabilities > 0.5).astype(int)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy (Optimized Barebones): {accuracy * 100:.2f}%")

    # Show the first few actual/predicted pairs.
    print(pd.DataFrame({'Actual': y_test, 'Predicted': y_pred}).head())


if __name__ == "__main__":
    mainBareBones()


Accuracy (Optimized Barebones): 100.00%
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [2]:
import warnings
import pandas as pd
import cupy as cp
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Return a copy of ``data`` with categoricals integer-encoded and numerics downcast.

    Each of the five known categorical columns is converted to ``category``
    dtype and then replaced by its integer category codes. pandas encodes
    missing values as ``-1`` in ``cat.codes``, so a missing ``Pass_Fail``
    would surface as a ``-1`` label. Columns selected by ``select_dtypes``
    for ``'int'`` and ``'float'`` are then downcast with ``pd.to_numeric``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column listed in ``categorical_columns``;
        a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new, fully numeric-encoded frame. The caller's frame is left
        unmodified, so the function can be re-run on the same raw input.
    """
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home',
                           'Extracurricular_Activities', 'Pass_Fail']

    # astype() returns a new DataFrame, so all later assignments touch only
    # the result and never the frame that was passed in.
    optimized = data.astype({col: 'category' for col in categorical_columns})
    for col in categorical_columns:
        optimized[col] = optimized[col].cat.codes  # category -> integer codes

    # Downcast numerical columns to reduce memory usage
    for col in optimized.select_dtypes(include='int').columns:
        optimized[col] = pd.to_numeric(optimized[col], downcast='integer')
    for col in optimized.select_dtypes(include='float').columns:
        optimized[col] = pd.to_numeric(optimized[col], downcast='float')

    return optimized

def mainCuPY():
    """CuPy variant: keep the arrays on the GPU from split through training.

    Reads ``student_performance_dataset.csv``, runs it through
    ``optimize_datatypes`` (which must leave every column numeric, because
    the values are cast to ``float32``/``int32`` CuPy arrays), holds out 20%
    of the rows, trains 100 boosting rounds with ``device='cuda'`` and prints
    the accuracy plus the first five predictions.

    The CuPy arrays are handed to ``xgb.DMatrix`` directly. Converting them
    back with ``cp.asnumpy`` first would copy the data device -> host only
    for XGBoost to move it to the GPU again.

    NOTE(review): the recorded run reports 100.00% accuracy. A perfect score
    often means one of the remaining feature columns determines ``Pass_Fail``
    directly -- confirm against the dataset's columns before trusting it.
    """
    # Suppress warnings (process-wide filter)
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Load the Data
    data = pd.read_csv("student_performance_dataset.csv")

    # Optimize datatypes
    data = optimize_datatypes(data)

    # Move features and target to the GPU as CuPy arrays
    X = cp.asarray(data.drop(['Student_ID', 'Pass_Fail'], axis=1).values, dtype=cp.float32)
    y = cp.asarray(data['Pass_Fail'].values, dtype=cp.int32)

    # Split the Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build the DMatrix objects straight from device memory
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {
        'objective': 'binary:logistic',
        'tree_method': 'hist',  # Optimized for speed
        'device': 'cuda',       # GPU acceleration
        'random_state': 42
    }
    model = xgb.train(params, dtrain, num_boost_round=100)

    # Evaluate the Model
    # scikit-learn metrics and pandas need host arrays: copy the labels once.
    y_test_host = cp.asnumpy(y_test)
    y_pred = (model.predict(dtest) > 0.5).astype(int)
    accuracy = accuracy_score(y_test_host, y_pred)
    print(f"Accuracy (CuPy + GPU Optimized): {accuracy * 100:.2f}%")

    # Print some predictions
    predictions_df = pd.DataFrame({'Actual': y_test_host, 'Predicted': y_pred})
    print(predictions_df.head())

if __name__ == "__main__":
    mainCuPY()


Accuracy (CuPy + GPU Optimized): 100.00%
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [3]:
import warnings

# Patch scikit-learn BEFORE importing anything from it: names imported from
# sklearn ahead of the patch stay bound to the stock implementations.
from sklearnex import patch_sklearn
patch_sklearn()  # Apply the patch to scikit-learn

import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Return a memory-optimised copy of ``data``.

    The five known categorical columns are converted to pandas ``category``
    dtype, then every column selected by ``select_dtypes`` for ``'int'`` and
    ``'float'`` is downcast with ``pd.to_numeric``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column listed in ``categorical_columns``;
        a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame. The caller's frame is left unmodified, so the function
        can be re-run on the same raw input and give the same result.
    """
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']

    # Converting through astype() yields a fresh DataFrame; the input frame
    # is never written to.
    converted = data.astype(dict.fromkeys(categorical_columns, 'category'))

    # Downcast numerical columns (category columns are not selected here)
    int_cols = converted.select_dtypes(include='int').columns
    float_cols = converted.select_dtypes(include='float').columns
    for col in int_cols:
        converted[col] = pd.to_numeric(converted[col], downcast='integer')
    for col in float_cols:
        converted[col] = pd.to_numeric(converted[col], downcast='float')

    return converted

def mainIntelex():
    """Intel Extension for Scikit-learn variant of the training run.

    Reads ``student_performance_dataset.csv``, runs it through
    ``optimize_datatypes``, replaces the categorical columns with their
    integer category codes, holds out 20% of the rows, trains 100 XGBoost
    boosting rounds with ``device='cuda'`` and prints the accuracy plus the
    first five predictions.

    The pipeline is the same as the baseline cell; only the scikit-learn
    calls (``train_test_split``, ``accuracy_score``) can be influenced by the
    ``patch_sklearn()`` call in this cell's imports. The result line is
    labelled for this variant so its output can be told apart from the
    baseline run.

    NOTE(review): the recorded run reports 100.00% accuracy. A perfect score
    often means one of the remaining feature columns determines ``Pass_Fail``
    directly -- confirm against the dataset's columns before trusting it.
    """
    # Suppress warnings (process-wide filter)
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Load the Data
    data = pd.read_csv("student_performance_dataset.csv")

    # Optimize Data Types
    data = optimize_datatypes(data)

    # Encode categorical features as integer category codes
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']
    data[categorical_columns] = data[categorical_columns].apply(lambda col: col.cat.codes)

    # Split the Data
    X = data.drop(['Student_ID', 'Pass_Fail'], axis=1).values  # plain array of the feature columns
    y = data['Pass_Fail'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the Model using XGBoost
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    params = {
        'objective': 'binary:logistic',
        'tree_method': 'hist',  # Optimized for speed
        'device': 'cuda',       # GPU acceleration
        'random_state': 42
    }
    model = xgb.train(params, dtrain, num_boost_round=100)

    # Evaluate the Model: threshold the predicted probabilities at 0.5
    y_pred = (model.predict(dtest) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    # Label fixed: this cell previously reused the baseline's "Optimized Barebones" text.
    print(f"Accuracy (Intel Extension for Scikit-learn): {accuracy * 100:.2f}%")

    # Print some predictions
    predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(predictions_df.head())

if __name__ == "__main__":
    mainIntelex()

Accuracy (Optimized Barebones): 100.00%
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
