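The dataset can be downloaded from https://www.kaggle.com/datasets/amrmaree/student-performance-prediction?resource=download. The code below reads it as "student_performance_dataset.csv" from the current working directory.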

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Downcast the numeric columns of ``data`` to smaller dtypes, in place.

    Columns picked by ``select_dtypes(include=['int'])`` are converted with
    ``pd.to_numeric(..., downcast='integer')``; afterwards the columns picked
    by ``select_dtypes(include=['float'])`` are converted with
    ``downcast='float'``. All other columns are left as they are.

    Args:
        data: pandas DataFrame to shrink. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # (dtype selector, downcast target) pairs, processed in this order.
    passes = (('int', 'integer'), ('float', 'float'))
    for selector, target in passes:
        selected = data.select_dtypes(include=[selector])
        for name in selected.columns:
            data[name] = pd.to_numeric(data[name], downcast=target)
    return data

def mainBareBones():
    """Train and evaluate a baseline random forest on the student dataset.

    The pipeline loads ``student_performance_dataset.csv`` from the working
    directory, downcasts numeric columns, label-encodes the categorical
    columns (including the ``Pass_Fail`` target), drops ``Student_ID``,
    performs an 80/20 train/test split with a fixed seed, fits a
    ``RandomForestClassifier`` and prints the test accuracy together with the
    first few actual/predicted pairs.

    Returns:
        None. When the dataset cannot be loaded, the error is printed and the
        function returns early without training anything.
    """
    # Load the Data
    file_path = "student_performance_dataset.csv"  # Replace with your dataset's path
    print("Loading the dataset...")
    try:
        data = pd.read_csv(file_path)
        print(f"Dataset loaded successfully! Shape: {data.shape}")
    except Exception as e:
        print(f"Error loading dataset: {e}")
        # Return instead of calling exit(): exit() is an interactive helper
        # injected by the site module, raises SystemExit and would shut down
        # a notebook kernel. Returning leaves that decision to the caller.
        return

    # Optimize data types
    print("\nOptimizing data types...")
    data = optimize_datatypes(data)

    # Preprocess the Data
    print("\nPreprocessing the data...")
    # fit_transform refits the encoder on every call, so a single instance can
    # be reused across columns; the per-column mappings are not kept.
    label_encoder = LabelEncoder()
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']
    for col in categorical_columns:
        data[col] = label_encoder.fit_transform(data[col])

    # Feature-target split
    print("\nSeparating features and target variable...")
    # Student_ID is dropped from the features; Pass_Fail is the target.
    X = data.drop(['Student_ID', 'Pass_Fail'], axis=1).values
    y = data['Pass_Fail'].values
    print(f"Features shape: {X.shape}, Target shape: {y.shape}")

    # Split the Data
    print("\nSplitting the data into training and testing sets...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")

    # Train the Model
    print("\nTraining the Random Forest Classifier...")
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    print("Model training completed!")

    # Evaluate the Model
    print("\nEvaluating the model...")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # NOTE(review): a perfect score on held-out data can indicate that one of
    # the feature columns leaks the target -- confirm against the dataset.
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Print some predictions
    print("\nDisplaying some predictions...")
    predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
    print(predictions_df.head())

# Run the baseline pipeline only when executed as the main module, not on import.
if __name__ == "__main__":
    mainBareBones()

Loading the dataset...
Dataset loaded successfully! Shape: (708, 10)

Optimizing data types...

Preprocessing the data...

Separating features and target variable...
Features shape: (708, 8), Target shape: (708,)

Splitting the data into training and testing sets...
Training set shape: (566, 8), Testing set shape: (142, 8)

Training the Random Forest Classifier...
Model training completed!

Evaluating the model...
Accuracy: 100.00%

Displaying some predictions...
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [1]:
import pandas as pd
import cupy as cp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Shrink integer and float columns of ``data`` to smaller dtypes, in place.

    Integer columns (``select_dtypes(include=['int'])``) are downcast first,
    then float columns (``select_dtypes(include=['float'])``). Non-numeric
    columns are not touched.

    Args:
        data: pandas DataFrame; its columns are reassigned in place.

    Returns:
        The DataFrame that was passed in.
    """
    def _shrink(selector, target):
        # Downcast every column matched by ``selector`` using ``target``.
        for label in data.select_dtypes(include=[selector]):
            data[label] = pd.to_numeric(data[label], downcast=target)

    _shrink('int', 'integer')
    _shrink('float', 'float')
    return data

def mainCuPY():
    """Run the random-forest pipeline with the data staged in CuPy arrays.

    The CSV is loaded with pandas, numeric columns are downcast, categorical
    columns (including the ``Pass_Fail`` target) are label-encoded, and the
    features/target are wrapped with ``cp.asarray`` as float32/int32 arrays.
    After the 80/20 split the arrays are converted back with ``cp.asnumpy``
    before being handed to the scikit-learn model and metric.

    Returns:
        None. If the dataset cannot be read, the error is printed and the
        function returns early.
    """
    # --- Load ---
    print("Loading dataset...")
    try:
        frame = pd.read_csv("student_performance_dataset.csv")
        print(f"Dataset loaded successfully! Shape: {frame.shape}")
    except Exception as err:
        print(f"Error loading dataset: {err}")
        return

    # --- Reduce memory footprint ---
    print("\nOptimizing datatypes...")
    frame = optimize_datatypes(frame)

    # --- Encode categorical columns as integers ---
    print("\nPreprocessing the data...")
    encoder = LabelEncoder()
    to_encode = [
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ]
    for column in to_encode:
        # fit_transform refits on each column, so one encoder is reused.
        frame[column] = encoder.fit_transform(frame[column])

    # --- Stage features and target as CuPy arrays ---
    print("\nConverting data to CuPy arrays...")
    feature_frame = frame.drop(columns=['Student_ID', 'Pass_Fail'])
    X = cp.asarray(feature_frame.to_numpy(dtype='float32'))
    target_values = frame['Pass_Fail'].to_numpy(dtype='int32')
    y = cp.asarray(target_values)

    # --- Train/test split ---
    print("\nSplitting data into training and testing sets...")
    parts = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = parts
    print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")

    # --- Fit the classifier on host (NumPy) copies of the training data ---
    print("\nTraining the Random Forest Classifier...")
    model = RandomForestClassifier(random_state=42, n_estimators=100)
    model.fit(cp.asnumpy(X_train), cp.asnumpy(y_train))
    print("Model training completed!")

    # --- Score on the held-out split ---
    print("\nEvaluating the model...")
    y_pred = model.predict(cp.asnumpy(X_test))
    accuracy = accuracy_score(cp.asnumpy(y_test), y_pred)
    percent = accuracy * 100
    print(f"Accuracy: {percent:.2f}%")

    # --- Show a sample of actual vs. predicted labels ---
    print("\nDisplaying some predictions...")
    comparison = {'Actual': cp.asnumpy(y_test), 'Predicted': y_pred}
    predictions_df = pd.DataFrame(comparison)
    print(predictions_df.head())

# Run the CuPy variant only when executed as the main module, not on import.
if __name__ == "__main__":
    mainCuPY()

Loading dataset...
Dataset loaded successfully! Shape: (708, 10)

Optimizing datatypes...

Preprocessing the data...

Converting data to CuPy arrays...

Splitting data into training and testing sets...
Training set shape: (566, 8), Testing set shape: (142, 8)

Training the Random Forest Classifier...
Model training completed!

Evaluating the model...
Accuracy: 100.00%

Displaying some predictions...
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [3]:
import pandas as pd
from sklearnex import patch_sklearn
patch_sklearn()  # Apply the patch to scikit-learn
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def optimize_datatypes(data):
    """Reduce memory use by downcasting numeric columns of ``data`` in place.

    Columns matched by ``select_dtypes(include=['int'])`` are passed through
    ``pd.to_numeric`` with ``downcast='integer'``; then columns matched by
    ``select_dtypes(include=['float'])`` are passed through it with
    ``downcast='float'``. Remaining columns are unchanged.

    Args:
        data: pandas DataFrame, mutated in place.

    Returns:
        The same DataFrame instance.
    """
    integer_columns = data.select_dtypes(include=['int']).columns
    for column in integer_columns:
        shrunk = pd.to_numeric(data[column], downcast='integer')
        data[column] = shrunk

    # Selected after the integer pass, matching the original evaluation order.
    float_columns = data.select_dtypes(include=['float']).columns
    for column in float_columns:
        shrunk = pd.to_numeric(data[column], downcast='float')
        data[column] = shrunk

    return data

def mainIntelex():
    """Run the random-forest pipeline on pandas objects.

    This cell calls ``patch_sklearn()`` from ``sklearnex`` at import time,
    before the scikit-learn imports; this function itself only uses the
    regular scikit-learn names. It silences ``FutureWarning``, loads the
    CSV, downcasts numeric columns, label-encodes the categorical columns
    (including the ``Pass_Fail`` target), splits 80/20 with a fixed seed,
    fits a ``RandomForestClassifier`` and prints accuracy plus a sample of
    predictions.

    Returns:
        None. If the dataset cannot be read, the error is printed and the
        function returns early.
    """
    # Silence FutureWarning messages for the rest of the process.
    warnings.filterwarnings("ignore", category=FutureWarning)

    # --- Load ---
    print("Loading dataset...")
    try:
        frame = pd.read_csv("student_performance_dataset.csv")
        print(f"Dataset loaded successfully! Shape: {frame.shape}")
    except Exception as err:
        print(f"Error loading dataset: {err}")
        return

    # --- Reduce memory footprint ---
    print("\nOptimizing datatypes...")
    frame = optimize_datatypes(frame)

    # --- Encode categorical columns as integers ---
    print("\nPreprocessing the data...")
    encoder = LabelEncoder()
    to_encode = [
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ]
    for column in to_encode:
        # fit_transform refits on each column, so one encoder is reused.
        frame[column] = encoder.fit_transform(frame[column])

    # --- Features (DataFrame) and target (Series) ---
    print("\nSeparating features and target variable...")
    y = frame['Pass_Fail']
    X = frame.drop(columns=['Student_ID', 'Pass_Fail'])

    print("\nSplitting the data into training and testing sets...")
    parts = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = parts
    print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")

    # --- Fit ---
    print("\nTraining the Random Forest Classifier...")
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    print("Model training completed!")

    # --- Score on the held-out split ---
    print("\nEvaluating the model...")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    percent = accuracy * 100
    print(f"Accuracy: {percent:.2f}%")

    # --- Show a sample; y_test is a Series, so its row index is kept ---
    print("\nDisplaying some predictions...")
    comparison = {'Actual': y_test, 'Predicted': y_pred}
    predictions_df = pd.DataFrame(comparison)
    print(predictions_df.head())

# Run the scikit-learn-intelex variant only when executed as the main module, not on import.
if __name__ == "__main__":
    mainIntelex()

Loading dataset...
Dataset loaded successfully! Shape: (708, 10)

Optimizing datatypes...

Preprocessing the data...

Separating features and target variable...

Splitting the data into training and testing sets...
Training set shape: (566, 8), Testing set shape: (142, 8)

Training the Random Forest Classifier...
Model training completed!

Evaluating the model...
Accuracy: 100.00%

Displaying some predictions...
     Actual  Predicted
120       0          0
247       1          1
324       1          1
204       1          1
603       1          1


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
