In [1]:
import warnings
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

def optimize_datatypes(data, categorical_columns=None):
    """Tighten column dtypes of ``data`` to reduce its memory footprint.

    The frame is modified in place and also returned, so callers may either
    use the return value or keep working with the object they passed in.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns should be converted.
    categorical_columns : iterable of str, optional
        Names of the columns to convert to the ``category`` dtype. When
        omitted, the categorical columns of the student performance dataset
        are used. Names that do not exist in ``data`` are skipped rather than
        raising ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object after conversion.
    """
    if categorical_columns is None:
        categorical_columns = (
            'Gender',
            'Parental_Education_Level',
            'Internet_Access_at_Home',
            'Extracurricular_Activities',
            'Pass_Fail',
        )

    # Convert categorical features to category dtype. Absent columns are
    # skipped so the function also works on frames carrying only a subset of
    # the expected schema (callers already guard with `col in data.columns`).
    for col in categorical_columns:
        if col in data.columns:
            data[col] = data[col].astype('category')

    # Downcast numerical columns. select_dtypes returns a new frame, so
    # assigning into `data` while iterating over its column labels is safe.
    for col in data.select_dtypes(include='int'):
        data[col] = pd.to_numeric(data[col], downcast='integer')
    for col in data.select_dtypes(include='float'):
        data[col] = pd.to_numeric(data[col], downcast='float')

    return data

def mainBareBones():
    """Run the plain scikit-learn AdaBoost pipeline on the student dataset.

    Reads ``student_performance_dataset.csv`` (path relative to the working
    directory), optimizes its dtypes, label-encodes the categorical columns,
    holds out 20% of the rows for testing and fits a 100-estimator
    ``AdaBoostClassifier``. Accuracy and the classification report are
    printed; nothing is returned.
    """
    # Silences FutureWarning for the rest of the process, not just this call.
    warnings.filterwarnings("ignore", category=FutureWarning)

    frame = optimize_datatypes(pd.read_csv("student_performance_dataset.csv"))

    # One encoder instance is re-fitted for every column, so after the loop
    # it only remembers the classes of the last column it saw.
    label_encoder = LabelEncoder()
    for name in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        if name not in frame.columns:
            continue
        frame[name] = label_encoder.fit_transform(frame[name])

    # Target is the encoded Pass_Fail column; the identifier and the target
    # are excluded from the feature matrix.
    target = frame['Pass_Fail'].values
    features = frame.drop(['Student_ID', 'Pass_Fail'], axis=1).values

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)

    # NOTE(review): the recorded run reports 100% accuracy; worth confirming
    # that no feature column directly determines Pass_Fail.
    predictions = classifier.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(f"Accuracy (AdaBoost): {score * 100:.2f}%")

    print("\nClassification Report:")
    print(classification_report(y_test, predictions, zero_division=1))

# Cell entry point: run the plain scikit-learn pipeline when this code is
# executed as the main module (the recorded output below shows that it ran).
if __name__ == "__main__":
    mainBareBones()


Accuracy (AdaBoost): 100.00%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        71

    accuracy                           1.00       142
   macro avg       1.00      1.00      1.00       142
weighted avg       1.00      1.00      1.00       142



In [2]:
import warnings
import pandas as pd
import cupy as cp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

def optimize_datatypes(data, categorical_columns=None):
    """Tighten column dtypes of ``data``, casting numerics through CuPy.

    The frame is modified in place and also returned. Numeric columns are
    copied into a CuPy array with the narrower dtype and immediately copied
    back with ``.get()``, so the net effect on the frame is a dtype cast.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns should be converted.
    categorical_columns : iterable of str, optional
        Names of the columns to convert to the ``category`` dtype. When
        omitted, the categorical columns of the student performance dataset
        are used. Names that do not exist in ``data`` are skipped rather than
        raising ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object after conversion.
    """
    if categorical_columns is None:
        categorical_columns = (
            'Gender',
            'Parental_Education_Level',
            'Internet_Access_at_Home',
            'Extracurricular_Activities',
            'Pass_Fail',
        )

    # Convert categorical features to category dtype, skipping absent columns
    # (callers already guard with `col in data.columns`).
    for col in categorical_columns:
        if col in data.columns:
            data[col] = data[col].astype('category')

    # Downcast integer columns to int32. Values outside the int32 range
    # cannot be represented after the cast, so such columns are left as-is
    # instead of being corrupted.
    int32_min, int32_max = -2**31, 2**31 - 1
    for col in data.select_dtypes(include='int'):
        values = data[col]
        if len(values) and (values.min() < int32_min or values.max() > int32_max):
            continue
        data[col] = cp.array(values, dtype=cp.int32).get()  # device copy, then back to NumPy

    # Downcast float columns to float32 (this reduces precision).
    for col in data.select_dtypes(include='float'):
        data[col] = cp.array(data[col], dtype=cp.float32).get()  # device copy, then back to NumPy

    return data

def mainCuPY():
    """Run the AdaBoost pipeline with the data routed through CuPy arrays.

    Reads ``student_performance_dataset.csv`` (path relative to the working
    directory), optimizes its dtypes, label-encodes the categorical columns,
    copies features and target into CuPy arrays and then back to NumPy before
    splitting, and fits a 100-estimator ``AdaBoostClassifier`` on the NumPy
    data. Accuracy and the classification report are printed; nothing is
    returned.
    """
    # Silences FutureWarning for the rest of the process, not just this call.
    warnings.filterwarnings("ignore", category=FutureWarning)

    frame = optimize_datatypes(pd.read_csv("student_performance_dataset.csv"))

    # One encoder instance is re-fitted for every column, so after the loop
    # it only remembers the classes of the last column it saw.
    label_encoder = LabelEncoder()
    for name in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        if name not in frame.columns:
            continue
        frame[name] = label_encoder.fit_transform(frame[name])

    # Features (identifier and target removed) and target as CuPy arrays.
    features_gpu = cp.array(frame.drop(['Student_ID', 'Pass_Fail'], axis=1).values)
    target_gpu = cp.array(frame['Pass_Fail'].values)

    # Both arrays are converted back with cp.asnumpy before the split, so the
    # split, training and evaluation below all operate on NumPy arrays.
    X_train, X_test, y_train, y_test = train_test_split(
        cp.asnumpy(features_gpu),
        cp.asnumpy(target_gpu),
        test_size=0.2,
        random_state=42,
    )

    classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)

    # NOTE(review): the recorded run reports 100% accuracy; worth confirming
    # that no feature column directly determines Pass_Fail.
    predictions = classifier.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(f"Accuracy (AdaBoost): {score * 100:.2f}%")

    print("\nClassification Report:")
    print(classification_report(y_test, predictions, zero_division=1))

# Cell entry point: run the CuPy variant of the pipeline when this code is
# executed as the main module (the recorded output below shows that it ran).
if __name__ == "__main__":
    mainCuPY()


Accuracy (AdaBoost): 100.00%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        71

    accuracy                           1.00       142
   macro avg       1.00      1.00      1.00       142
weighted avg       1.00      1.00      1.00       142



In [3]:
import warnings
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearnex import patch_sklearn
patch_sklearn()  # Apply the patch to scikit-learn

def optimize_datatypes(data, categorical_columns=None):
    """Tighten column dtypes of ``data`` to reduce its memory footprint.

    The frame is modified in place and also returned, so callers may either
    use the return value or keep working with the object they passed in.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns should be converted.
    categorical_columns : iterable of str, optional
        Names of the columns to convert to the ``category`` dtype. When
        omitted, the categorical columns of the student performance dataset
        are used. Names that do not exist in ``data`` are skipped rather than
        raising ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object after conversion.
    """
    if categorical_columns is None:
        categorical_columns = (
            'Gender',
            'Parental_Education_Level',
            'Internet_Access_at_Home',
            'Extracurricular_Activities',
            'Pass_Fail',
        )

    # Convert categorical features to category dtype. Absent columns are
    # skipped so the function also works on frames carrying only a subset of
    # the expected schema (callers already guard with `col in data.columns`).
    for col in categorical_columns:
        if col in data.columns:
            data[col] = data[col].astype('category')

    # Downcast numerical columns. select_dtypes returns a new frame, so
    # assigning into `data` while iterating over its column labels is safe.
    for col in data.select_dtypes(include='int'):
        data[col] = pd.to_numeric(data[col], downcast='integer')
    for col in data.select_dtypes(include='float'):
        data[col] = pd.to_numeric(data[col], downcast='float')

    return data

def mainIntelex():
    """Run the AdaBoost pipeline in the cell that enables Intel's sklearn patch.

    Reads ``student_performance_dataset.csv`` (path relative to the working
    directory), optimizes its dtypes, label-encodes the categorical columns,
    holds out 20% of the rows for testing and fits a 100-estimator
    ``AdaBoostClassifier``. Accuracy and the classification report are
    printed; nothing is returned.

    NOTE(review): in this cell ``patch_sklearn()`` is called after the
    scikit-learn names used here were imported — confirm against the
    sklearnex documentation whether any of them is actually accelerated.
    """
    # Silences FutureWarning for the rest of the process, not just this call.
    warnings.filterwarnings("ignore", category=FutureWarning)

    frame = optimize_datatypes(pd.read_csv("student_performance_dataset.csv"))

    # One encoder instance is re-fitted for every column, so after the loop
    # it only remembers the classes of the last column it saw.
    label_encoder = LabelEncoder()
    for name in (
        'Gender',
        'Parental_Education_Level',
        'Internet_Access_at_Home',
        'Extracurricular_Activities',
        'Pass_Fail',
    ):
        if name not in frame.columns:
            continue
        frame[name] = label_encoder.fit_transform(frame[name])

    # Target is the encoded Pass_Fail column; the identifier and the target
    # are excluded from the feature matrix.
    target = frame['Pass_Fail'].values
    features = frame.drop(['Student_ID', 'Pass_Fail'], axis=1).values

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)

    # NOTE(review): the recorded run reports 100% accuracy; worth confirming
    # that no feature column directly determines Pass_Fail.
    predictions = classifier.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(f"Accuracy (AdaBoost): {score * 100:.2f}%")

    print("\nClassification Report:")
    print(classification_report(y_test, predictions, zero_division=1))

# Cell entry point: run the sklearnex-cell variant of the pipeline when this
# code is executed as the main module (the recorded output below shows that it ran).
if __name__ == "__main__":
    mainIntelex()


Accuracy (AdaBoost): 100.00%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        71
           1       1.00      1.00      1.00        71

    accuracy                           1.00       142
   macro avg       1.00      1.00      1.00       142
weighted avg       1.00      1.00      1.00       142



Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
