In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import cupy as cp
from sklearnex import patch_sklearn

In [2]:
def optimize_datatypes(data, categorical_columns=None):
    """Shrink a DataFrame's memory footprint by tightening column dtypes.

    The frame is modified in place and also returned, so both
    ``optimize_datatypes(df)`` and ``df = optimize_datatypes(df)`` work.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to optimize.
    categorical_columns : iterable of str, optional
        Columns to convert to the ``category`` dtype. When omitted, the
        student-performance columns this notebook works with are used.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If any requested categorical column is absent. The check runs before
        any conversion, so a failing call leaves ``data`` untouched.
    """
    if categorical_columns is None:
        categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']
    else:
        # Materialize once so generators and other one-shot iterables are safe to reuse.
        categorical_columns = list(categorical_columns)

    # Validate up front: failing halfway through would leave the frame half-converted.
    missing = [col for col in categorical_columns if col not in data.columns]
    if missing:
        raise KeyError(f"Categorical columns not found in data: {missing}")

    # Convert categorical features to category dtype
    for col in categorical_columns:
        data[col] = data[col].astype('category')

    # Downcast numerical columns to the smallest dtype that still holds the values
    for col in data.select_dtypes(include='int'):
        data[col] = pd.to_numeric(data[col], downcast='integer')
    for col in data.select_dtypes(include='float'):
        data[col] = pd.to_numeric(data[col], downcast='float')

    return data

def mainBareBones():
    """Baseline run: load the student CSV, fit a logistic regression, report results.

    Reads ``student_performance_dataset.csv`` from the working directory,
    integer-encodes the categorical columns, holds out 20% of the rows for
    testing, and prints the test accuracy followed by the first few
    actual-vs-predicted rows. Returns nothing.
    """
    # Columns that are stored as categories and then replaced by their integer codes
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']

    # Load and tighten dtypes in one step
    frame = optimize_datatypes(pd.read_csv("student_performance_dataset.csv"))

    # Swap each categorical column for its integer category codes
    for name in categorical_columns:
        frame[name] = frame[name].cat.codes

    # Features are everything except the identifier and the label
    features = frame.drop(columns=['Student_ID', 'Pass_Fail'])
    target = frame['Pass_Fail']

    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Fit the classifier on the training split
    classifier = LogisticRegression(random_state=42, max_iter=1000)
    classifier.fit(features_train, target_train)

    # Score on the held-out split
    # NOTE(review): the recorded run prints 100.00% accuracy; worth checking
    # whether a feature column directly determines Pass_Fail.
    predicted = classifier.predict(features_test)
    accuracy = accuracy_score(target_test, predicted)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Show a handful of predictions next to the true labels
    comparison = pd.DataFrame({'Actual': target_test, 'Predicted': predicted})
    print(comparison.head())

# Entry point for this cell: run the baseline pipeline when the module name is "__main__".
if __name__ == "__main__":
    mainBareBones()


Accuracy: 100.00%
     Actual  Predicted
120       0          0
247       1          1
324       1          1
204       1          1
603       1          1


In [3]:
def optimize_dtypes(df):
    """Downcast 64-bit numeric columns of ``df`` in place and return ``df``.

    Columns whose dtype compares equal to ``'float64'`` are passed through
    ``pd.to_numeric(..., downcast='float')``; columns equal to ``'int64'``
    use ``downcast='integer'``. Every other column is left untouched.
    """
    # (source dtype, pd.to_numeric downcast mode) pairs, checked in order
    downcast_rules = (('float64', 'float'), ('int64', 'integer'))

    for name in df.columns:
        current = df[name].dtype
        mode = next((target for source, target in downcast_rules if current == source), None)
        if mode is None:
            continue  # not a 64-bit numeric column
        df[name] = pd.to_numeric(df[name], downcast=mode)

    return df

def maincuPY():
    """CuPy variant of the pipeline: stage the data as CuPy arrays, then fit on NumPy copies.

    Reads ``student_performance_dataset.csv`` from the working directory,
    integer-encodes the categorical columns, converts features and labels to
    CuPy arrays, splits them 80/20, and trains a scikit-learn
    ``LogisticRegression`` on NumPy copies of the splits. Progress messages,
    the test accuracy and a few sample predictions are printed. Returns nothing.
    """
    # Load the Data
    print("Loading dataset...")
    data = pd.read_csv("student_performance_dataset.csv")

    # Preprocess the Data (Optimize by converting to appropriate types first)
    print("Preprocessing data...")
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home',
                           'Extracurricular_Activities', 'Pass_Fail']

    for col in categorical_columns:
        # Convert to 'category' dtype and encode
        data[col] = data[col].astype('category').cat.codes

    # Optimize numeric columns for memory efficiency
    data = optimize_dtypes(data)

    # Convert data to CuPy arrays
    print("Converting data to CuPy arrays...")
    X = cp.array(data.drop(['Student_ID', 'Pass_Fail'], axis=1).values, dtype=cp.float32)
    y = cp.array(data['Pass_Fail'].values, dtype=cp.int32)

    # Split the Data
    print("Splitting dataset...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the Model
    print("Training the model...")
    model = LogisticRegression(random_state=42, max_iter=1000)
    # The estimator is fed NumPy arrays, so the training split is copied to the host here.
    model.fit(cp.asnumpy(X_train), cp.asnumpy(y_train))

    # Evaluate the Model
    print("Evaluating the model...")
    # Convert the test split once and reuse it; y_test is needed for both the
    # accuracy score and the sample table, so a second copy would be redundant.
    X_test_host = cp.asnumpy(X_test)
    y_test_host = cp.asnumpy(y_test)
    y_pred = model.predict(X_test_host)
    # NOTE(review): the recorded run prints 100.00% accuracy; worth checking
    # whether a feature column directly determines Pass_Fail.
    accuracy = accuracy_score(y_test_host, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Print sample predictions
    print("Sample predictions:")
    predictions_df = pd.DataFrame({'Actual': y_test_host, 'Predicted': y_pred})
    print(predictions_df.head())

# Entry point for this cell: run the CuPy pipeline when the module name is "__main__".
if __name__ == "__main__":
    maincuPY()

Loading dataset...
Preprocessing data...
Converting data to CuPy arrays...
Splitting dataset...
Training the model...
Evaluating the model...
Accuracy: 100.00%
Sample predictions:
   Actual  Predicted
0       0          0
1       1          1
2       1          1
3       1          1
4       1          1


In [None]:
# NOTE(review): the sklearn names used in this notebook (LogisticRegression,
# train_test_split, accuracy_score) were imported in the first cell, i.e. before
# this call. Per the sklearnex docs, patching is expected to happen before those
# imports -- confirm that later cells actually pick up the patched estimators.
# NOTE(review): this cell has no execution count in the saved notebook; make sure
# it is run before the Intel cell below.
patch_sklearn() # Apply Intel optimizations

In [4]:
def optimize_datatypes(data, categorical_columns=None):
    """Shrink a DataFrame's memory footprint by tightening column dtypes.

    NOTE: this notebook also defines ``optimize_datatypes`` in an earlier
    cell; running this cell rebinds the name to the definition below.

    The frame is modified in place and also returned, so both
    ``optimize_datatypes(df)`` and ``df = optimize_datatypes(df)`` work.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to optimize.
    categorical_columns : iterable of str, optional
        Columns to convert to the ``category`` dtype. When omitted, the
        student-performance columns this notebook works with are used.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If any requested categorical column is absent. The check runs before
        any conversion, so a failing call leaves ``data`` untouched.
    """
    if categorical_columns is None:
        categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']
    else:
        # Materialize once so generators and other one-shot iterables are safe to reuse.
        categorical_columns = list(categorical_columns)

    # Validate up front: failing halfway through would leave the frame half-converted.
    missing = [col for col in categorical_columns if col not in data.columns]
    if missing:
        raise KeyError(f"Categorical columns not found in data: {missing}")

    # Convert categorical features to category dtype
    for col in categorical_columns:
        data[col] = data[col].astype('category')

    # Downcast numerical columns to the smallest dtype that still holds the values
    for col in data.select_dtypes(include='int'):
        data[col] = pd.to_numeric(data[col], downcast='integer')
    for col in data.select_dtypes(include='float'):
        data[col] = pd.to_numeric(data[col], downcast='float')

    return data

def mainIntelex():
    """Run the logistic-regression pipeline against Intel-patched scikit-learn.

    Reads ``student_performance_dataset.csv`` from the working directory,
    integer-encodes the categorical columns, holds out 20% of the rows for
    testing, fits a ``LogisticRegression`` and prints the test accuracy.
    Returns nothing.

    ``patch_sklearn()`` must have been called before this function runs for
    the Intel implementations to be used; otherwise the stock scikit-learn
    classes are picked up.
    """
    # Look these names up at call time. The notebook's first cell bound them
    # before patch_sklearn() ran, so the module-level names still point at the
    # unpatched implementations; importing here resolves to whatever sklearn
    # currently exposes, i.e. the patched versions once the patch is applied.
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    # Load the Data
    data = pd.read_csv("student_performance_dataset.csv")

    # Define the categorical columns
    categorical_columns = ['Gender', 'Parental_Education_Level', 'Internet_Access_at_Home', 'Extracurricular_Activities', 'Pass_Fail']

    # Optimize data types
    data = optimize_datatypes(data)

    # Encode categorical data as integer category codes
    data[categorical_columns] = data[categorical_columns].apply(lambda col: col.cat.codes)

    # Split the Data: features are everything except the identifier and the label
    X = data.drop(['Student_ID', 'Pass_Fail'], axis=1)
    y = data['Pass_Fail']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the Model
    model = LogisticRegression(random_state=42, max_iter=1000)
    model.fit(X_train, y_train)

    # Evaluate the Model
    # NOTE(review): the recorded run prints 100.00% accuracy; worth checking
    # whether a feature column directly determines Pass_Fail.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

# Entry point for this cell: run the Intel-extension pipeline when the module name is "__main__".
if __name__ == "__main__":
    mainIntelex()


Accuracy: 100.00%


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
