# In [None]:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from collections import Counter
import matplotlib.pyplot as plt

# Load the student-performance table; the path is relative to the working
# directory the notebook/script is run from.
df = pd.read_csv(filepath_or_buffer="Student_performance_data.csv")


def assign_grade_class(gpa):
    """Map a numeric GPA to a letter grade.

    The cut-offs are inclusive lower bounds, checked from highest to lowest:
    3.5 -> 'A', 3.0 -> 'B', 2.5 -> 'C', 2.0 -> 'D'. A value that meets none
    of them (including NaN, for which every comparison is false) yields 'F'.

    Args:
        gpa: The grade point average to classify.

    Returns:
        One of 'A', 'B', 'C', 'D' or 'F'.
    """
    # Ordered high-to-low so the first floor that is met decides the grade.
    grade_floors = ((3.5, 'A'), (3.0, 'B'), (2.5, 'C'), (2.0, 'D'))
    for floor, letter in grade_floors:
        if gpa >= floor:
            return letter
    return 'F'


# --- Target construction ---------------------------------------------------
# Derive a letter grade from each row's GPA, then encode the letters as
# integer class ids so they can be used as the classification target.
df['GradeLetter'] = df['GPA'].apply(assign_grade_class)

label_encoder = LabelEncoder()
df['GradeClassEncoded'] = label_encoder.fit_transform(df['GradeLetter'])


# --- Feature / target split ------------------------------------------------
# GPA and GradeLetter are excluded from the features because the target is
# computed from them; StudentID is presumably just a row identifier.
y = df['GradeClassEncoded']
X = df.drop(columns=['GradeLetter', 'GradeClassEncoded', 'StudentID', 'GPA'])


def _counts_by_letter(code_counts):
    """Return *code_counts* re-keyed by decoded grade letter, order kept."""
    decoded_letters = label_encoder.inverse_transform(list(code_counts))
    return dict(zip(decoded_letters, code_counts.values()))


# --- Class balance before resampling ---------------------------------------
original_counts = dict(Counter(y))
print("Original class distribution:", _counts_by_letter(original_counts))


# --- SMOTE oversampling ----------------------------------------------------
# A fixed random_state keeps the synthetic samples reproducible across runs.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


# --- Class balance after resampling ----------------------------------------
resampled_counts = dict(Counter(y_resampled))

# Letter form of the resampled target, kept alongside the encoded version.
y_resampled_letters = label_encoder.inverse_transform(y_resampled)

print("After SMOTE class distribution:", _counts_by_letter(resampled_counts))


# Output:
# Original class distribution: {'C': 24, 'A': 24, 'B': 23, 'D': 29}
# After SMOTE class distribution: {'C': 29, 'A': 29, 'B': 29, 'D': 29}
