# In [1]:
import pandas as pd
import numpy as np

# Read the simulated student dataset from CSV into a DataFrame.
# NOTE: this is an absolute, machine-specific path; adjust it when running
# the script on another machine.
file_path = (
    "C:/Users/waizz/OneDrive/Documents/GitHub/EduEz/pages/"
    "Updated_Bruneian_Students_Simulated_Dataset.csv"
)
df = pd.read_csv(file_path)

# O Level subjects, as a mapping of dataset column name -> display name.
# Every column name is the shared 'O_Level_' prefix plus a subject suffix,
# so the table below lists only (suffix, display name) pairs.
_O_LEVEL_SUBJECT_NAMES = (
    ('Mathematics', 'Mathematics'),
    ('English', 'English Language'),
    ('Malay_Language', 'Bahasa Melayu'),
    ('IRK', 'Islamic Religious Knowledge'),
    ('Chemistry', 'Chemistry'),
    ('Biology', 'Biology'),
    ('Physics', 'Physics'),
    ('Combined_Science', 'Combined Science'),
    ('Commerce', 'Commerce'),
    ('Economics', 'Economics'),
    ('Computer_Science', 'Computer Science'),
    ('Additional_Maths', 'Additional Mathematics'),
)
o_level_subjects = {
    'O_Level_' + suffix: display_name
    for suffix, display_name in _O_LEVEL_SUBJECT_NAMES
}

# Per-subject grade probabilities (each subject has its own distribution,
# chosen to reflect its difficulty). Every row lists the probabilities in the
# order of _GRADE_LABELS, and each row sums to 1.
_GRADE_LABELS = ('A1', 'A2', 'B3', 'B4', 'C5', 'C6', 'D7', 'E8', 'U')
_GRADE_PROBABILITIES = (
    ('O_Level_Mathematics',      (0.05, 0.08, 0.12, 0.15, 0.2,  0.15, 0.1,  0.08, 0.07)),
    ('O_Level_English',          (0.1,  0.1,  0.15, 0.15, 0.2,  0.1,  0.1,  0.05, 0.05)),
    ('O_Level_Malay_Language',   (0.15, 0.12, 0.2,  0.15, 0.15, 0.1,  0.08, 0.03, 0.02)),
    ('O_Level_IRK',              (0.2,  0.2,  0.18, 0.12, 0.1,  0.1,  0.05, 0.03, 0.02)),
    ('O_Level_Chemistry',        (0.05, 0.1,  0.12, 0.15, 0.2,  0.15, 0.1,  0.07, 0.06)),
    ('O_Level_Biology',          (0.1,  0.12, 0.15, 0.12, 0.18, 0.15, 0.08, 0.05, 0.05)),
    ('O_Level_Physics',          (0.08, 0.1,  0.12, 0.15, 0.2,  0.15, 0.08, 0.07, 0.05)),
    ('O_Level_Combined_Science', (0.08, 0.1,  0.15, 0.15, 0.2,  0.12, 0.1,  0.05, 0.05)),
    ('O_Level_Commerce',         (0.1,  0.15, 0.15, 0.18, 0.15, 0.1,  0.07, 0.06, 0.04)),
    ('O_Level_Economics',        (0.08, 0.12, 0.15, 0.18, 0.15, 0.1,  0.08, 0.07, 0.07)),
    ('O_Level_Computer_Science', (0.12, 0.12, 0.2,  0.15, 0.15, 0.1,  0.05, 0.07, 0.04)),
    ('O_Level_Additional_Maths', (0.05, 0.07, 0.12, 0.15, 0.15, 0.15, 0.1,  0.08, 0.13)),
)
grade_distributions = {
    subject_column: dict(zip(_GRADE_LABELS, probabilities))
    for subject_column, probabilities in _GRADE_PROBABILITIES
}

# Function to assign grades based on distribution
def assign_grades(num_students, distribution, rng=None, index=None):
    """Draw one random grade per student according to ``distribution``.

    Args:
        num_students: Number of grades to draw; the length of the result.
        distribution: Mapping of grade label -> probability. The
            probabilities are passed straight to NumPy's ``choice``, which
            rejects weights that do not sum to 1.
        rng: Optional random source exposing a NumPy-style ``choice`` method
            (e.g. ``numpy.random.default_rng(seed)``). Pass one to get
            reproducible draws without touching global state. When omitted,
            the global ``numpy.random`` state is used, as before.
        index: Optional index for the returned Series. Pass the target
            DataFrame's index when it is not the default 0..n-1 range, so
            that column assignment aligns row-for-row instead of producing
            NaN for non-matching labels. When omitted, the Series gets the
            default range index, as before.

    Returns:
        A ``pandas.Series`` of ``num_students`` grade labels.
    """
    labels = list(distribution)
    weights = list(distribution.values())
    # The numpy.random module and Generator objects share the same
    # choice(a, size=..., p=...) call shape, so either can act as the sampler.
    sampler = np.random if rng is None else rng
    draws = sampler.choice(labels, size=num_students, p=weights)
    return pd.Series(draws, index=index)

# Set each O Level column of df to grades sampled from that subject's own
# distribution, one grade per row (any existing values are replaced).
row_count = len(df)
for column_name in o_level_subjects:
    subject_distribution = grade_distributions[column_name]
    df[column_name] = assign_grades(row_count, subject_distribution)

# Write the updated DataFrame to a CSV file (relative to the current working
# directory), leaving out the row index.
output_path = 'modified_bruneian_students_dataset.csv'
df.to_csv(output_path, index=False)
