In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix NumPy's global random state so every run draws the same synthetic values
np.random.seed(seed=42)

In [3]:
# Generate synthetic data
N_STUDENTS = 2000    # number of synthetic student records
N_ATTENDING = 1600   # records with attendance in 70..100 (80% of N_STUDENTS)

# Attendance is drawn exactly once: N_ATTENDING values in 70..100 and the
# remaining values in 0..69 (randint's upper bound is exclusive). The array is
# shuffled so the low-attendance records are spread over the whole dataset
# instead of sitting in the last rows.
attendance_values = np.concatenate([
    np.random.randint(70, 101, N_ATTENDING),
    np.random.randint(0, 70, N_STUDENTS - N_ATTENDING),
])
np.random.shuffle(attendance_values)

# Every score is drawn uniformly between 0 and its maximum, then rounded to
# one decimal place. Key order here is the column order of the dataset.
data = {
    'Quiz1': np.round(np.random.uniform(0, 4, N_STUDENTS), 1),  # Out of 4
    'Quiz2': np.round(np.random.uniform(0, 4, N_STUDENTS), 1),  # Out of 4
    'Assignment1': np.round(np.random.uniform(0, 3.5, N_STUDENTS), 1),  # Out of 3.5
    'Assignment2': np.round(np.random.uniform(0, 3.5, N_STUDENTS), 1),  # Out of 3.5
    'Attendance': attendance_values,  # 80% >= 70, 20% < 70
    'StudyHours': np.round(np.random.uniform(0, 12, N_STUDENTS), 1),  # Rounded to first decimal place
    'ProjectScore': np.round(np.random.uniform(0, 10, N_STUDENTS), 1),  # Out of 10
    'Midterm': np.round(np.random.uniform(0, 25, N_STUDENTS), 1),  # Out of 25
    'Final': np.round(np.random.uniform(0, 50, N_STUDENTS), 1),  # Out of 50
}

In [4]:
# Students whose attendance is below 70 get a "Final" score of 0.
# `data` still holds plain NumPy arrays here, so this is an in-place masked write.
low_attendance = data['Attendance'] < 70
data['Final'][low_attendance] = 0

In [5]:
# Turn the dict of equal-length arrays into a table, one column per key
df = pd.DataFrame(data=data)

In [6]:
# Total score = row-wise sum of every graded component
# (Attendance and StudyHours are not part of the total)
columns_to_sum = [
    'Quiz1', 'Quiz2',
    'Assignment1', 'Assignment2',
    'ProjectScore',
    'Midterm',
    'Final',
]
df['Total'] = df[columns_to_sum].sum(axis='columns')

In [7]:
# Assign letter grades based on the total.
# Each condition is one half-open band [lower, upper) of 'Total'; the bands
# do not overlap, and conditions[i] pairs with grades[i].
total = df['Total']
conditions = [
    (total >= 84.5),
    (total < 84.5) & (total >= 79.5),
    (total < 79.5) & (total >= 64.5),
    (total < 64.5) & (total >= 54.5),
    (total < 54.5) & (total >= 49.5),
    (total < 49.5)
]

grades = ['A+', 'A', 'B', 'C', 'D', 'F']

# np.select's implicit default is the integer 0. Newer NumPy releases refuse
# to combine that with string choices ("Choicelist and default value do not
# have a common dtype"), so a string default is passed explicitly. It is only
# used for rows that match no band (e.g. a NaN total) and leaves them blank.
df['Grade'] = np.select(conditions, grades, default='')

In [8]:
# Numeric code for each letter grade: best grade gets 0, worst gets 5
grade_mapping = {
    letter: code
    for code, letter in enumerate(('A+', 'A', 'B', 'C', 'D', 'F'))
}

In [9]:
# Replace the 'Grade' column with numerical values.
# The letter grades are recomputed from `conditions` first, so re-running this
# cell never tries to map already-numeric codes (which would yield NaN).
# The explicit string default is required: np.select's implicit default is the
# integer 0, which newer NumPy releases refuse to combine with string choices.
df['Grade'] = np.select(conditions, grades, default='')
df['Grade'] = df['Grade'].map(grade_mapping)

In [10]:
# Store the grade codes as integers (raises if any code is missing/NaN)
df['Grade'] = df['Grade'].astype(dtype=int)

In [11]:
# 'Total' was only needed to derive the grade; remove it from the dataset
del df['Total']

In [12]:
# Make 'StudyHours' consistent with each student's grade.
#
# At this point 'Grade' holds the integer codes from `grade_mapping`, not the
# letters, so every letter is translated to its code before comparing.
# Comparing the column against the letter strings never matches and would
# leave 'StudyHours' untouched.
#
# Each row: (letter, redraw_below, draw_low, draw_high, cap_from, cap_value)
#   * StudyHours below `redraw_below` are redrawn uniformly between
#     `draw_low` and `draw_high` and rounded to one decimal place;
#   * StudyHours at or above `cap_from` are then set to `cap_value`.
study_hour_bands = [
    ('A+', 9, 9, 12, 12, 12),
    ('A',  7, 7, 9,  9,  9),
    ('B',  5, 5, 7,  7,  7),
    ('C',  3, 3, 5,  5,  5),
    ('D',  2, 2, 3,  3,  3),
    ('F',  1, 0, 2,  2,  1),
]

for letter, redraw_below, draw_low, draw_high, cap_from, cap_value in study_hour_bands:
    has_grade = df['Grade'] == grade_mapping[letter]

    too_low = has_grade & (df['StudyHours'] < redraw_below)
    df.loc[too_low, 'StudyHours'] = np.round(
        np.random.uniform(draw_low, draw_high, int(too_low.sum())), 1
    )

    # Evaluated after the redraw, so a redrawn value that rounds up to the
    # band's upper edge is capped as well.
    too_high = has_grade & (df['StudyHours'] >= cap_from)
    df.loc[too_high, 'StudyHours'] = cap_value


In [13]:
# Write the finished dataset to disk; the row index is not exported
df.to_csv(path_or_buf='student_data.csv', index=False)