In [1]:
import numpy as np
import pandas as pd

# Pin NumPy's global random state so repeated runs draw identical samples.
np.random.seed(42)

# Number of rows to generate.
n_samples = 200

# Independent uniform draws, one array per feature. All arrays come from the
# same seeded global stream, so the order of these calls determines the values.
hours_studied = np.random.uniform(low=1, high=10, size=n_samples)          # hours studied
prev_score = np.random.uniform(low=40, high=100, size=n_samples)           # previous score
sleep_hrs = np.random.uniform(low=4, high=9, size=n_samples)               # sleep hours
attendance = np.random.uniform(low=60, high=100, size=n_samples)           # attendance %
extracurricular_hrs = np.random.uniform(low=0, high=5, size=n_samples)     # extracurricular hrs

# Linear weights, listed in the same order as the features above.
coeffs = [5, 0.8, 2, 0.5, -3]

# Gaussian noise (mean 0, standard deviation 5), drawn after all features.
noise = np.random.normal(loc=0, scale=5, size=n_samples)

# Target 'performance index': weighted features accumulated left to right,
# with the noise term added last.
performance_index = coeffs[0] * hours_studied
performance_index = performance_index + coeffs[1] * prev_score
performance_index = performance_index + coeffs[2] * sleep_hrs
performance_index = performance_index + coeffs[3] * attendance
performance_index = performance_index + coeffs[4] * extracurricular_hrs
performance_index = performance_index + noise

# Assemble the table; keyword order fixes the column order.
df = pd.DataFrame(dict(
    Hours_Studied=hours_studied,
    Previous_Score=prev_score,
    Sleep_Hours=sleep_hrs,
    Attendance_Percent=attendance,
    Extracurricular_Hours=extracurricular_hrs,
    Performance_Index=performance_index,
))

# Persist to CSV in the working directory, without the row index column.
df.to_csv('student_performance_dataset.csv', index=False)

# Confirmation message followed by a preview of the first five rows.
print(
    "Dataset 'student_performance_dataset.csv' generated successfully!",
    df.head(),
    sep="\n",
)


Dataset 'student_performance_dataset.csv' generated successfully!
   Hours_Studied  Previous_Score  Sleep_Hours  Attendance_Percent  \
0       4.370861       78.521899     4.515619           66.757403   
1       9.556429       45.048398     8.512765           71.143614   
2       7.587945       49.697723     6.526262           67.080419   
3       6.387926       93.913251     8.132287           63.548101   
4       2.404168       76.385744     5.600248           64.825435   

   Extracurricular_Hours  Performance_Index  
0               3.536193         117.361690  
1               0.762695         127.453391  
2               2.881442         117.547303  
3               3.033575         149.061061  
4               2.120653         113.179639  
