In [9]:
import numpy as np
import pandas as pd

# Global configuration.
np.random.seed(42)   # fixed seed: every fresh run draws the same samples
N = 10_000           # number of synthetic rows to generate


# Generate Independent Variables
#
# The three draws below consume the global NumPy random stream in this order
# (screen time, caffeine, stress); reordering them changes every value.

# Screen time: right-skewed lognormal draw, capped at 10.
# (The lower bound of 0 never binds because lognormal samples are positive.)
screen_time = np.random.lognormal(mean=1.5, sigma=0.5, size=N).clip(0, 10)

# Caffeine intake: Poisson counts with rate 2, capped at 6.
caffeine_intake = np.random.poisson(lam=2, size=N).clip(0, 6)

# Stress level: normal(5, 2) draw limited to the 1-10 range.
stress_level = np.random.normal(loc=5, scale=2, size=N).clip(1, 10)


# Generate Sleep Duration (depends only on: stress, caffeine, screen_time)
#
# Every row starts from a normal(7.5, 0.7) baseline and is then shifted by
# random penalties/bonuses chosen from that row's stress, caffeine and
# screen-time values. The uniform draws are taken row by row, in the order
# stress -> caffeine -> screen time, so the sequence of random calls is part
# of the result.
sleep_duration = np.random.normal(loc=7.5, scale=0.7, size=N)

for row in range(N):
    stress = stress_level[row]
    duration = sleep_duration[row]

    # Stress: above 7 shortens sleep by 1.0-2.0, below 3 lengthens it by 0.5-1.0.
    if stress > 7:
        duration -= np.random.uniform(1.0, 2.0)
    elif stress < 3:
        duration += np.random.uniform(0.5, 1.0)

    # Caffeine: an intake of 3 or more shortens sleep by 0.5-1.5.
    if caffeine_intake[row] >= 3:
        duration -= np.random.uniform(0.5, 1.5)

    # Screen time: more than 6 shortens sleep by 0.5-1.0.
    if screen_time[row] > 6:
        duration -= np.random.uniform(0.5, 1.0)

    sleep_duration[row] = duration

# Keep the final durations inside the 4-10 range.
sleep_duration = sleep_duration.clip(4, 10)



# Generate Sleep Quality
#
# Baseline is a normal(7.0, 1.0) draw per row. For each row a net shift is
# accumulated from stress, screen time and caffeine (in that order), and then
# applied together with a small normal(0, 0.2) jitter. The random calls are
# made row by row in exactly this order.
sleep_quality = np.random.normal(loc=7.0, scale=1.0, size=N)

for row in range(N):
    stress = stress_level[row]
    screen = screen_time[row]
    caffeine = caffeine_intake[row]

    shift = 0.0  # net adjustment for this row

    # Stress: 7 or more lowers quality by 1.0-2.0; 3 or less raises it by 0.2-0.8.
    if stress >= 7:
        shift -= np.random.uniform(1.0, 2.0)
    elif stress <= 3:
        shift += np.random.uniform(0.2, 0.8)

    # Screen time: 6 or more costs 0.8-1.8; between 4 and 6 costs 0.3-0.8.
    if screen >= 6:
        shift -= np.random.uniform(0.8, 1.8)
    elif screen >= 4:
        shift -= np.random.uniform(0.3, 0.8)

    # Caffeine: 3 or more costs 0.4-1.0; exactly 2 costs 0.1-0.4.
    if caffeine >= 3:
        shift -= np.random.uniform(0.4, 1.0)
    elif caffeine == 2:
        shift -= np.random.uniform(0.1, 0.4)

    # The per-row jitter is drawn last, after all effect draws for the row.
    jitter = np.random.normal(0, 0.2)
    sleep_quality[row] += shift + jitter

# Clamp to the 1-10 scale.
sleep_quality = sleep_quality.clip(1, 10)



# Generate Academic Performance (depends ONLY on: sleep_duration, sleep_quality)
#
# Baseline is a normal(70, 10) draw per row, then adjusted from the row's
# sleep duration and sleep quality. The duration bands (< 6, 7 to 8, > 9) are
# mutually exclusive, as are the two quality bands (< 5, > 8), so at most one
# draw is made per group for any row.
academic_performance = np.random.normal(loc=70, scale=10, size=N)

for row in range(N):
    duration = sleep_duration[row]
    quality = sleep_quality[row]
    score = academic_performance[row]

    # Sleep duration: under 6 and over 9 are penalised, 7 to 8 is rewarded.
    if duration < 6:
        score -= np.random.uniform(10, 20)
    elif 7 <= duration <= 8:
        score += np.random.uniform(5, 15)
    elif duration > 9:
        score -= np.random.uniform(8, 18)

    # Sleep quality: under 5 is penalised, over 8 is rewarded.
    if quality < 5:
        score -= np.random.uniform(5, 15)
    elif quality > 8:
        score += np.random.uniform(5, 12)

    academic_performance[row] = score

# Ensure values stay between 0 and 100.
academic_performance = academic_performance.clip(0, 100)



# Build the Dataset
#
# One column per generated variable, ordered as: the three independent
# inputs, the two sleep measures, then the outcome.
dataset_columns = dict(
    screen_time=screen_time,
    caffeine_intake=caffeine_intake,
    stress_level=stress_level,
    sleep_duration=sleep_duration,
    sleep_quality=sleep_quality,
    academic_performance=academic_performance,
)
df = pd.DataFrame(dataset_columns)

# Preview the first rows of the assembled table.
df.head()


Unnamed: 0,screen_time,caffeine_intake,stress_level,sleep_duration,sleep_quality,academic_performance
0,5.745156,1,7.634179,6.912628,4.213806,62.113253
1,4.182327,0,1.0,8.843957,7.165575,68.490097
2,6.19563,0,4.115754,7.013748,5.066795,71.116444
3,9.597618,1,1.808579,7.609843,5.383383,80.884152
4,3.986539,2,3.998298,8.654227,8.143298,73.900897


In [10]:
# Persist the generated table as CSV in the working directory;
# index=False keeps the DataFrame's row index out of the file.
output_path = "student_sleep_stress_dataset.csv"
df.to_csv(output_path, index=False)
