## Dataset simulation

In [1]:
import numpy as np
import pandas as pd
from datetime import date, timedelta
import random  

# Data simulation for different scenarios

Scenario 1:

"Overworked single parent": Due to Covid, he/she has to work from home. Additionally, he/she has to look after the children at home.
- Subjective stress is slightly higher than the physically measured stress during the week, and similar on the weekend.
- Over the weekend and on holidays, stress is clearly lower than average.
- Main stressors: Work deadlines and childcare 

Scenario 2:

Young man with relationship issues. Whenever he is around his partner, there is a potential for arguing and fights. 
- Main stressors: Shared time with his girlfriend, Being at home.
- In high stress periods, subjective stress is lower than the physical one.

Scenario 3:

Student with anxiety. Although he loves the subject, it is hard for him to speak in front of an unknown audience. Whenever there are presentations, his stress level rises. The same goes for gigs with his choir. 
- Mostly a fairly low stress level, but high stress during the rarely occurring presentations
- Main stressors: Presentations, concerts


In [3]:
from scipy.stats import truncnorm

def get_truncated_normal(mean=0, sd=1, low=0, upp=10):
    """Return a frozen normal distribution restricted to ``[low, upp]``.

    ``truncnorm`` takes its clipping points in standardised form, i.e. as
    distances from ``loc`` measured in units of ``scale``, so the absolute
    limits are converted before the distribution is built.

    Parameters
    ----------
    mean : location of the underlying (untruncated) normal distribution.
        The mean of the truncated distribution generally differs from it.
    sd : scale (standard deviation) of the underlying normal distribution.
    low, upp : absolute lower and upper limits of the returned values.

    Returns
    -------
    A frozen ``scipy.stats.truncnorm`` distribution; call ``.rvs()`` on it
    to draw random numbers.
    """
    lower_std = (low - mean) / sd
    upper_std = (upp - mean) / sd
    return truncnorm(lower_std, upper_std, loc=mean, scale=sd)
# Frozen samplers X2 ... X8: Xn is a normal distribution with location n and
# standard deviation 3, truncated to the range [1, 10]. Because of the
# truncation, the average of the drawn numbers is only approximately n.
X2, X3, X4, X5, X6, X7, X8 = (
    get_truncated_normal(mean=location, sd=3, low=1, upp=10)
    for location in range(2, 9)
)

# Usage example:
# X2.rvs()

In [7]:
# Dataset scenario 1: overworked mom
#
# Simulates one objective and one subjective stress score per day for 52 full
# weeks and labels every day with an activity. A day whose subjective score
# reaches the threshold gets one of the main stressors, every other day gets
# one of the other activities. The result is written to a yearly CSV and to a
# CSV that only contains January.

# Imported inside the cell on purpose: the cell then also runs when the name
# `date` has been rebound to something else earlier in the kernel session.
from datetime import date, timedelta

main_stressors = ["work", "childcare", "working at home", "homeschooling"]
other_activities = ["office","family","friends","yoga","long walk","running","shopping",
                    "dating", "holiday"]

# Cell-local, seeded generators: the CSV files are reproducible on every run
# and the global random state of `random` / `numpy` is left untouched.
SCEN1_SEED = 1
score_rng = np.random.default_rng(SCEN1_SEED)
activity_rng = random.Random(SCEN1_SEED)

sdate = date(2022,1,1)    # start date
edate = date(2022,12,31)   # end date
# The last day is dropped so that the range holds 364 days = exactly 52 weeks.
# 2022-01-01 is a Saturday, so every block of 7 days starts with the weekend.
# The range is stored as `dates` so the imported `date` class is not shadowed.
dates = pd.date_range(sdate, edate - timedelta(days=1), freq="D")
n_weeks, leftover_days = divmod(len(dates), 7)
assert leftover_days == 0, "the date range must cover whole weeks"

# One entry per day of the week:
# (objective sampler, subjective sampler, subjective score from which a main stressor is used)
weekend_profile = (X3, X4, 5)
weekday_profile = (X6, X8, 6)
week_profile = 2 * [weekend_profile] + 5 * [weekday_profile]

activity = []
score = []
for _ in range(n_weeks):
    for objective_dist, subjective_dist, stress_threshold in week_profile:
        objective = int(objective_dist.rvs(random_state=score_rng))
        subjective = int(subjective_dist.rvs(random_state=score_rng))
        score += [objective, subjective]
        pool = main_stressors if subjective >= stress_threshold else other_activities
        activity += 2 * [activity_rng.choice(pool)]  # same activity for both score types

# Two rows per day: the objective score first, the subjective score second.
date_column = [day for day in dates for _ in range(2)]
score_type = len(dates) * ["objective", "subjective"]
assert len(date_column) == len(activity) == len(score) == len(score_type)

df_dict = {'date': date_column, 'activity': activity, 'score_type': score_type, 'score': score}

df_scen1 = pd.DataFrame(df_dict).set_index("date")
df_scen1.to_csv("data_scen1_yearly.csv")
df_scen1.loc[:"2022-01-31"].to_csv("data_scen1_monthly.csv")

In [8]:
# Dataset scenario 2: young man with relationship issues
#
# Simulates one objective and one subjective stress score per day for 52 full
# weeks and labels every day with an activity. A day whose subjective score
# reaches the threshold gets one of the main stressors, every other day gets
# one of the other activities. The result is written to a yearly CSV and to a
# CSV that only contains January.

# Imported inside the cell on purpose: the cell then also runs when the name
# `date` has been rebound to something else earlier in the kernel session.
from datetime import date, timedelta

main_stressors = ["hanging out partner", "cooking at home", "time with partner"]
other_activities = ["office","family","friends","bar","football","running","shopping","hiking","holiday"]

# Cell-local, seeded generators: the CSV files are reproducible on every run
# and the global random state of `random` / `numpy` is left untouched.
SCEN2_SEED = 2
score_rng = np.random.default_rng(SCEN2_SEED)
activity_rng = random.Random(SCEN2_SEED)

sdate = date(2022,1,1)    # start date
edate = date(2022,12,31)   # end date
# The last day is dropped so that the range holds 364 days = exactly 52 weeks.
# 2022-01-01 is a Saturday, so every block of 7 days starts with the weekend.
# The range is stored as `dates` so the imported `date` class is not shadowed.
dates = pd.date_range(sdate, edate - timedelta(days=1), freq="D")
n_weeks, leftover_days = divmod(len(dates), 7)
assert leftover_days == 0, "the date range must cover whole weeks"

# One entry per day of the week:
# (objective sampler, subjective sampler, subjective score from which a main stressor is used)
weekend_profile = (X8, X6, 7)
weekday_profile = (X5, X4, 7)
week_profile = 2 * [weekend_profile] + 5 * [weekday_profile]

activity = []
score = []
for _ in range(n_weeks):
    for objective_dist, subjective_dist, stress_threshold in week_profile:
        objective = int(objective_dist.rvs(random_state=score_rng))
        subjective = int(subjective_dist.rvs(random_state=score_rng))
        score += [objective, subjective]
        pool = main_stressors if subjective >= stress_threshold else other_activities
        activity += 2 * [activity_rng.choice(pool)]  # same activity for both score types

# Two rows per day: the objective score first, the subjective score second.
date_column = [day for day in dates for _ in range(2)]
score_type = len(dates) * ["objective", "subjective"]
assert len(date_column) == len(activity) == len(score) == len(score_type)

df_dict = {'date': date_column, 'activity': activity, 'score_type': score_type, 'score': score}

# Scenario-specific name, so frames of other scenarios that live in the
# kernel are not overwritten by this cell.
df_scen2 = pd.DataFrame(df_dict).set_index("date")
df_scen2.to_csv("data_scen2_yearly.csv")
df_scen2.loc[:"2022-01-31"].to_csv("data_scen2_monthly.csv")

In [9]:
# Dataset scenario 3: Student with anxiety
#
# Simulates one objective and one subjective stress score per day for 364
# days and labels every day with an activity. A day whose subjective score
# reaches the threshold gets one of the main stressors, every other day gets
# one of the other activities. The result is written to a yearly CSV and to a
# CSV that only contains January.

# Imported inside the cell on purpose: the cell then also runs when the name
# `date` has been rebound to something else earlier in the kernel session.
from datetime import date, timedelta

main_stressors = ["performance with choir", "presentation", "speech"]
other_activities = ["office","family","friends","bar","football","running","shopping","hiking","holiday"]

# Cell-local, seeded generators: the CSV files are reproducible on every run
# and the global random state of `random` / `numpy` is left untouched.
SCEN3_SEED = 3
score_rng = np.random.default_rng(SCEN3_SEED)
activity_rng = random.Random(SCEN3_SEED)

sdate = date(2022,1,1)    # start date
edate = date(2022,12,31)   # end date
# The last day is dropped, so the range holds 364 days.
# The range is stored as `dates` so the imported `date` class is not shadowed.
dates = pd.date_range(sdate, edate - timedelta(days=1), freq="D")

# Every day uses the same samplers; there is no weekend/weekday distinction here.
STRESS_THRESHOLD = 6  # subjective score from which a main stressor is used

activity = []
score = []
for _ in range(len(dates)):  # one iteration per simulated day
    objective = int(X3.rvs(random_state=score_rng))
    subjective = int(X2.rvs(random_state=score_rng))
    score += [objective, subjective]
    pool = main_stressors if subjective >= STRESS_THRESHOLD else other_activities
    activity += 2 * [activity_rng.choice(pool)]  # same activity for both score types

# Two rows per day: the objective score first, the subjective score second.
date_column = [day for day in dates for _ in range(2)]
score_type = len(dates) * ["objective", "subjective"]
assert len(date_column) == len(activity) == len(score) == len(score_type)

df_dict = {'date': date_column, 'activity': activity, 'score_type': score_type, 'score': score}

# Scenario-specific name, so frames of other scenarios that live in the
# kernel are not overwritten by this cell.
df_scen3 = pd.DataFrame(df_dict).set_index("date")
df_scen3.to_csv("data_scen3_yearly.csv")
df_scen3.loc[:"2022-01-31"].to_csv("data_scen3_monthly.csv")