# Setup

## Imports

In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import datetime
from dateutil import parser
import matplotlib.pyplot as plt
import seaborn as sns
from ydata_profiling import ProfileReport

## Set Paths

In [21]:
# Select which data-collection run to process (1 or 2).
run_num = 1

# Every raw, processed, and output folder lives under this one directory, so
# relocating the project only requires changing this value.
experiment_root = '/Users/djw/Documents/pCloud_synced/Academics/Projects/2020_thesis/thesis_experiments/3_experiments/3_3_experience_sampling/'

# Fail fast on an unsupported run: otherwise `path`, `save_path`, and
# `subjects` would be undefined (or silently left over from an earlier run).
if run_num not in (1, 2):
    raise ValueError(f'run_num must be 1 or 2, got {run_num!r}')
run_id = int(run_num)

# Raw app exports (read from) and processed-data folder (written to).
path = f'{experiment_root}3_3_1_raw_data/run_{run_id}/app_data/'
save_path = f'{experiment_root}3_3_2_processed_data/run_{run_id}/'

# The subject roster sits one directory above app_data; `subjects` holds the
# ParticipantIdentifier column used to filter the survey results.
if run_id == 1:
    subjects_run1 = pd.read_csv(path + '../run1_subjects.csv')
    subjects = subjects_run1.ParticipantIdentifier
else:
    subjects_run2 = pd.read_csv(path + '../run2_subjects.csv')
    subjects = subjects_run2.ParticipantIdentifier

# Destination for the HTML profiling reports.
eda_reports_path = f'{experiment_root}3_3_4_outputs/EDA/'

# Active Data

## Import Data

In [126]:
# Drop any `df` left over from an earlier execution so the import below
# starts from a clean slate; report only when something was actually removed.
try:
    del df
except NameError:
    pass
else:
    print('deleted existing df')

deleted existing df


In [127]:
# Each export day is a sub-folder of `path` whose name starts with 'RK'.
days = [i for i in os.listdir(path) if i.startswith('RK')]

# Read every SurveyQuestionResults file (one is expected per day) into a list
# and concatenate once at the end. Concatenating inside the loop re-copies the
# accumulated frame on every iteration, and the old `'df' in globals()` check
# made the result depend on whatever was left in the kernel.
daily_frames = []
for day in tqdm(days):
    day_dir = os.path.join(path, day)
    for file in os.listdir(day_dir):
        if file.startswith('SurveyQuestionResults'):
            daily_frames.append(pd.read_csv(os.path.join(day_dir, file)))

if not daily_frames:
    raise FileNotFoundError(f'no SurveyQuestionResults files found under {path}')

# Stack the daily exports row-wise; per-file row labels are kept here and are
# renumbered by the reset_index calls in the following cells.
df = pd.concat(daily_frames, axis=0)

100%|██████████| 87/87 [00:03<00:00, 28.96it/s]


In [128]:
# (rows, columns) of the concatenated survey results
df.shape

(597839, 8)

In [129]:
# Keep only the four fields used downstream and renumber rows from zero
relevant_columns = ['ParticipantIdentifier', 'ResultIdentifier', 'Answers', 'EndDate']
df = df.loc[:, relevant_columns].reset_index(drop=True)
df.head()

Unnamed: 0,ParticipantIdentifier,ResultIdentifier,Answers,EndDate
0,6338356d-f098-46ea-b270-10c6fff7e67e,WEEKLY_goal_report1,19,2022-10-21T21:55:56-04:00
1,6338356d-f098-46ea-b270-10c6fff7e67e,WEEKLY_goal_report2,2,2022-10-21T21:55:59-04:00
2,c62ae7a2-6fe6-4fd5-9b9e-93773b08d8b9,task_custom_bart_info1,"{""totalEarnings"":0}",2022-10-24T07:04:41-04:00
3,c62ae7a2-6fe6-4fd5-9b9e-93773b08d8b9,task_custom_bart_info2,"{""totalEarnings"":0}",2022-10-24T07:04:41-04:00
4,c62ae7a2-6fe6-4fd5-9b9e-93773b08d8b9,task_custom_bart_info3,"{""totalEarnings"":0}",2022-10-24T07:04:42-04:00


In [130]:
# Number of missing values in each column
df.isna().sum()

ParticipantIdentifier     0
ResultIdentifier          0
Answers                   2
EndDate                  15
dtype: int64

In [131]:
# Keep a row only if it has an EndDate and belongs to a subject on the roster
has_end_date = df['EndDate'].notna()
is_selected_subject = df['ParticipantIdentifier'].isin(subjects)
df = df.loc[has_end_date & is_selected_subject].reset_index(drop=True)

In [132]:
# Add datetime, trial_date, and time columns derived from EndDate.
#
# The trial day flips at 4:45am: a response is attributed to the calendar date
# it would have if the clock were moved back by 4.75 hours.
day_flip_offset = datetime.timedelta(hours=4.75)

# Parse every timestamp once, then assign whole columns. Writing each cell
# through `df.loc[i, ...]` was slow over ~600k rows, required the index to be
# exactly 0..n-1, and left the column dtype to be inferred from the first
# value written.
parsed_end_dates = [parser.parse(stamp) for stamp in tqdm(df['EndDate'])]

# dtype=object keeps each value as the parsed datetime, offset included.
df['datetime'] = pd.Series(parsed_end_dates, index=df.index, dtype=object)
df['trial_date'] = [(dt - day_flip_offset).date() for dt in parsed_end_dates]
df['time'] = [dt.time() for dt in parsed_end_dates]

100%|██████████| 586408/586408 [01:04<00:00, 9097.80it/s]


In [133]:
# Write the processed long-format survey results for the selected run
survey_results_files = {1: 'run1_survey_results.csv', 2: 'run2_survey_results.csv'}
out_name = survey_results_files.get(run_num)
if out_name is not None:
    df.to_csv(save_path + out_name, index=False)

# Gap App

## Self Report

### Affect

#### Select Data

In [70]:
# Affect items are identified by the 'affect_' prefix; morning items also
# carry an 'am' suffix, and everything else is treated as the evening set.
is_affect = df.ResultIdentifier.str.startswith('affect_')
is_morning = df.ResultIdentifier.str.endswith('am')

df_affect = df.loc[is_affect].reset_index(drop=True)
df_affect_am = df.loc[is_affect & is_morning].reset_index(drop=True)
df_affect_pm = df.loc[is_affect & ~is_morning].reset_index(drop=True)

#### Convert to Wide

In [71]:
def affect_long_to_wide(affect_long):
    """Pivot long-format affect answers to one row per participant and trial date.

    Parameters
    ----------
    affect_long : pandas.DataFrame
        Long-format rows with 'ParticipantIdentifier', 'trial_date',
        'ResultIdentifier', and 'Answers' columns.

    Returns
    -------
    pandas.DataFrame
        One column per ResultIdentifier. Repeated answers to the same item on
        the same participant-day are averaged.
    """
    # Answers holds strings, so convert explicitly before averaging: recent
    # pandas raises on mean() of an object column rather than coercing it.
    # Anything that is not a number becomes NaN and is skipped by the mean.
    numeric_long = affect_long.assign(
        Answers=pd.to_numeric(affect_long['Answers'], errors='coerce')
    )
    wide = numeric_long.pivot_table(
        index=['ParticipantIdentifier', 'trial_date'],
        columns='ResultIdentifier',
        values='Answers',
        aggfunc='mean',
    ).reset_index()
    # get rid of the 'ResultIdentifier' name on the column axis
    return wide.rename_axis(None, axis=1)


df_affect_pm_wide = affect_long_to_wide(df_affect_pm)
df_affect_am_wide = affect_long_to_wide(df_affect_am)

In [72]:
# Combine evening and morning affect into one row per participant-day.
# An outer join keeps days that have only a morning or only an evening report;
# the previous left join from the evening table silently dropped every
# morning-only day. Missing halves are left as NaN.
# NOTE(review): confirm morning-only days should be retained for analysis.
df_daily_affect_wide = df_affect_pm_wide.merge(
    df_affect_am_wide,
    how='outer',
    on=['ParticipantIdentifier', 'trial_date'],
)

df_daily_affect_wide.head(3)

Unnamed: 0,ParticipantIdentifier,trial_date,affect_neg_angry,affect_neg_ashamed,affect_neg_bored,affect_neg_depressed,affect_neg_embarrassed,affect_neg_frustrated,affect_neg_guilty,affect_neg_lazy,...,affect_neg_sad_am,affect_neg_stressed_am,affect_pos_amused_am,affect_pos_appreciated_am,affect_pos_excited_am,affect_pos_focused_am,affect_pos_happy_am,affect_pos_hopeful_am,affect_pos_motivated_am,affect_pos_relaxedCalm_am
0,0501ba67-3406-4779-aff1-878a0e9f7885,2022-09-30,1.0,4.0,5.0,4.0,1.0,3.0,4.0,4.0,...,,,,,,,,,,
1,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-01,1.0,2.0,1.0,1.0,1.0,3.0,3.0,3.0,...,2.0,2.0,1.0,3.0,4.0,4.0,4.0,5.0,4.0,3.0
2,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-02,3.0,1.0,4.0,5.0,1.0,4.0,2.0,5.0,...,,,,,,,,,,


In [73]:
# Build a participant x day grid so that every subject has a row for every
# trial date, then attach the affect data to it.

# Daily calendar from '2022-09-27' to '2022-12-20'
date_series = pd.date_range(start='2022-09-27', end='2022-12-20')
ids_series = subjects

n_days = len(date_series)
n_ids = len(ids_series)

# Cartesian product: each ID is repeated once per day while the calendar
# (already as datetime.date objects) is cycled once per ID.
df_complete_idDate = pd.DataFrame({
    'ParticipantIdentifier': np.repeat(np.asarray(ids_series), n_days),
    'trial_date': np.tile(date_series.date, n_ids),
})

# Left join from the grid so days without any affect report remain as NaN rows
df_daily_affect_wide = pd.merge(
    df_complete_idDate,
    df_daily_affect_wide,
    how='left',
    on=['ParticipantIdentifier', 'trial_date'],
)

#### EDA Profiling

In [52]:
# Profile only the affect items (the first two columns are the ID and date keys)
affect_items = df_daily_affect_wide.iloc[:, 2:]
profile = ProfileReport(affect_items, title="Affect | Pandas Profiling Report")
profile.to_file(eda_reports_path + "affect_report.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Summarize dataset: 100%|██████████| 450/450 [00:23<00:00, 19.24it/s, Completed]                                                   
Generate report structure: 100%|██████████| 1/1 [00:04<00:00,  4.49s/it]
Render HTML: 100%|██████████| 1/1 [00:03<00:00,  3.50s/it]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 37.65it/s]


#### Clean

In [74]:
# Count the cells whose value is out of range (greater than 5)
affect_values = df_daily_affect_wide.iloc[:, 2:]
(affect_values > 5).sum().sum()

157

In [75]:
# Replace every value above 5 with NaN; the two key columns are left untouched
affect_cols = df_daily_affect_wide.columns[2:]
df_daily_affect_wide[affect_cols] = df_daily_affect_wide[affect_cols].mask(
    df_daily_affect_wide[affect_cols] > 5
)

In [76]:
# Confirm that no value above 5 remains
affect_values = df_daily_affect_wide.iloc[:, 2:]
(affect_values > 5).sum().sum()

0

In [86]:
# Profile the affect items again, this time passing infer_dtypes=False
affect_items_clean = df_daily_affect_wide.iloc[:, 2:]
profile = ProfileReport(
    affect_items_clean,
    title="Affect | Pandas Profiling Report",
    infer_dtypes=False,
)
profile.to_file(eda_reports_path + "affect_report_clean.html")

SyntaxError: expression cannot contain assignment, perhaps you meant "=="? (982188103.py, line 4)

In [91]:
# Per-participant variance of every affect item (the date column is excluded)
affect_by_subject = (
    df_daily_affect_wide
    .drop(columns='trial_date')
    .groupby('ParticipantIdentifier')
)
affect_by_subject.var()

Unnamed: 0_level_0,affect_neg_angry,affect_neg_ashamed,affect_neg_bored,affect_neg_depressed,affect_neg_embarrassed,affect_neg_frustrated,affect_neg_guilty,affect_neg_lazy,affect_neg_lonelyIsolated,affect_neg_nervousAnxious,...,affect_neg_sad_am,affect_neg_stressed_am,affect_pos_amused_am,affect_pos_appreciated_am,affect_pos_excited_am,affect_pos_focused_am,affect_pos_happy_am,affect_pos_hopeful_am,affect_pos_motivated_am,affect_pos_relaxedCalm_am
ParticipantIdentifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0501ba67-3406-4779-aff1-878a0e9f7885,1.44531,0.536839,1.432976,1.872769,0.56248,1.899383,1.843557,1.639403,1.842584,1.668939,...,0.322511,1.670996,0.181818,1.898268,1.551948,1.78355,1.491342,2.036797,1.489177,1.064935
099765a5-a9c9-4fff-b297-a39eab517267,1.441176,1.437908,0.264706,0.104575,0.300654,2.029412,1.310458,0.470588,1.058824,0.970588,...,0.2,0.3,0.0,2.2,0.5,0.7,0.7,1.3,0.3,0.7
0ca43379-41b5-47fb-90ba-0a22e6bf5586,1.202198,0.568654,1.259259,1.444746,0.549232,1.256248,0.696176,1.194369,1.799006,0.985245,...,1.099415,1.953216,0.538012,0.783626,0.701754,1.426901,0.918129,0.508772,0.953216,0.608187
146e1ab8-c4ca-4a52-9261-e108b38eea53,0.899522,1.09501,1.313397,1.279563,0.864662,1.185236,0.962748,1.879016,1.343131,0.539303,...,0.3,0.566667,0.266667,1.466667,0.666667,0.3,0.4,0.566667,0.7,0.266667
147400db-43d9-4155-8bf2-b85b8adf4315,0.386111,0.424383,0.240123,0.284568,0.268827,0.399383,0.275,0.219444,0.379938,0.258642,...,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ed16354a-961a-4e5e-83de-ce9a17b25a2f,1.741742,0.936937,1.51952,1.418919,1.62012,0.666667,1.108108,1.798799,2.033033,1.921922,...,1.647619,0.790476,0.0,0.128571,0.457143,1.061905,0.457143,0.361905,0.547619,0.933333
f889f1a4-9754-456e-ae08-092f992d3359,0.300654,0.222222,0.565359,0.264706,0.104575,1.624183,0.222222,2.565359,0.055556,1.751634,...,0.0,0.694444,0.5,0.194444,1.444444,0.527778,0.5,0.861111,1.111111,0.361111
fa996c7b-fd1c-4cba-990a-336f4cefaeb4,2.504615,1.218462,2.266154,2.344615,1.115385,2.726154,2.301538,1.458462,3.053846,1.501538,...,0.333333,4.0,3.0,0.25,3.333333,0.916667,0.25,1.0,0.333333,2.0
fed3a16b-c5a3-4a84-a111-32b7574e04b5,0.563234,0.785456,0.684884,0.667871,0.495182,0.796296,0.667269,0.601777,0.703403,0.618488,...,1.078788,0.773193,0.901865,0.840326,0.933333,0.950117,1.074126,1.043357,1.017249,0.775991


#### Save

In [None]:
# Save the cleaned wide-format affect table for the selected run.
# This previously wrote `df` (the raw long-format survey results) under the
# affect file name, so the affect CSV never contained the affect table.
affect_files = {1: 'run1_affect.csv', 2: 'run2_affect.csv'}
if run_num in affect_files:
    df_daily_affect_wide.to_csv(save_path + affect_files[run_num], index=False)

### Daily General and Detail

#### Load Data

In [134]:
# Reload the processed long-format survey results for the selected run
survey_results_files = {1: 'run1_survey_results.csv', 2: 'run2_survey_results.csv'}
in_name = survey_results_files.get(run_num)
if in_name is not None:
    df = pd.read_csv(save_path + in_name)

#### Select Data

In [135]:
# ResultIdentifier values used to select daily self-report rows from `df`.
# The three lists below are concatenated (together with `specific_goals`) when
# building `df_daily_sr`.

# Survey context items plus overall ratings of the past 24 hours and of the
# 48-to-24-hour window.
past24_general = [
    'DAILY_survey_situation1_surveys',
    'DAILY_survey_situation2_surveys',
    'DAILY_survey_missed',
    'DAILY_past48to24_gap',
    'DAILY_past48to24_gapCause',
    'DAILY_past24_ideal',
    'DAILY_past24_satisfaction',
    'DAILY_past24_change',
    'DAILY_past24_productivity',
    'DAILY_past24_procrastination',
    'DAILY_past24_punctuality',
    'DAILY_past24_mentalEffort',
    'DAILY_past24_physicalEffort',
    'DAILY_past24_values',
    'DAILY_past24_gap',
    'DAILY_past24_gapCause',
    'DAILY_past24_illness',
    'DAILY_past24_fatigue',
    'DAILY_past24_unusualEvents'
]

# Per-category reports for the past 24 hours.
past24_categories = [
    'DAILY_past24_sleep',
    'DAILY_past24_occupation',
    'DAILY_past24_nonoccupation',
    'DAILY_past24_exercise',
    'DAILY_past24_leisureSolo',
    'DAILY_past24_leisureSoloMental',
    'DAILY_past24_leisureSoloPhysical',
    'DAILY_past24_leisureNonSolo',
    'DAILY_past24_leisureNonSoloMental',
    'DAILY_past24_leisureNonSoloPhysical',
    'DAILY_past24_diet',
    'DAILY_past24_socialMedia',
    'DAILY_past24_drinks'
]

# Per-category items for the next 24 hours; the gap calculation later in this
# notebook subtracts these from (or from these) the matching past24 values.
next24_categories = [
    'DAILY_next24_sleep',
    'DAILY_next24_occupation',
    'DAILY_next24_nonoccupation',
    'DAILY_next24_leisureSolo',
    'DAILY_next24_leisureNonSolo',
    'DAILY_next24_exercise',
    'DAILY_next24_socialMedia',
    'DAILY_next24_drinks',
    'DAILY_next24_diet'
]

# ResultIdentifier values for the goal-specific daily items.
# Both runs share the same seven core items per goal; they differ only in how
# the goal-interaction items are named (run 1 has two week and two month
# variants, run 2 has one of each).
goal_core_items = [
    'report',
    'importance',
    'consequences',
    'motivationInternal',
    'motivationExternal',
    'confidence',
    'effort',
]
goal_interaction_items = {
    1: ['interaction_week1', 'interaction_week2',
        'interaction_month1', 'interaction_month2'],
    2: ['interaction_week', 'interaction_month'],
}

# Fail fast on an unsupported run instead of leaving `specific_goals`
# undefined (or stale from an earlier execution).
if run_num not in goal_interaction_items:
    raise ValueError(f'run_num must be 1 or 2, got {run_num!r}')

specific_goals = [
    f'DAILY_{goal}_{item}'
    for goal in ('goal1', 'goal2')
    for item in goal_core_items + goal_interaction_items[run_num]
]
# Only goal 2 has the item relating the two goals to each other.
specific_goals.append('DAILY_goal2_interaction_eachOther')

# Columns excluded from the numeric conversion applied to the wide daily table
# (every column NOT listed here is passed through pd.to_numeric).
# NOTE(review): 'DAILY_goal1_set' and 'DAILY_goal2_set' are not in any of the
# selection lists in this section, so they may never appear in that table.
non_numeric_cols = [
    'DAILY_goal1_set',
    'DAILY_goal2_set',
    'ParticipantIdentifier', 
    'trial_date',
    'DAILY_next24_diet',
    'DAILY_past48to24_gapCause',
    'DAILY_survey_situation1_surveys',
    'DAILY_survey_situation2_surveys',
    'DAILY_survey_missed',
    'DAILY_past24_gapCause'
]

In [136]:
# Gather every daily self-report identifier, then keep only the matching rows
daily_identifiers = [
    *past24_general,
    *past24_categories,
    *next24_categories,
    *specific_goals,
]
is_daily_item = df.ResultIdentifier.isin(daily_identifiers)
df_daily_sr = df[is_daily_item].reset_index(drop=True)

In [137]:
# Preview the first three selected daily self-report rows
df_daily_sr.head(3)

Unnamed: 0,ParticipantIdentifier,ResultIdentifier,Answers,EndDate,datetime,trial_date,time
0,90592e06-bcf6-4150-85b0-c5daf7e7569c,DAILY_survey_situation1_surveys,alone,2022-10-24T19:59:12-04:00,2022-10-24 19:59:12-04:00,2022-10-24,19:59:12
1,90592e06-bcf6-4150-85b0-c5daf7e7569c,DAILY_survey_missed,did_not_miss,2022-10-24T19:59:13-04:00,2022-10-24 19:59:13-04:00,2022-10-24,19:59:13
2,90592e06-bcf6-4150-85b0-c5daf7e7569c,DAILY_past24_ideal,13,2022-10-24T19:59:17-04:00,2022-10-24 19:59:17-04:00,2022-10-24,19:59:17


#### Convert to Wide

In [138]:
# Pivot to one row per participant and trial date, one column per item.
# Multiple answers to the same item on the same day are joined with a space.
# Missing answers are dropped and the rest cast to str first, because
# ' '.join raises TypeError on NaN or on values read_csv parsed as numbers.
# NOTE(review): a numeric item answered twice becomes e.g. '3 4', which the
# numeric conversion later in this notebook coerces to NaN - confirm intended.
df_daily_sr_wide = df_daily_sr.pivot_table(
    index=["ParticipantIdentifier", "trial_date"],
    columns='ResultIdentifier',
    values='Answers',
    aggfunc=lambda x: ' '.join(x.dropna().astype(str)),
).reset_index()
# get rid of name on index
df_daily_sr_wide = df_daily_sr_wide.rename_axis(None, axis=1)

In [139]:
# Convert data to numeric where appropriate (everything not in non_numeric_cols).
# Assign with df[cols] rather than df.loc[:, cols]: the .loc form writes into
# the existing object-dtype columns in place, so the values become floats but
# the columns stay dtype=object. Replacing the columns gives real float dtype.
numeric_cols = df_daily_sr_wide.columns[~df_daily_sr_wide.columns.isin(non_numeric_cols)]
df_daily_sr_wide[numeric_cols] = df_daily_sr_wide[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [140]:
# Break gap cause into two columns (internal, external), scaled from 0-100 to 0-1.
cols = ['DAILY_past24_gapCause_internal', 'DAILY_past24_gapCause_external']

# Split on '_' and keep only the first two pieces. reindex guarantees exactly
# two columns however many pieces the data contains, so this no longer fails
# when the split yields fewer or more than three parts, and no throwaway
# column has to be created and dropped.
gap_cause_parts = (
    df_daily_sr_wide.DAILY_past24_gapCause
    .str.split("_", expand=True)
    .reindex(columns=[0, 1])
)
gap_cause_parts.columns = cols

# convert to numeric 0-1 (anything unparseable becomes NaN)
df_daily_sr_wide[cols] = gap_cause_parts.apply(pd.to_numeric, errors='coerce') / 100

In [141]:
# REPEAT FOR MISSED DAY DATA
# Break gap cause into two columns (internal, external), scaled from 0-100 to 0-1.
cols = ['DAILY_past48to24_gapCause_internal', 'DAILY_past48to24_gapCause_external']

# Split on '_' and keep only the first two pieces. reindex guarantees exactly
# two columns, so the assignment no longer fails when a value splits into
# fewer or more than two parts.
gap_cause_parts = (
    df_daily_sr_wide.DAILY_past48to24_gapCause
    .str.split("_", expand=True)
    .reindex(columns=[0, 1])
)
gap_cause_parts.columns = cols

# convert to numeric 0-1 (anything unparseable becomes NaN)
df_daily_sr_wide[cols] = gap_cause_parts.apply(pd.to_numeric, errors='coerce') / 100

In [142]:
# Preview the wide daily table, including the derived gap-cause columns
df_daily_sr_wide.head()

Unnamed: 0,ParticipantIdentifier,trial_date,DAILY_goal1_confidence,DAILY_goal1_consequences,DAILY_goal1_effort,DAILY_goal1_importance,DAILY_goal1_interaction_month,DAILY_goal1_interaction_week,DAILY_goal1_motivationExternal,DAILY_goal1_motivationInternal,...,DAILY_past24_values,DAILY_past48to24_gap,DAILY_past48to24_gapCause,DAILY_survey_missed,DAILY_survey_situation1_surveys,DAILY_survey_situation2_surveys,DAILY_past24_gapCause_internal,DAILY_past24_gapCause_external,DAILY_past48to24_gapCause_internal,DAILY_past48to24_gapCause_external
0,0501ba67-3406-4779-aff1-878a0e9f7885,2022-09-30,4.0,3.0,6.0,5.0,,,7.0,6.0,...,,,,did_not_miss,with_friend,False,0.9,0.1,,
1,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-01,5.0,4.0,7.0,7.0,,,6.0,7.0,...,,60.0,80_20,missed_busy,alone,,0.3,0.7,0.8,0.2
2,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-02,7.0,5.0,4.0,7.0,,,7.0,5.0,...,,,,did_not_miss,with_friend,False,1.0,0.0,,
3,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-03,5.0,7.0,7.0,7.0,,,7.0,4.0,...,,76.0,100_0,missed_feels,with_friend,False,1.0,0.0,1.0,0.0
4,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-04,5.0,7.0,7.0,7.0,,,7.0,7.0,...,,88.0,100_0,missed_busy,with_friend,False,1.0,0.0,1.0,0.0


#### Category Gap Calculation

<div class="alert alert-block alert-info">
<b>📝 Note:</b><br>
    I am assuming that people's goals are directional in a way that MAY NOT BE ACCURATE for everyone.<br><br>
    For example, I am assuming that people want to sleep more and drink less - in other words they have a <b>gap</b> if they have <b>more</b> drinks than planned, but for <b>sleep</b> the gap calculation is reversed since we assume a gap means that you had <b>fewer</b> hours of sleep than planned.<br><br>
    While this may be accurate <i>in general</i>, I would reasonably expect there to be exceptions.
</div>

In [143]:
# Diet gap: how far the reported past-24h diet value falls short of 100
past24_diet = df_daily_sr_wide['DAILY_past24_diet']
df_daily_sr_wide['DAILY_gap_diet'] = 100 - past24_diet

In [144]:
# Category gaps: compare the amount planned the day before (DAILY_next24_*)
# with the amount actually reported (DAILY_past24_*).
#
# The previous row-by-row loop paired each row with the row directly above it,
# whoever and whenever that was. That produced gaps comparing one
# participant's plan with another participant's report at every participant
# boundary, and compared plans with reports several days apart whenever a day
# was skipped. Here a plan counts only when it comes from the same participant
# and from exactly one calendar day earlier; otherwise the gap is NaN.

gap_categories = [
    'sleep', 'occupation', 'nonoccupation', 'leisureSolo',
    'leisureNonSolo', 'exercise', 'socialMedia', 'drinks',
]
# For these categories doing MORE than planned is the gap (actual - planned);
# for all others doing LESS than planned is the gap (planned - actual).
gap_when_over_plan = {'leisureSolo', 'leisureNonSolo', 'socialMedia', 'drinks'}

# Sort a copy by participant and date so "previous row" means "previous report";
# the original row labels are kept so results align back onto the table.
ordered = (
    df_daily_sr_wide
    .assign(_trial_day=pd.to_datetime(df_daily_sr_wide['trial_date']))
    .sort_values(['ParticipantIdentifier', '_trial_day'])
)
same_participant = ordered['ParticipantIdentifier'].eq(ordered['ParticipantIdentifier'].shift())
one_day_later = ordered['_trial_day'].diff().eq(pd.Timedelta(days=1))
has_prior_plan = same_participant & one_day_later

for category in gap_categories:
    # to_numeric guards against columns that are still object dtype
    planned = (
        pd.to_numeric(ordered[f'DAILY_next24_{category}'], errors='coerce')
        .shift()
        .where(has_prior_plan)
    )
    actual = pd.to_numeric(ordered[f'DAILY_past24_{category}'], errors='coerce')
    gap = actual - planned if category in gap_when_over_plan else planned - actual
    # assignment aligns on the row labels, so the table's row order is unchanged
    df_daily_sr_wide[f'DAILY_gap_{category}'] = gap
    

In [158]:
# Attach the daily reports to the complete participant x day grid

# The grid stores trial_date as datetime.date, so convert the key to match
df_daily_sr_wide = df_daily_sr_wide.assign(
    trial_date=lambda frame: pd.to_datetime(frame['trial_date']).dt.date
)

# Left join from the grid so days without a report remain as NaN rows
df_daily_sr_wide = pd.merge(
    df_complete_idDate,
    df_daily_sr_wide,
    how='left',
    on=['ParticipantIdentifier', 'trial_date'],
)

#### EDA Profiling

In [159]:
# Profile every daily-report column (the first two columns are the ID and date keys)
daily_report_items = df_daily_sr_wide.iloc[:, 2:]
profile = ProfileReport(daily_report_items, title="Daily Reports | Pandas Profiling Report")
profile.to_file(eda_reports_path + "daily_reports.html")

Summarize dataset: 100%|██████████| 4571/4571 [03:58<00:00, 19.15it/s, Completed]                                                                       
Generate report structure: 100%|██████████| 1/1 [00:08<00:00,  8.64s/it]
Render HTML: 100%|██████████| 1/1 [00:38<00:00, 38.26s/it]
Export report to file: 100%|██████████| 1/1 [00:00<00:00,  2.65it/s]
