In [162]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.dates import DateFormatter
from IPython.display import display
from datetime import datetime
# Load the PANAS data from the csv file

dateparse = lambda x: datetime.strptime(x, "%m/%d/%Y %H:%M:%S")

#load dataframe from csv
df = pd.read_csv("../data/panas_data.csv",parse_dates=['Timestamp'], date_parser=dateparse)

variable_dict = {
'Timestamp':'TIMESTAMP',
'Pair':'PAIR',
'Identifier': 'ID',
'Time of Day': 'WHEN_RECORDED',
'How many hours did you sleep last night? (beginning of day only)':	'SLEEP',
'Upset':'UPSET',
'Hostile':'HOSTILE',
'Alert':'ALERT',
'Ashamed':'ASHAMED',
'Inspired': 'INSPIRED',
'Nervous':'NERVOUS',
'Determined':'DETERMINED',
'Attentive':'ATTENTIVE',
'Afraid':'AFRAID',
'Active':'ACTIVE',
'Any comments / thoughts?':'COMMENTS',
'Positive':'POSITIVE_SCORE',
'Negative':'NEGATIVE_SCORE',
'Difference':'SCORE_DIFFERENCE'}

df_clean = df.rename(columns=variable_dict)
df_clean.dropna(how='all', axis=1, inplace=True)

df_clean = df_clean.loc[(~df_clean['PAIR'].isnull())]
df_clean = df_clean.sort_values(['PAIR','TIMESTAMP'])


# Descriptive Statistics

## Daily Emotion

In [163]:
### Number of Daily emotion records


In [164]:

unique_pairs = df_clean.PAIR.nunique()
print("total daily emotion records: ", unique_pairs)

total daily emotion records:  18


### Daily emotion scores

In [165]:

# count number of times day started positive
#df.loc[(df['col1'] == value) & (df['col2'] < value)]
pos_start_day = len(df_clean.loc[(df_clean.SCORE_DIFFERENCE > 0) & (df_clean.WHEN_RECORDED == 'Beginning of Day')])
neg_start_day = unique_pairs - pos_start_day
print("total positive morning: ", pos_start_day)
print("total negative morning: ", neg_start_day)

pos_end_day = len(df_clean.loc[(df_clean.SCORE_DIFFERENCE > 0) & (df_clean.WHEN_RECORDED == 'End of Day')])
neg_end_day = unique_pairs - pos_end_day
print("total positive night: ", pos_end_day)
print("total negative night: ", neg_end_day)

#print(df_clean.sort_values(['PAIR','TIMESTAMP']))
#print(df_clean[['TIMESTAMP','SCORE_DIFFERENCE']].diff())




total positive morning:  14
total negative morning:  4
total positive night:  12
total negative night:  6


### Differences in consecutive PANAS ratings

In [166]:
#now look at differences between start and end of day
df_clean['DELTA_PREV_DIFFERENCE'] = df_clean[['TIMESTAMP','SCORE_DIFFERENCE']].diff()['SCORE_DIFFERENCE']
#print(df_clean[['PAIR','WHEN_RECORDED','SCORE_DIFFERENCE','DELTA_PREV_DIFFERENCE']])
print('#Participants whose end of day total emotion had reduced since start of day: ', len(df_clean.loc[(df_clean.WHEN_RECORDED =='End of Day') & (df_clean.DELTA_PREV_DIFFERENCE <0)]))

# and differences from start of day and end of previous night
print('#Participants whose start of day total emotion had increased since end of previous day: ', len(df_clean.loc[(df_clean.WHEN_RECORDED =='Beginning of Day') & (df_clean.DELTA_PREV_DIFFERENCE >0)]))



#Participants whose end of day total emotion had reduced since start of day:  14
#Participants whose start of day total emotion had increased since end of previous day:  12


### Sleep distribution

In [167]:

print("Sleep distribution: avg {}, std {}".format(str(df_clean.SLEEP.mean()), str(df_clean.SLEEP.std())))


Sleep distribution: avg 6.87, std 0.8788829809011249


In [168]:
# save the cleaned up data along with the emotion differences
df_clean.to_csv('../clean_daily_emotion.csv', index=False)

## Experience Sample Data

In [169]:
# now analyse the experience sample data
df_exp = pd.read_csv("../data/experience_sample_data.csv",parse_dates=['Timestamp'], date_parser=dateparse)

exp_variable_dict = {'Timestamp':'TIMESTAMP','Personal ID':'ID','Before-After break pairings':'PAIR','Before / After a break':'RELATION_BEAK','Duration of break activity (MINUTES)':'BREAK_DURATION', 'Current Perceived Stress Level':"PERCEIVED_STRESS",'Current Actual Stress Level':'GARMIN_STRESS','Activity Prior to Break Activity':'ACTIVITY_PRIOR_BREAK','Details of Break Activity':'BREAK_DETAILS','Was the break physically active?':'BREAK_PHYSICAL','Was the break indoors or outdoors?':'BREAK_LOCATION','Was the break alone or social?':'BREAK_SOCIAL','If it involved an electronic device, provide details':'BREAK_ELECTRONIC_DEVICE','Do you think the break activity helped?':'BREAK_HELPED','Date and Time of activity':'ACTUAL_ACTIVITY_TIME','Arousal/Valence [Aroused 2]':'AROUSAL_2','Arousal/Valence [Aroused 1]':'AROUSAL_1','Arousal/Valence [Aroused 0]':'AROUSAL_0','Arousal/Valence [Aroused -1]':'AROUSAL_-1','Arousal/Valence [Arousal_-2]':'AROUSAL_-2','Adjectives / feelings':'FEELINGS','Why are you taking a break or why did you take a break?':'BREAK_REASON'}

# update columns names and remove the empty columns
df_clean_exp = df_exp.rename(columns=exp_variable_dict)
df_clean_exp.dropna(how='all', axis=1, inplace=True)



### Unique before/after reports

In [170]:
unique_exp_pairs = df_clean_exp.PAIR.nunique()
print("unique pairs of before/after reports: ", unique_exp_pairs)


unique pairs of before/after reports:  48


### Break details

In [171]:
# avg break duration
print("Break duration distribution: avg {}, std {}".format(str(df_clean_exp.BREAK_DURATION.mean()), str(df_clean_exp.BREAK_DURATION.std())))

Break duration distribution: avg 43.29824561403509, std 30.568988085663772


In [172]:

print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['BREAK_PHYSICAL'])['BREAK_PHYSICAL'].count())

BREAK_PHYSICAL
No         24
Yes        15
Yes, No     1
Name: BREAK_PHYSICAL, dtype: int64


In [173]:

print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['BREAK_SOCIAL'])['BREAK_SOCIAL'].count())

BREAK_SOCIAL
Alone                 30
Alone, With others     4
With others            7
Name: BREAK_SOCIAL, dtype: int64


In [174]:

print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['BREAK_ELECTRONIC_DEVICE'])['BREAK_ELECTRONIC_DEVICE'].count())

BREAK_ELECTRONIC_DEVICE
Phone       20
Tablet       2
computer     8
Name: BREAK_ELECTRONIC_DEVICE, dtype: int64


In [175]:

print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['BREAK_PHYSICAL','BREAK_SOCIAL','BREAK_ELECTRONIC_DEVICE'])['BREAK_ELECTRONIC_DEVICE'].count())


#print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['BREAK_HELPED'])['BREAK_HELPED'].count())


BREAK_PHYSICAL  BREAK_SOCIAL        BREAK_ELECTRONIC_DEVICE
No              Alone               Phone                      13
                                    computer                    5
                Alone, With others  Phone                       3
                With others         Phone                       1
                                    computer                    1
Yes             Alone               Phone                       3
                                    Tablet                      2
                                    computer                    1
Yes, No         Alone               computer                    1
Name: BREAK_ELECTRONIC_DEVICE, dtype: int64


### Reported and Perceived Stress Levels

In [176]:

print(df_clean_exp.groupby(['PERCEIVED_STRESS','GARMIN_STRESS'])['PERCEIVED_STRESS'].count())


PERCEIVED_STRESS  GARMIN_STRESS   
High              High (76 - 100)      1
                  Rest (0 - 25)        2
Low               High (76 - 100)      7
                  Low (26 - 50)       22
                  Medium (51 - 75)     8
                  Rest (0 - 25)       15
Medium            High (76 - 100)      4
                  Low (26 - 50)       14
                  Medium (51 - 75)     6
                  Rest (0 - 25)        6
Rest              Low (26 - 50)        1
                  Medium (51 - 75)     1
                  Rest (0 - 25)        7
Name: PERCEIVED_STRESS, dtype: int64


### Number breaks reports per day

In [177]:
# number breaks reported per day
print(df_clean_exp.groupby(pd.Grouper(key="TIMESTAMP", freq="D"))['TIMESTAMP'].count())

TIMESTAMP
2022-05-13     4
2022-05-14     2
2022-05-15     4
2022-05-16    18
2022-05-17    12
2022-05-18    16
2022-05-19     8
2022-05-20     6
2022-05-21     4
2022-05-22     0
2022-05-23    16
2022-05-24     4
Freq: D, Name: TIMESTAMP, dtype: int64


### Number breaks reports per person

In [178]:


print(df_clean_exp.loc[(df_clean_exp.RELATION_BEAK =='After break')].groupby(['ID'])['ID'].count())




ID
Cameron     7
Jason       7
Katie      16
VJ          8
nadia       8
nadia       1
Name: ID, dtype: int64
