# Summarizing the Data
---------------------------------------
- Heart Rate Data
- Audio Exposure Data
- Step Data

In [1]:
# import dependencies
import pandas as pd
from datetime import date, datetime, timedelta, time
import numpy as np

### Heart Rate

In [2]:
# read the heart-rate export into a DataFrame and preview its first rows
file_path = '../Resources/hr_data.csv'
hr_df = pd.read_csv(filepath_or_buffer=file_path)
hr_df.head(n=5)

Unnamed: 0,date_created,time_created,values
0,2015-12-25,14:06:03,65.0
1,2015-12-25,14:25:52,107.0
2,2015-12-25,14:33:08,67.0
3,2015-12-25,14:43:08,97.0
4,2015-12-25,14:52:27,61.0


In [3]:
# create bins for the hr values
#
# thresholds applied to each reading:
#   danger: above 195
#   high:   above 125, up to and including 195
#   low:    below 65
#   normal: everything else (65 through 125)
#
# NOTE(review): the earlier comments cited American Heart Association
# cut-offs (high > 100, low < 60, normal 60-100), which do not match the
# numbers used here -- confirm which set of thresholds is intended.
def _hr_range(reading):
    """Return the bin label ('danger', 'high', 'low' or 'normal') for one reading."""
    if reading > 195:
        return 'danger'
    # no upper bound needed here: anything above 195 was handled above, and
    # this way a reading of exactly 195 is 'high' instead of falling through
    # to 'normal'
    if reading > 125:
        return 'high'
    if reading < 65:
        return 'low'
    # also reached by NaN readings, because every comparison with NaN is False
    return 'normal'


ranges = [_hr_range(reading) for reading in hr_df['values']]

# add bin into the df
hr_df.insert(3, 'ranges', ranges)

In [4]:
# labels for the section of the work day each row falls in are collected here
periods = []


def time_in_range(start_time, end_time, creation_time):
    """Tell whether creation_time lies within start_time..end_time.

    Each argument is rendered as zero-padded HH:MM:SS text through its
    strftime method and the texts are compared; both ends are inclusive.
    """
    clock_format = '%H:%M:%S'
    earliest = start_time.strftime(clock_format)
    stamp = creation_time.strftime(clock_format)
    if stamp < earliest:
        return False
    return stamp <= end_time.strftime(clock_format)

# daily schedule: one (start, end) pair per section of the work day
# section 1: before first class
start_time_1, end_time_1 = time(6, 45), time(9, 25)
# section 2: 5th Grade
start_time_2, end_time_2 = time(9, 30), time(10, 15)
# section 3: 4th Grade
start_time_3, end_time_3 = time(10, 20), time(11, 5)
# section 4: lunch
start_time_4, end_time_4 = time(11, 5), time(11, 35)
# section 5: kinder
start_time_5, end_time_5 = time(11, 35), time(12, 20)
# section 6: 1st grade
start_time_6, end_time_6 = time(12, 25), time(13, 10)
# section 7: 3rd grade
start_time_7, end_time_7 = time(13, 15), time(14, 0)
# section 8: 2nd grade
start_time_8, end_time_8 = time(14, 5), time(14, 50)
# section 9: dismissal
start_time_9, end_time_9 = time(14, 50), time(16, 0)

In [5]:
# label every heart-rate row with the section of the work day it falls in
# (1-9 in schedule order, 0 when the time is outside every section)
work_sections = (
    (1, start_time_1, end_time_1),
    (2, start_time_2, end_time_2),
    (3, start_time_3, end_time_3),
    (4, start_time_4, end_time_4),
    (5, start_time_5, end_time_5),
    (6, start_time_6, end_time_6),
    (7, start_time_7, end_time_7),
    (8, start_time_8, end_time_8),
    (9, start_time_9, end_time_9),
)

# start from an empty list so that re-running this cell cannot stack a second
# set of labels on top of an earlier one (which would make insert() fail on a
# length mismatch)
periods = []
for creation_time in hr_df['time_created']:
    time_dt = datetime.strptime(creation_time, '%H:%M:%S')
    # the first matching section wins, so a time on a shared boundary
    # (e.g. 11:05:00) stays in the earlier section
    periods.append(next(
        (section for section, opens, closes in work_sections
         if time_in_range(opens, closes, time_dt)),
        0,
    ))

# add bin into df
hr_df.insert(4, 'day_sections', periods)

In [6]:
# preview the first rows to check the added 'ranges' and 'day_sections' columns
hr_df.head()

Unnamed: 0,date_created,time_created,values,ranges,day_sections
0,2015-12-25,14:06:03,65.0,normal,8
1,2015-12-25,14:25:52,107.0,normal,8
2,2015-12-25,14:33:08,67.0,normal,8
3,2015-12-25,14:43:08,97.0,normal,8
4,2015-12-25,14:52:27,61.0,low,9


In [7]:
# date window used to pick out the days I was working
def date_in_range(start, end, day):
    """Return True when day lies between start and end, both ends included."""
    on_or_after_start = start <= day
    return on_or_after_start and day <= end


# first and last day of the window
start = datetime(year=2022, month=3, day=28)
end = datetime(year=2022, month=6, day=9)

In [11]:
# grabbing all the rows for the selected time range
# the date strings are parsed once for the whole column instead of row by row
sample_dates = pd.to_datetime(hr_df['date_created'], format='%Y-%m-%d')
# between() includes both the start and the end date
in_window = sample_dates.between(start, end)

school_hr_df = hr_df.loc[
    in_window,
    ['date_created', 'time_created', 'values', 'ranges', 'day_sections'],
].reset_index(drop=True)
# weekday numbers run from 0 (Monday) to 6 (Sunday); to_numpy() assigns them
# by position because the index was just reset
school_hr_df['day_of_week'] = (
    sample_dates[in_window].dt.weekday.astype('int64').to_numpy()
)

# removing all the weekend days
school_hr_df = school_hr_df[school_hr_df['day_of_week'] < 5]
school_hr_df.to_csv('../Resources/work_hr.csv', index=False)

In [10]:
# preview the first rows of the filtered heart-rate data
school_hr_df.head()

Unnamed: 0,date_created,time_created,values,ranges,day_sections,day_of_week
0,2022-03-28,18:44:22,96.0,normal,0,0
1,2022-03-28,18:45:04,105.0,normal,0,0
2,2022-03-28,18:45:09,98.0,normal,0,0
3,2022-03-28,18:45:14,100.0,normal,0,0
4,2022-03-28,18:45:19,100.0,normal,0,0


### Audio Exposure

In [12]:
# read the audio-exposure export into a DataFrame and preview its first rows
file_path = '../Resources/sound_data.csv'
sound_df = pd.read_csv(filepath_or_buffer=file_path)
sound_df.head(n=5)

Unnamed: 0,date_created,time_created,values
0,2021-03-09,15:14:42,60.6016
1,2021-03-09,15:38:53,66.6931
2,2021-03-09,16:05:54,57.5056
3,2021-03-09,16:39:09,71.8202
4,2021-03-09,17:09:26,69.3809


In [13]:
# create bins for the sound values
#
# values from World Health Organization
#   danger: above 100
#   loud:   above 80, up to and including 100
#   safe:   everything else (80 and below)
def _sound_level(reading):
    """Return the bin label ('danger', 'loud' or 'safe') for one reading."""
    if reading > 100:
        return 'danger'
    # no upper bound needed here: anything above 100 was handled above, and
    # this way a reading of exactly 100 is 'loud' instead of falling through
    # to 'safe'
    if reading > 80:
        return 'loud'
    # also reached by NaN readings, because every comparison with NaN is False
    return 'safe'


level = [_sound_level(reading) for reading in sound_df['values']]

# add bin into the df
sound_df.insert(3, 'level', level)

In [17]:
# label every sound row with the section of the work day it falls in
# (1-9 in schedule order, 0 when the time is outside every section)
work_sections = (
    (1, start_time_1, end_time_1),
    (2, start_time_2, end_time_2),
    (3, start_time_3, end_time_3),
    (4, start_time_4, end_time_4),
    (5, start_time_5, end_time_5),
    (6, start_time_6, end_time_6),
    (7, start_time_7, end_time_7),
    (8, start_time_8, end_time_8),
    (9, start_time_9, end_time_9),
)

periods = []
for creation_time in sound_df['time_created']:
    time_dt = datetime.strptime(creation_time, '%H:%M:%S')
    # sections are tried in order and the first match is kept
    periods.append(next(
        (section for section, opens, closes in work_sections
         if time_in_range(opens, closes, time_dt)),
        0,
    ))

# add bin into df
sound_df.insert(4, 'day_sections', periods)

In [18]:
# preview the first rows to check the added 'level' and 'day_sections' columns
sound_df.head()

Unnamed: 0,date_created,time_created,values,level,day_sections
0,2021-03-09,15:14:42,60.6016,safe,9
1,2021-03-09,15:38:53,66.6931,safe,9
2,2021-03-09,16:05:54,57.5056,safe,0
3,2021-03-09,16:39:09,71.8202,safe,0
4,2021-03-09,17:09:26,69.3809,safe,0


In [21]:
# grabbing all the rows for the selected time range
# the date strings are parsed once for the whole column instead of row by row
sample_dates = pd.to_datetime(sound_df['date_created'], format='%Y-%m-%d')
# between() includes both the start and the end date
in_window = sample_dates.between(start, end)

school_sound_df = sound_df.loc[
    in_window,
    ['date_created', 'time_created', 'values', 'level', 'day_sections'],
].reset_index(drop=True)
# weekday numbers run from 0 (Monday) to 6 (Sunday); to_numpy() assigns them
# by position because the index was just reset
school_sound_df['day_of_week'] = (
    sample_dates[in_window].dt.weekday.astype('int64').to_numpy()
)

# removing all the weekend days
school_sound_df = school_sound_df[school_sound_df['day_of_week'] < 5]
school_sound_df.to_csv('../Resources/work_sound.csv', index=False)

### Steps

In [None]:
# read the step export into a DataFrame and preview its first rows
file_path = '../Resources/step_data.csv'
step_df = pd.read_csv(filepath_or_buffer=file_path)
step_df.head(n=5)

In [None]:
# label every step row with the section of the work day it falls in
# (1-9 in schedule order, 0 when the time is outside every section)
work_sections = (
    (1, start_time_1, end_time_1),
    (2, start_time_2, end_time_2),
    (3, start_time_3, end_time_3),
    (4, start_time_4, end_time_4),
    (5, start_time_5, end_time_5),
    (6, start_time_6, end_time_6),
    (7, start_time_7, end_time_7),
    (8, start_time_8, end_time_8),
    (9, start_time_9, end_time_9),
)

# start from an empty list: without this reset the labels would be appended
# to whatever an earlier cell left in `periods`, and insert() below would be
# handed a list longer than step_df
periods = []
for creation_time in step_df['time_created']:
    time_dt = datetime.strptime(creation_time, '%H:%M:%S')
    # the first matching section wins, so a time on a shared boundary
    # (e.g. 11:05:00) stays in the earlier section
    periods.append(next(
        (section for section, opens, closes in work_sections
         if time_in_range(opens, closes, time_dt)),
        0,
    ))

# add bin into df
step_df.insert(3, 'day_sections', periods)

In [None]:
# preview the first rows to check the added 'day_sections' column
step_df.head()