# Imports

### Libraries

In [1]:
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
import json
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.dates as dates_
import seaborn as sns
from pyowm.owm import OWM
from pyowm.utils import timestamps, formatting
from datetime import datetime, timedelta, timezone, date

### Data

##### Split dataframes into dictionaries by day

In [79]:
def df_by_day(df, date_col_name="start_time"):
    # Get all available days
    days = df[date_col_name].dt.to_period("D").unique()

    #create a DataFrame dictionary to store your data frames
    df_by_day = {elem : pd.DataFrame for elem in days}

    for key in df_by_day.keys():
        df_by_day[key] = df[:][df[date_col_name].dt.to_period("D") == key]
    
    return df_by_day, days

##### Heart Beat Data

In [10]:
# Find all json files
samsung_base_dir = os.path.join('data', 'Samsung Data', '04.12.2021')

json_file_names = []
for file in os.listdir(os.path.join(samsung_base_dir, 'jsons', 'com.samsung.health.heart_rate')):
    if file != '.ipynb_checkpoints':
        json_file_names.append(file)

df_json_readings = []
for i in range(len(json_file_names)):
    file_name = json_file_names[i]
    df = pd.read_json(os.path.join(samsung_base_dir,'jsons', 'com.samsung.health.heart_rate', file_name))
    df_json_readings.append(df.copy())

df_heart_rate_json = pd.concat(df_json_readings, axis=0, ignore_index=True)
df_heart_rate_json = df_heart_rate_json.sort_values(by=['start_time'], ascending=True, ignore_index=True)

###### Create dictionnary with data per day

In [26]:
HR_by_day, HR_day_keys = df_by_day(df_heart_rate_json)

In [27]:
HR_by_day[HR_day_keys[0]]

Unnamed: 0,heart_rate,heart_rate_max,heart_rate_min,start_time,end_time
0,80,82,79,2021-11-28 12:40:00,2021-11-28 12:40:59
1,83,87,78,2021-11-28 12:41:00,2021-11-28 12:41:59
2,82,85,80,2021-11-28 12:42:00,2021-11-28 12:42:59
3,83,86,80,2021-11-28 12:43:00,2021-11-28 12:43:59
4,80,82,77,2021-11-28 12:44:00,2021-11-28 12:44:59
...,...,...,...,...,...
538,68,74,63,2021-11-28 22:36:00,2021-11-28 22:36:59
539,75,78,72,2021-11-28 22:37:00,2021-11-28 22:37:59
540,77,90,72,2021-11-28 22:38:00,2021-11-28 22:38:59
541,94,97,89,2021-11-28 22:39:00,2021-11-28 22:39:59


##### Calendar Data

In [17]:
# Load CSV file
cal_base_dir = os.path.join('data', 'Calendar Data')
df_calendar = pd.read_csv(os.path.join('data', 'Calendar Data', '05.12.2021.csv'), sep='\t')

# Rename columns with simpler names
df_calendar = df_calendar.rename(columns={'Given planned earliest start' : 'start_time', 'Given planned earliest end' : 'end_time'})
df_calendar = df_calendar.drop(['Notes', 'Assigned Resources', 'Additional Title'], axis=1)

# Convert date columns into date format
df_calendar['start_time'] = pd.to_datetime(df_calendar['start_time'], dayfirst=True)
df_calendar['end_time'] = pd.to_datetime(df_calendar['end_time'], dayfirst=True)

# Sort Dataframe by date
df_calendar = df_calendar.sort_values(by=['start_time'], ascending=True, ignore_index=True)

In [82]:
cal_by_day, cal_day_keys = df_by_day(df_calendar)

##### Weather Data
We can go back up to 5 days ago, but unfortunately not more than that.

In [51]:
APIKEY = '12c406b33039d61c116a70eefb3526bb' #your API Key here as string
OpenWMap = OWM(APIKEY) # Use API key to get data
mgr = OpenWMap.weather_manager()
lausanne_lat = 46.5196535
lausanne_lon = 6.6322734

# Query current data
observation_cur = mgr.weather_at_place('Lausanne') # give where you need to see the weather
weather_cur = observation_cur.weather

# Query yesterday data
yesterday_epoch = formatting.to_UNIXtime(timestamps.yesterday())
one_call_yesterday = mgr.one_call_history(lat=lausanne_lat, lon=lausanne_lon, dt=yesterday_epoch)
weather_yesterday = one_call_yesterday.forecast_hourly

# Query 4 days ago data
nb_of_days = 1
three_days_ago_epoch = int((datetime.now() - timedelta(days=nb_of_days)).replace(tzinfo=timezone.utc).timestamp())
one_call_four_days_ago = mgr.one_call_history(lat=52.5244, lon=13.4105, dt=three_days_ago_epoch)
weather_four_days_ago = one_call_four_days_ago.forecast_hourly

In [52]:
# Iterate over each hour of the day
for i in range(len(weather_four_days_ago)):
    print(weather_four_days_ago[i].detailed_status)
    print(weather_four_days_ago[i].temperature('celsius'))

light snow
{'temp': 1.19, 'feels_like': -3.85}
light snow
{'temp': 1.45, 'feels_like': -3.73}
light rain
{'temp': 1.45, 'feels_like': -3.93}
light rain
{'temp': 0.94, 'feels_like': -3.29}
overcast clouds
{'temp': 1.01, 'feels_like': -2.93}
light intensity drizzle
{'temp': 1.71, 'feels_like': -2.2}
light snow
{'temp': 1.64, 'feels_like': -1.98}
light snow
{'temp': 1.56, 'feels_like': -2.66}
broken clouds
{'temp': 1.68, 'feels_like': -1.93}
broken clouds
{'temp': 1.68, 'feels_like': -1.93}
overcast clouds
{'temp': 1.44, 'feels_like': -1.74}
broken clouds
{'temp': 2.21, 'feels_like': -1.86}
broken clouds
{'temp': 2.33, 'feels_like': -1.71}
broken clouds
{'temp': 2.62, 'feels_like': -0.46}
overcast clouds
{'temp': 2.49, 'feels_like': -0.61}
overcast clouds
{'temp': 2.3, 'feels_like': -1.18}
overcast clouds
{'temp': 2.17, 'feels_like': -1.63}
overcast clouds
{'temp': 1.87, 'feels_like': -1.7}
broken clouds
{'temp': 1.76, 'feels_like': -2.67}
broken clouds
{'temp': 1.84, 'feels_like': -2.04}

##### Step Count Data

In [77]:
# Find all json files
samsung_base_dir = os.path.join('data', 'Samsung Data', '04.12.2021')

# Finding step count file
filename = ''
for file in os.listdir(samsung_base_dir):
    if file.startswith('com.samsung.health.step_count'):
        filename = file


# Importing data
cols_names = ["speed", "pkg_name", "count", "sample_position_type", "time_offset", "start_time", "calorie", 
              "deviceuuid", "custom", "end_time", "datauuid", "distance", "create_time", "update_time"]
df_step_count = pd.read_csv(os.path.join(samsung_base_dir, filename), skiprows=1,names=cols_names)

# Cleaning dataset
cols_to_drop = ["pkg_name", "sample_position_type", "time_offset", "deviceuuid", "custom", "datauuid", "create_time", "update_time"]
df_step_count = df_step_count[1:]
df_step_count = df_step_count.drop(cols_to_drop, axis=1)

# Convert date columns into date format
df_step_count['start_time'] = df_step_count['start_time'].replace('à', 'at', regex=True)

df_step_count['end_time'] = df_step_count['start_time'].replace('à', 'at', regex=True)
df_step_count['start_time'] = pd.to_datetime(df_step_count['start_time'], dayfirst=True)
df_step_count['end_time'] = pd.to_datetime(df_step_count['end_time'], dayfirst=True)


In [80]:
step_by_day, step_day_keys = df_by_day(df_step_count)

In [81]:
step_by_day[step_day_keys[0]]

Unnamed: 0,speed,count,start_time,calorie,end_time,distance
1,1.388889,16,2021-11-28 13:50:00,0.57,2021-11-28 13:50:00,11.51
2,1.497405,22,2021-11-28 13:51:00,0.73,2021-11-28 13:51:00,17.09
3,1.277778,17,2021-11-28 13:56:00,0.59,2021-11-28 13:56:00,12.44
4,2.027778,19,2021-11-28 14:06:00,0.66,2021-11-28 14:06:00,15.37
5,1.277778,15,2021-11-28 14:11:00,0.52,2021-11-28 14:11:00,11.12
6,1.527778,59,2021-11-28 14:12:00,2.05,2021-11-28 14:12:00,45.69
7,1.764286,14,2021-11-28 14:12:00,0.457786,2021-11-28 14:12:00,12.35
8,0.403583,20,2021-11-28 14:45:00,1.058468,2021-11-28 14:45:00,13.36
9,1.388889,12,2021-11-28 14:45:00,0.4,2021-11-28 14:45:00,9.02
10,1.135878,15,2021-11-28 18:45:00,0.670492,2021-11-28 18:45:00,13.95


# Use-case : 4 days ago (24.11.2021)
As we did not have data for the heart rate then, we will use the data we have from today, and pretend it was taken on that day. As this is merely an example, we will look at it during a day, but the analysis can easily be repeated fro a week, or larger period.

##### Get key for day we want

In [98]:
def get_key(day, month, year):
    for key in cal_day_keys:
        if key.start_time.month == day and key.start_time.day == month and key.start_time.year == year:
            cal_key = key
    return cal_key

In [99]:
day = 5
month = 12
year = 2021

day_key = get_key(day, month, year)

##### Filter calendar data to keep only data for a day

In [100]:
df_filtered_dates = cal_by_day[day_key]
# Create mask for bars in plotting
df_filtered_dates['plot_dummies'] = np.ones(len(df_filtered_dates)).astype(int)*df_heart_rate_json['heart_rate_max'].max()
#df_filtered_dates.at[893,'Title']= "Repas avec Flo à Morges" 

df_filtered_dates['bar_width'] = df_filtered_dates['end_time'] - df_filtered_dates['start_time']
df_filtered_dates['bar_width'] = df_filtered_dates['bar_width'].apply(lambda t : t.seconds//3600 * 0.0415)

In [101]:
df_filtered_dates.head()

Unnamed: 0,Title,start_time,end_time,plot_dummies,bar_width
496,Midi avec Julia,2021-05-12 12:00:00,2021-05-12 14:00:00,167,0.083
497,Appel Pidoux,2021-05-12 15:00:00,2021-05-12 16:00:00,167,0.0415


##### Prepare weather data for plotting

In [None]:
# Weather data
actual_temp = []
feels_like_temp = []
weather = []

for i in range(len(weather_four_days_ago)):
    actual_temp.append(weather_four_days_ago[i].temperature('celsius')['temp'])
    feels_like_temp.append(weather_four_days_ago[i].temperature('celsius')['feels_like'])
    weather.append(weather_four_days_ago[i].detailed_status)

In [None]:
data_range = pd.date_range(start=datetime(year_to_keep, month_to_keep, day_to_keep, 0, 0, 0), periods=24, freq="60 min")

# Create datafram
weather_data = [actual_temp, feels_like_temp, weather]
df_weather = pd.DataFrame(np.vstack(weather_data))
df_weather = df_weather.T
df_weather = df_weather.rename(columns={0:'actual_temp', 1:'feels_like_temp', 2: 'weather'})
df_weather['time'] = data_range

df_weather['actual_temp'] = df_weather['actual_temp'].astype(float)
df_weather['feels_like_temp'] = df_weather['feels_like_temp'].astype(float)

In [None]:
df_weather.head(50)

##### Change day (cheating on day of heart rate measurement)

In [None]:
offset = date.today().day - day_to_keep - 2
df_heart_rate_json['start_time'] = df_heart_rate_json['start_time'].apply(lambda t : t.replace(day=day_to_keep, month=month_to_keep, year=year_to_keep))
df_heart_rate_json['end_time'] = df_heart_rate_json['end_time'].apply(lambda t : t.replace(day=day_to_keep, month=month_to_keep, year=year_to_keep))


In [None]:
df_heart_rate_json.head()

##### Prepare step count data

# Plotting all together

In [None]:
import matplotlib.dates as mdates
fig, ax = plt.subplots(figsize=(20, 10))

# Plot Hear rate data
hr = ax.plot(df_heart_rate_json['start_time'],
        df_heart_rate_json['heart_rate'],
        color='purple', label='Heart Rate')

#accel = ax.plot(df_accel['time'], df_accel['Vector Magnitude'])


# Set bar plots style for events
cal = ax.bar(df_filtered_dates['start_time'],
        df_filtered_dates['plot_dummies'],
        color='green', width=df_filtered_dates['bar_width'], alpha=0.2, label='Calendar events', align='edge')
ax.bar_label(cal, labels=df_filtered_dates['Title'], label_type='center', rotation=90, weight='bold', fontsize=20)

# Format dates as hours:minutes
xformatter = mdates.DateFormatter('%H:%M')
plt.gcf().axes[0].xaxis.set_major_formatter(xformatter)


# Set title and labels for axes
ax.set_xlabel("Time of the day", fontsize=20)

ax.set_ylabel("Heart Rate", color="purple", fontsize=20)

# Create format for plotting temperature
#ax2=ax.twinx()

#Remove the night for purposes of plotting
df_weather_loc = df_weather.copy()
df_weather_loc = df_weather_loc.drop(range(13))

#ax2.plot(df_weather_loc['time'], df_weather_loc['actual_temp'], color='red', label='Actual Temperature')
#ax2.set_ylabel("Temperature (celsisus)")

# Ading legends
#ax.legend(bbox_to_anchor=(0.985, 1), prop={"size":20})


ax2=ax.twinx()
ax2.plot(df_accel['time'], df_accel['Vector Magnitude'], label="Accelerometer")

#ax2.legend("Accelerometer", prop={"size":20})
#ax.legend("Hear rate", prop={"size":20})
ax.set_ylabel("Heart Rate", color="purple", fontsize=20)
ax2.set_ylabel("Accelerometer", color="blue", fontsize=20)

ax.tick_params(axis='both', which='major', labelsize=20)
ax.tick_params(axis='both', which='minor', labelsize=8)

ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.tick_params(axis='both', which='minor', labelsize=8)

# Ading legends
# bbox_to_anchor=(0.985, 1), 
ax.legend(prop={"size":20})
# bbox_to_anchor=(0.999, 0.93),
ax2.legend(bbox_to_anchor=(0.9835, 0.88), prop={"size":20})

#sns.set()
plt.savefig("images/cal_events_correlation.png", transparent=True)

In [None]:
import matplotlib.dates as mdates
fig, ax = plt.subplots(figsize=(20, 10))

#Remove the night for purposes of plotting
df_weather_loc = df_weather.copy()
df_weather_loc = df_weather_loc.drop(range(13))

# Set bar plots style for events
weather = ax.bar(df_weather_loc['time'],
        df_weather_loc['actual_temp'],
        color='orange', width=0.03, alpha=0.2, label='Weather status')
ax.bar_label(weather, labels=df_weather_loc['weather'], label_type='center', rotation=90, weight='bold', fontsize=20)

# Format dates as hours:minutes
xformatter = mdates.DateFormatter('%H:%M')
plt.gcf().axes[0].xaxis.set_major_formatter(xformatter)


# Set title and labels for axes
ax.set_xlabel("Time of the day", fontsize=20)
ax.set_ylabel("Temperature (celsius)", color="orange", fontsize=20)
# Create format for plotting temperature
ax2=ax.twinx()


# Plot Hear rate data
ax2.plot(df_heart_rate_json['start_time'],
        df_heart_rate_json['heart_rate'],
        color='purple', label='Heart Rate')
#ax2.plot(df_weather_loc['time'], df_weather_loc['actual_temp'], color='red', label='Actual Temperature')
ax2.set_ylabel("Heart rate", color="purple", fontsize=20)

# Ading legends
# bbox_to_anchor=(0.985, 1), 
ax.legend(prop={"size":20})
# bbox_to_anchor=(0.999, 0.93),
ax2.legend(bbox_to_anchor=(0.999, 0.93), prop={"size":20})

ax.tick_params(axis='both', which='major', labelsize=20)
ax.tick_params(axis='both', which='minor', labelsize=8)

ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.tick_params(axis='both', which='minor', labelsize=8)

# Display plot
plt.savefig("images/weather_events_correlation.png", transparent=True)
plt.show()
