# Calculate average TEE per participant
In this script, previously processed TEE data is summarized. Our aim is to create a representative 24 hour TEE time series by averaging daily TEE data for each participant.

++++++++++++++++++

Brendan Croom<br>
b.p.croom@gmail.com

Created: 22 JAN 2020<br>

In [1]:
import os
import numpy as np
import pandas as pd
from scipy import interpolate, optimize
from matplotlib import pyplot as plt
import datetime

# Note... also need to have XlsxWriter installed in background. Refer
# to https://xlsxwriter.readthedocs.io/index.html

Auxiliary function to make a list of 10-second epochs over a 24-hour period

In [2]:
def make_time_list(epoch_seconds=10):
    """Return the start offset of every epoch in a 24 hour period.

    Parameters
    ----------
    epoch_seconds : int, optional
        Length of one epoch in seconds. Defaults to 10, which reproduces
        the original hard-coded behaviour.

    Returns
    -------
    numpy.ndarray
        1-D integer array ``[0, epoch_seconds, 2 * epoch_seconds, ...]`` of
        offsets in seconds, all strictly smaller than ``24 * 60 * 60``.

    Raises
    ------
    ValueError
        If ``epoch_seconds`` is not strictly positive.
    """
    if epoch_seconds <= 0:
        raise ValueError('epoch_seconds must be positive, got {}'.format(epoch_seconds))

    seconds_per_day = 24 * 60 * 60
    return np.arange(0, seconds_per_day, epoch_seconds).astype('int')

Iterate over all previously processed files:

In [3]:
# Detect a list of files:
src_folder = 'Processed_HR_data'
xlsx_list = [f for f in os.listdir(src_folder) if f.endswith('.xlsx')]

# Plots and summary workbooks are written to a separate output folder. Create
# it up front: neither plt.savefig nor DataFrame.to_excel creates missing
# directories, so a fresh run would otherwise fail on the first file.
out_folder = 'day_averaged_{}'.format(src_folder)
os.makedirs(out_folder, exist_ok=True)

# Columns pulled from every sheet: the epoch time and the TEE value.
tee_columns = ['Time_int', 'TEE_hiiloskorpy']

# Iterate over each file.
for file in xlsx_list:
    print(file)

    # Read the file, which creates a dictionary of Pandas DataFrames
    # corresponding to each sheet (sheet_name=None loads all sheets).
    data = pd.read_excel(os.path.join(src_folder, file), sheet_name=None)

    # Create a list of all possible epochs
    time_list = make_time_list()  # all times in a day

    # Container mapping each epoch (int) to the list of TEE values observed
    # at that epoch across all sheets of this file.
    tee_vs_time_dict = dict()

    # Iterate over each sheet of data. The selected TEE values are also added
    # to the current Matplotlib figure as a scatter plot.
    for df in data.values():
        # Keep only rows with a positive corrected heart rate; compute the
        # selection once and reuse it for both the accumulation and the plot.
        valid = df.loc[df.HR_corrected > 0, tee_columns].values

        # Group the TEE values by epoch
        for (time, tee) in valid:
            tee_vs_time_dict.setdefault(int(time), []).append(tee)

        # add to plot
        plt.scatter(*valid.T, c='b', s=3, lw=0, alpha=0.5)

    # Now, post-process the times. Epochs without any data keep a baseline
    # value: the minimum TEE of the LAST sheet read above (`df` is whatever
    # the loop left behind), which the original author labels "the BMR".
    # NOTE(review): confirm that the last sheet's minimum is the intended
    # baseline rather than the minimum over all sheets.
    tee_avg = np.zeros(time_list.shape) + df['TEE_hiiloskorpy'].min()  # the BMR
    for i, time in enumerate(time_list):
        samples = tee_vs_time_dict.get(int(time))
        if samples is not None:
            tee_avg[i] = np.average(samples)

    # Format and save the plot:
    plt.plot(time_list, tee_avg, c='r')
    plt.title('{} TEE data'.format(file))
    plt.xlabel('Time_int (s)'); plt.ylabel('TEE per 10 second epoch')
    plt.savefig(os.path.join(out_folder, 'tee_{}.png'.format(file)))
    # Close the figure so the next file starts from an empty plot.
    plt.close()

    # Save the averaged series to a new Excel file with the same file name
    # as the input, inside the output folder.
    summary_df = pd.DataFrame(
        columns=['Time_int', 'TEE_hiiloskorpy'],
        data=np.vstack((time_list, tee_avg)).T)
    summary_df.to_excel(os.path.join(out_folder, file))