In [7]:
import pandas as pd
import numpy as np
import math

from scipy.optimize import least_squares

In [3]:
df = pd.read_csv('data/strava_test_activities.csv', index_col='Unnamed: 0')
df

Unnamed: 0,name,distance,moving_time,id,start_date_local,average_speed,max_speed,average_heartrate,max_heartrate,relative_effort
0,1km_190bpm,1000.0,300,8690021393,2022-03-05T13:00:00Z,3.333,3.333,190.0,190.0,21.0
1,1km_180bpm,1000.0,300,8690021397,2022-03-04T13:00:00Z,3.333,3.333,180.0,180.0,19.0
2,1km_170bpm,1000.0,300,8690021382,2022-03-03T13:00:00Z,3.333,3.333,170.0,170.0,14.0
3,1km_160bpm,1000.0,300,8690021385,2022-03-02T13:00:00Z,3.333,3.333,160.0,160.0,9.0
4,1km_150bpm,1000.0,300,8690021381,2022-03-01T13:00:00Z,3.333,3.333,150.0,150.0,7.0
...,...,...,...,...,...,...,...,...,...,...
84,5kms_118bpm,5000.0,1800,8683721172,2021-01-05T13:00:00Z,2.778,2.778,118.0,118.0,6.0
85,30kms_113bpm,30000.0,10800,8683721180,2021-01-04T13:00:00Z,2.778,2.778,113.0,113.0,34.0
86,20kms_113bpm,20000.0,7200,8683721170,2021-01-03T13:00:00Z,2.778,2.778,113.0,113.0,22.0
87,10kms_113bpm,10000.0,3600,8683721173,2021-01-02T13:00:00Z,2.778,2.778,113.0,113.0,11.0


In [4]:
max_HR = 193
ftp = 200

In [31]:
def calculate_hr_zones_strava(hrmax):
    """
    Calculates heart rate zones using the percentage of HRmax method based on Strava findings

    Args:
        hrmax (int): Maximum heart rate.

    Returns:
        pandas.DataFrame: Heart rate zones, including zone number, zone name,
                           and heart rate range for each zone.
    """

    zone_ranges = [0.50, 0.585, 0.7772, 0.87, 0.9689]
    zone_names = ['Endurance', 'Moderate', 'Tempo', 'Threshold', 'Anaerobic']
    hr_zones = []
    for i, zone_range in enumerate(zone_ranges):
        lower_range = zone_ranges[i] * hrmax
        upper_range = zone_ranges[i+1] * hrmax if i+1 < len(zone_ranges) else float("inf")
        if i == 0:
            hr_range = f'< {upper_range:.0f}'
        elif i == 4:
            hr_range = f'>{lower_range:.0f}'
        else:
            hr_range = f'{lower_range:.0f} - {upper_range:.0f}'
        hr_zones.append((i+1, zone_names[i], round(lower_range,0), round(upper_range,0), hr_range))
    zones_df = pd.DataFrame(hr_zones, columns=['Zone', 'Name', 'Lower Bound', 'Upper Bound', 'Range'])

    return zones_df

In [32]:
hr_data = calculate_hr_zones_strava(193)
hr_data

Unnamed: 0,Zone,Name,Lower Bound,Upper Bound,Range
0,1,Endurance,96.0,113.0,< 113
1,2,Moderate,113.0,150.0,113 - 150
2,3,Tempo,150.0,168.0,150 - 168
3,4,Threshold,168.0,187.0,168 - 187
4,5,Anaerobic,187.0,inf,>187


In [33]:
def calculate_time_in_zones_strava_test(df, hr_data):
    """
    Calculate the time spent in each heart rate zone for each activity and add the results as columns.
    
    Args:
    - df (pandas.DataFrame): A DataFrame containing information about each activity.
    - hr_data (pandas.DataFrame): A DataFrame containing the heart rate zones and corresponding upper and lower bounds.
    
    Returns:
    - df (pandas.DataFrame): The original DataFrame with new columns for the time spent in each heart rate zone.
    """
        
    # Loop through the rows in df and calculate time spent in each zone for the corresponding activity
    for index, row in df.iterrows():    
    # Extract activity id from the file name column
        activity_num = row['name']
        # Load the activity data
        activity_file = f"data/strava_test_csv/{activity_num}.csv"
        csv_data = pd.read_csv(activity_file)

        # Calculate the time spent in each zone
        for index_2, row_2 in csv_data.iterrows():
            if row_2["heart_rate_bpm"] < hr_data["Upper Bound"].iloc[0]:
                csv_data.loc[index_2, "Zone"] = hr_data["Zone"].iloc[0]
            elif hr_data["Lower Bound"].iloc[1] <= row_2["heart_rate_bpm"] < hr_data["Upper Bound"].iloc[1]:
                csv_data.loc[index_2, "Zone"] = hr_data["Zone"].iloc[1]
            elif hr_data["Lower Bound"].iloc[2] <= row_2["heart_rate_bpm"] < hr_data["Upper Bound"].iloc[2]:
                csv_data.loc[index_2, "Zone"] = hr_data["Zone"].iloc[2]
            elif hr_data["Lower Bound"].iloc[3] <= row_2["heart_rate_bpm"] < hr_data["Upper Bound"].iloc[3]:
                csv_data.loc[index_2, "Zone"] = hr_data["Zone"].iloc[3]
            elif row_2["heart_rate_bpm"] > hr_data["Lower Bound"].iloc[4]:
                csv_data.loc[index_2, "Zone"] = hr_data["Zone"].iloc[4]

        # Group by zone and calculate the percentage of time spent in each zone
        csv_grouped = csv_data.groupby("Zone")["time"].count().reset_index()
        csv_grouped = csv_grouped.rename(columns={"time": "Count"})
        csv_grouped['perc'] = round(csv_grouped['Count'] / len(csv_data), 2)

        # Add new columns with time spent in each zone to df
        try:
            df.loc[index, "time_z1"] = csv_grouped[csv_grouped["Zone"] == 1]["perc"].values[0]*df.loc[index,'moving_time']/60
        except IndexError:
            df.loc[index, "time_z1"] = 0
        try:
            df.loc[index, "time_z2"] = csv_grouped[csv_grouped["Zone"] == 2]["perc"].values[0]*df.loc[index,'moving_time']/60
        except IndexError:
            df.loc[index, "time_z2"] = 0
        try:
            df.loc[index, "time_z3"] = csv_grouped[csv_grouped["Zone"] == 3]["perc"].values[0]*df.loc[index,'moving_time']/60
        except IndexError:
            df.loc[index, "time_z3"] = 0
        try:
            df.loc[index, "time_z4"] = csv_grouped[csv_grouped["Zone"] == 4]["perc"].values[0]*df.loc[index,'moving_time']/60
        except IndexError:
            df.loc[index, "time_z4"] = 0
        try:
            df.loc[index, "time_z5"] = csv_grouped[csv_grouped["Zone"] == 5]["perc"].values[0]*df.loc[index,'moving_time']/60
        except IndexError:
            df.loc[index, "time_z5"] = 0
    
    return df


In [34]:
activities = calculate_time_in_zones_strava_test(df, hr_data)
activities

Unnamed: 0,name,distance,moving_time,id,start_date_local,average_speed,max_speed,average_heartrate,max_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,time_z6,time_z7
0,1km_190bpm,1000.0,300,8690021393,2022-03-05T13:00:00Z,3.333,3.333,190.0,190.0,21.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
1,1km_180bpm,1000.0,300,8690021397,2022-03-04T13:00:00Z,3.333,3.333,180.0,180.0,19.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
2,1km_170bpm,1000.0,300,8690021382,2022-03-03T13:00:00Z,3.333,3.333,170.0,170.0,14.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
3,1km_160bpm,1000.0,300,8690021385,2022-03-02T13:00:00Z,3.333,3.333,160.0,160.0,9.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
4,1km_150bpm,1000.0,300,8690021381,2022-03-01T13:00:00Z,3.333,3.333,150.0,150.0,7.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,5kms_118bpm,5000.0,1800,8683721172,2021-01-05T13:00:00Z,2.778,2.778,118.0,118.0,6.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0
85,30kms_113bpm,30000.0,10800,8683721180,2021-01-04T13:00:00Z,2.778,2.778,113.0,113.0,34.0,0.0,180.0,0.0,0.0,0.0,0.0,0.0
86,20kms_113bpm,20000.0,7200,8683721170,2021-01-03T13:00:00Z,2.778,2.778,113.0,113.0,22.0,0.0,120.0,0.0,0.0,0.0,0.0,0.0
87,10kms_113bpm,10000.0,3600,8683721173,2021-01-02T13:00:00Z,2.778,2.778,113.0,113.0,11.0,0.0,60.0,0.0,0.0,0.0,0.0,0.0


In [35]:
activities = activities[['moving_time', 'distance', 'average_heartrate', 'relative_effort', 'time_z1',	'time_z2',	'time_z3',	'time_z4',	'time_z5']]
activities.head()

Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5
0,300,1000.0,190.0,21.0,0.0,0.0,0.0,0.0,5.0
1,300,1000.0,180.0,19.0,0.0,0.0,0.0,5.0,0.0
2,300,1000.0,170.0,14.0,0.0,0.0,0.0,5.0,0.0
3,300,1000.0,160.0,9.0,0.0,0.0,5.0,0.0,0.0
4,300,1000.0,150.0,7.0,0.0,0.0,5.0,0.0,0.0


In [59]:
act = activities.copy()
act['HR_ratio'] = act['average_heartrate']/193
act

Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,HR_ratio
0,300,1000.0,190.0,21.0,0.0,0.0,0.0,0.0,5.0,0.984456
1,300,1000.0,180.0,19.0,0.0,0.0,0.0,5.0,0.0,0.932642
2,300,1000.0,170.0,14.0,0.0,0.0,0.0,5.0,0.0,0.880829
3,300,1000.0,160.0,9.0,0.0,0.0,5.0,0.0,0.0,0.829016
4,300,1000.0,150.0,7.0,0.0,0.0,5.0,0.0,0.0,0.777202
...,...,...,...,...,...,...,...,...,...,...
84,1800,5000.0,118.0,6.0,0.0,30.0,0.0,0.0,0.0,0.611399
85,10800,30000.0,113.0,34.0,0.0,180.0,0.0,0.0,0.0,0.585492
86,7200,20000.0,113.0,22.0,0.0,120.0,0.0,0.0,0.0,0.585492
87,3600,10000.0,113.0,11.0,0.0,60.0,0.0,0.0,0.0,0.585492


In [37]:
activities_bis = activities.copy()
activities_bis['HR_ratio'] = activities_bis['average_heartrate']/193
activities_bis['HR_time'] = activities_bis['HR_ratio']*(activities_bis['time_z1']+activities_bis['time_z2']+activities_bis['time_z3']+activities_bis['time_z4']+activities_bis['time_z5'])
activities_bis['weight'] = 0.64*np.exp(1.92*activities_bis['HR_ratio'])
activities_bis['HR_time_weight'] = activities_bis['HR_time']*activities_bis['weight']
activities_bis

Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,HR_ratio,HR_time,weight,HR_time_weight
0,300,1000.0,190.0,21.0,0.0,0.0,0.0,0.0,5.0,0.984456,4.922280,4.237055,20.855968
1,300,1000.0,180.0,19.0,0.0,0.0,0.0,5.0,0.0,0.932642,4.663212,3.835832,17.887301
2,300,1000.0,170.0,14.0,0.0,0.0,0.0,5.0,0.0,0.880829,4.404145,3.472604,15.293850
3,300,1000.0,160.0,9.0,0.0,0.0,5.0,0.0,0.0,0.829016,4.145078,3.143770,13.031171
4,300,1000.0,150.0,7.0,0.0,0.0,5.0,0.0,0.0,0.777202,3.886010,2.846075,11.059877
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,1800,5000.0,118.0,6.0,0.0,30.0,0.0,0.0,0.0,0.611399,18.341969,2.070104,37.969785
85,10800,30000.0,113.0,34.0,0.0,180.0,0.0,0.0,0.0,0.585492,105.388601,1.969654,207.579097
86,7200,20000.0,113.0,22.0,0.0,120.0,0.0,0.0,0.0,0.585492,70.259067,1.969654,138.386064
87,3600,10000.0,113.0,11.0,0.0,60.0,0.0,0.0,0.0,0.585492,35.129534,1.969654,69.193032


In [38]:
# Calculate the time spent in each Upper Bound
for index, row in activities_bis.iterrows():
    if row["average_heartrate"] < hr_data["Upper Bound"].iloc[0]:
        activities_bis.loc[index, "Upper Bound"] = hr_data["Upper Bound"].iloc[0]
        activities_bis.loc[index, "Zone"] = hr_data["Zone"].iloc[0]
    elif hr_data["Lower Bound"].iloc[1] <= row["average_heartrate"] < hr_data["Upper Bound"].iloc[1]:
        activities_bis.loc[index, "Upper Bound"] = hr_data["Upper Bound"].iloc[1]
        activities_bis.loc[index, "Zone"] = hr_data["Zone"].iloc[1]
    elif hr_data["Lower Bound"].iloc[2] <= row["average_heartrate"] < hr_data["Upper Bound"].iloc[2]:
        activities_bis.loc[index, "Upper Bound"] = hr_data["Upper Bound"].iloc[2]
        activities_bis.loc[index, "Zone"] = hr_data["Zone"].iloc[2]
    elif hr_data["Lower Bound"].iloc[3] <= row["average_heartrate"] < hr_data["Upper Bound"].iloc[3]:
        activities_bis.loc[index, "Upper Bound"] = hr_data["Upper Bound"].iloc[3]
        activities_bis.loc[index, "Zone"] = hr_data["Zone"].iloc[3]
    elif row["average_heartrate"] > hr_data["Lower Bound"].iloc[4]:
        activities_bis.loc[index, "Upper Bound"] = hr_data["Upper Bound"].iloc[4]
        activities_bis.loc[index, "Zone"] = hr_data["Zone"].iloc[4]

In [39]:
activities_bis

Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,HR_ratio,HR_time,weight,HR_time_weight,Upper Bound,Zone
0,300,1000.0,190.0,21.0,0.0,0.0,0.0,0.0,5.0,0.984456,4.922280,4.237055,20.855968,inf,5.0
1,300,1000.0,180.0,19.0,0.0,0.0,0.0,5.0,0.0,0.932642,4.663212,3.835832,17.887301,187.0,4.0
2,300,1000.0,170.0,14.0,0.0,0.0,0.0,5.0,0.0,0.880829,4.404145,3.472604,15.293850,187.0,4.0
3,300,1000.0,160.0,9.0,0.0,0.0,5.0,0.0,0.0,0.829016,4.145078,3.143770,13.031171,168.0,3.0
4,300,1000.0,150.0,7.0,0.0,0.0,5.0,0.0,0.0,0.777202,3.886010,2.846075,11.059877,168.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,1800,5000.0,118.0,6.0,0.0,30.0,0.0,0.0,0.0,0.611399,18.341969,2.070104,37.969785,150.0,2.0
85,10800,30000.0,113.0,34.0,0.0,180.0,0.0,0.0,0.0,0.585492,105.388601,1.969654,207.579097,150.0,2.0
86,7200,20000.0,113.0,22.0,0.0,120.0,0.0,0.0,0.0,0.585492,70.259067,1.969654,138.386064,150.0,2.0
87,3600,10000.0,113.0,11.0,0.0,60.0,0.0,0.0,0.0,0.585492,35.129534,1.969654,69.193032,150.0,2.0


In [40]:
activities_z1 = activities_bis[activities_bis['Zone'] == 1]
activities_z2 = activities_bis[activities_bis['Zone'] == 2]
activities_z3 = activities_bis[activities_bis['Zone'] == 3]
activities_z4 = activities_bis[activities_bis['Zone'] == 4]
activities_z5 = activities_bis[activities_bis['Zone'] == 5]

In [49]:
activities_z1

Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,HR_ratio,HR_time,weight,HR_time_weight,Upper Bound,Zone
8,300,1000.0,110.0,0.0,5.0,0.0,0.0,0.0,0.0,0.569948,2.849741,1.911739,5.447962,113.0,1.0
31,10800,30000.0,110.0,32.0,180.0,0.0,0.0,0.0,0.0,0.569948,102.590674,1.911739,196.126615,113.0,1.0
32,7200,20000.0,110.0,21.0,120.0,0.0,0.0,0.0,0.0,0.569948,68.393782,1.911739,130.751076,113.0,1.0
33,3600,10000.0,110.0,10.0,60.0,0.0,0.0,0.0,0.0,0.569948,34.196891,1.911739,65.375538,113.0,1.0
34,1800,5000.0,110.0,5.0,30.0,0.0,0.0,0.0,0.0,0.569948,17.098446,1.911739,32.687769,113.0,1.0
64,10800,30000.0,110.0,32.0,180.0,0.0,0.0,0.0,0.0,0.569948,102.590674,1.911739,196.126615,113.0,1.0
65,7200,20000.0,110.0,21.0,120.0,0.0,0.0,0.0,0.0,0.569948,68.393782,1.911739,130.751076,113.0,1.0
66,3600,10000.0,110.0,10.0,60.0,0.0,0.0,0.0,0.0,0.569948,34.196891,1.911739,65.375538,113.0,1.0
67,1800,5000.0,110.0,5.0,30.0,0.0,0.0,0.0,0.0,0.569948,17.098446,1.911739,32.687769,113.0,1.0
68,10800,30000.0,105.0,29.0,180.0,0.0,0.0,0.0,0.0,0.544041,97.927461,1.818974,178.127484,113.0,1.0


In [51]:

from scipy.optimize import least_squares

# Zone 1
# Define the TRIMP function
def trimp(heart_rate, bound, HR_time_weight, a):
    return HR_time_weight*heart_rate/bound*a

# Define the error function to minimize
def residuals(params):
    a = params
    y_pred = trimp(heart_rate, bound, HR_time_weight, a)
    return y_pred - effort_score

# Define the observed data
duration = np.array(activities_z1['moving_time'])
heart_rate = np.array(activities_z1['average_heartrate'])
HR_time_weight = np.array(activities_z1['HR_time_weight'])
zone = np.array(activities_z1['Zone'])
effort_score = np.array(activities_z1['relative_effort'])
bound = np.array(activities_z1['Upper Bound'])

# Perform the optimization
result = least_squares(residuals, x0=[1/6])
a = result.x

print(f"a={a}")


a=[0.16986715]


In [55]:
activities_z1['new_RE'] = round(activities_z1['HR_time_weight']*a,0)
activities_z1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activities_z1['new_RE'] = round(activities_z1['HR_time_weight']*a,0)


Unnamed: 0,moving_time,distance,average_heartrate,relative_effort,time_z1,time_z2,time_z3,time_z4,time_z5,HR_ratio,HR_time,weight,HR_time_weight,Upper Bound,Zone,new_RE
8,300,1000.0,110.0,0.0,5.0,0.0,0.0,0.0,0.0,0.569948,2.849741,1.911739,5.447962,113.0,1.0,1.0
31,10800,30000.0,110.0,32.0,180.0,0.0,0.0,0.0,0.0,0.569948,102.590674,1.911739,196.126615,113.0,1.0,33.0
32,7200,20000.0,110.0,21.0,120.0,0.0,0.0,0.0,0.0,0.569948,68.393782,1.911739,130.751076,113.0,1.0,22.0
33,3600,10000.0,110.0,10.0,60.0,0.0,0.0,0.0,0.0,0.569948,34.196891,1.911739,65.375538,113.0,1.0,11.0
34,1800,5000.0,110.0,5.0,30.0,0.0,0.0,0.0,0.0,0.569948,17.098446,1.911739,32.687769,113.0,1.0,6.0
64,10800,30000.0,110.0,32.0,180.0,0.0,0.0,0.0,0.0,0.569948,102.590674,1.911739,196.126615,113.0,1.0,33.0
65,7200,20000.0,110.0,21.0,120.0,0.0,0.0,0.0,0.0,0.569948,68.393782,1.911739,130.751076,113.0,1.0,22.0
66,3600,10000.0,110.0,10.0,60.0,0.0,0.0,0.0,0.0,0.569948,34.196891,1.911739,65.375538,113.0,1.0,11.0
67,1800,5000.0,110.0,5.0,30.0,0.0,0.0,0.0,0.0,0.569948,17.098446,1.911739,32.687769,113.0,1.0,6.0
68,10800,30000.0,105.0,29.0,180.0,0.0,0.0,0.0,0.0,0.544041,97.927461,1.818974,178.127484,113.0,1.0,30.0


### Zone 2

In [56]:
# Zone 2
# Define the TRIMP function
def trimp(heart_rate, bound, HR_time_weight, a):
    return HR_time_weight*heart_rate/bound*a

# Define the error function to minimize
def residuals(params):
    a = params
    y_pred = trimp(heart_rate, bound, HR_time_weight, a)
    return y_pred - effort_score

# Define the observed data
duration = np.array(activities_z2['moving_time'])
heart_rate = np.array(activities_z2['average_heartrate'])
HR_time_weight = np.array(activities_z2['HR_time_weight'])
zone = np.array(activities_z2['Zone'])
effort_score = np.array(activities_z2['relative_effort'])
bound = np.array(activities_z2['Upper Bound'])

# Perform the optimization
result = least_squares(residuals, x0=[1/6])
a = result.x

print(f"a={a}")

a=[0.30070455]


## Laurent

In [61]:
la = pd.read_csv('data/re.csv', index_col='Unnamed: 0')
la

Unnamed: 0,Date,Time,Nom du fichier,Durée de déplacement,Distance,Fréquence cardiaque moyenne,Fréquence cardiaque maximum,Vitesse moyenne,Cadence moyenne,Puissance moyenne,...,Puissance moyenne pondérée,intensity_score,time_z1,time_z2,time_z3,time_z4,time_z5,TRIMP,TRIMP_score,Relative Effort
663,2017-09-03,11:08:17,activities/1272038048.fit.gz,67.53,39.50,138.995026,174.0,,90.237099,266.073975,...,310.0,86.11,0.01,0.05,0.07,0.04,0.0,48.0,10.13,11.401315
689,2017-10-26,14:33:17,activities/1354264574.fit.gz,59.27,29.44,140.061310,179.0,,81.347870,258.270538,...,310.0,86.11,0.05,0.66,0.14,0.13,0.0,231.0,48.73,48.137118
690,2017-10-28,12:14:35,activities/1357422018.fit.gz,117.05,51.06,101.433411,172.0,,82.146042,131.771713,...,211.0,58.61,0.75,0.21,0.01,0.01,0.0,124.0,26.16,51.033800
691,2017-10-29,07:12:07,activities/1358014051.fit.gz,101.75,40.97,125.681000,182.0,,81.164803,213.772003,...,294.0,81.67,0.35,0.41,0.04,0.18,0.0,201.0,42.41,71.920292
692,2017-10-30,15:51:33,activities/1360150637.fit.gz,35.22,19.09,123.212326,155.0,,91.744896,206.869675,...,222.0,61.67,0.23,0.65,0.08,0.00,0.0,177.0,37.34,21.918580
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2388,2023-02-15,09:29:58,activities/9194335010.fit.gz,25.45,18.11,155.882782,,42.70,94.063995,375.270477,...,400.0,111.11,0.07,0.06,0.50,0.30,0.0,289.0,60.97,25.861442
2389,2023-02-16,10:59:50,activities/9201718878.fit.gz,218.73,139.03,147.699341,,38.14,83.431664,290.123718,...,335.0,93.06,0.03,0.40,0.39,0.13,0.0,252.0,53.16,193.794780
2390,2023-02-16,10:31:27,activities/9201719852.fit.gz,8.97,4.04,107.026070,,27.03,69.140045,168.564240,...,204.0,56.67,0.59,0.38,0.00,0.00,0.0,135.0,28.48,4.257760
2391,2023-02-17,09:39:32,activities/9206565665.fit.gz,143.42,73.94,105.816284,,30.93,75.174011,185.966919,...,229.0,63.61,0.56,0.41,0.01,0.00,0.0,141.0,29.75,71.112417


In [None]:
zone_data = calculate_time_in_zones(data_vo2_fit, hr_data)
zone_data.to_csv("data/zone_data.csv")

zone_data = pd.read_csv("data/zone_data.csv", index_col="Unnamed: 0")
zone_data

In [22]:
la = pd.read_csv("data/zone_data_subdiv.csv", index_col="Unnamed: 0")
la

Unnamed: 0,Date,Time,Nom du fichier,Durée de déplacement,Distance,Fréquence cardiaque moyenne,Fréquence cardiaque maximum,Vitesse moyenne,Cadence moyenne,Puissance moyenne,...,time_z3,avgHR_z3,time_z4,avgHR_z4,time_z5,avgHR_z5,time_z6,avgHR_z6,time_z7,avgHR_z7
46,2014-04-15,14:35:30,activities/142879521.tcx.gz,112.52,47.67,127.316002,176.0,,73.387299,219.221359,...,0.14,137.346320,0.19,158.075235,0.08,168.404255,0.00,176.000000,0.0,0.0
47,2014-04-17,14:29:30,activities/143568422.tcx.gz,73.92,33.36,130.358002,182.0,,79.790298,241.315857,...,0.14,137.418301,0.11,156.915254,0.10,170.902655,0.17,178.321053,0.0,0.0
48,2014-04-19,10:56:24,activities/144313052.tcx.gz,217.67,85.62,139.462006,183.0,,72.434898,234.804291,...,0.12,137.136951,0.12,157.485488,0.11,170.592287,0.13,178.019231,0.0,0.0
49,2014-04-21,14:27:58,activities/145168490.tcx.gz,50.32,25.62,111.242996,144.0,,96.695900,184.828018,...,0.27,134.222772,0.00,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.0
50,2014-04-22,12:44:13,activities/145556830.tcx.gz,202.33,88.02,143.753006,174.0,,76.811096,240.926056,...,0.31,137.573876,0.28,159.043943,0.11,167.412121,0.00,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2388,2023-02-15,09:29:58,activities/9194335010.fit.gz,25.45,18.11,155.882782,,42.70,94.063995,375.270477,...,0.06,141.905882,0.50,157.554822,0.30,168.046256,0.00,0.000000,0.0,0.0
2389,2023-02-16,10:59:50,activities/9201718878.fit.gz,218.73,139.03,147.699341,,38.14,83.431664,290.123718,...,0.28,139.270525,0.39,157.007081,0.13,168.033013,0.00,180.125000,0.0,0.0
2390,2023-02-16,10:31:27,activities/9201719852.fit.gz,8.97,4.04,107.026070,,27.03,69.140045,168.564240,...,0.00,0.000000,0.00,0.000000,0.00,0.000000,0.00,0.000000,0.0,0.0
2391,2023-02-17,09:39:32,activities/9206565665.fit.gz,143.42,73.94,105.816284,,30.93,75.174011,185.966919,...,0.06,134.672968,0.01,150.555556,0.00,0.000000,0.00,0.000000,0.0,0.0


In [23]:
lab = la[['Durée de déplacement', 'Fréquence cardiaque moyenne', "Mesure d'effort", 'time_z1','avgHR_z1',	'time_z2','avgHR_z2',	'time_z3','avgHR_z3',	'time_z4','avgHR_z4',	'time_z5', 'avgHR_z5', 'time_z6', 'avgHR_z6', 'time_z7', 'avgHR_z7']]
lab = lab.dropna()
lab.columns

Index(['Durée de déplacement', 'Fréquence cardiaque moyenne',
       'Mesure d'effort', 'time_z1', 'avgHR_z1', 'time_z2', 'avgHR_z2',
       'time_z3', 'avgHR_z3', 'time_z4', 'avgHR_z4', 'time_z5', 'avgHR_z5',
       'time_z6', 'avgHR_z6', 'time_z7', 'avgHR_z7'],
      dtype='object')

In [24]:
def calculate_zone_weighted(df):
    # define the weighting factor function
    weighting_factor = lambda x: 0.64*np.exp(1.92*x/190)

    # create a list of tuples, where each tuple contains the zone time and heart rate columns
    zone_columns = [('time_z1', 'avgHR_z1'), ('time_z2', 'avgHR_z2'), ('time_z3', 'avgHR_z3'), ('time_z4', 'avgHR_z4'), ('time_z5', 'avgHR_z5'), ('time_z6', 'avgHR_z6'), ('time_z7', 'avgHR_z7')]

    # iterate over the list of tuples and create a new column for each zone
    for i, (zone_time_col, zone_hr_col) in enumerate(zone_columns):
        # create a new column name for the duration
        duration_col_name = f'duration_HR{i+1}'
        # create a new column for the duration
        df[duration_col_name] = df['Durée de déplacement'] * df[zone_time_col]
        
        # create a new column name for the weighting factor
        wfactor_col_name = f'wfactor{i+1}'
        # apply the calculation using lambda function to create the new column
        df[wfactor_col_name] = weighting_factor(df[zone_hr_col])

        # create a new column name for the weighted duration
        weighted_duration_col_name = f'weighted_duration_HR{i+1}'
        # calculate the weighted duration
        df[weighted_duration_col_name] = df[duration_col_name] * df[zone_hr_col] / 190 * df[wfactor_col_name]
    
    return df


In [25]:
calculate_zone_weighted(lab)

Unnamed: 0,Durée de déplacement,Fréquence cardiaque moyenne,Mesure d'effort,time_z1,avgHR_z1,time_z2,avgHR_z2,time_z3,avgHR_z3,time_z4,...,weighted_duration_HR4,duration_HR5,wfactor5,weighted_duration_HR5,duration_HR6,wfactor6,weighted_duration_HR6,duration_HR7,wfactor7,weighted_duration_HR7
46,112.52,127.316002,54.0,0.24,85.524510,0.28,120.224359,0.14,137.346320,0.19,...,56.235686,9.0016,3.509530,28.000654,0.0000,3.789519,0.000000,0.0,0.64,0.0
47,73.92,130.358002,55.0,0.23,60.953307,0.18,122.907216,0.14,137.418301,0.11,...,20.984271,7.3920,3.599263,23.931548,12.5664,3.879453,45.754135,0.0,0.64,0.0
48,217.67,139.462006,154.0,0.12,105.444156,0.34,120.242038,0.12,137.136951,0.12,...,68.045123,23.9437,3.587992,77.134483,28.2971,3.867638,102.541840,0.0,0.64,0.0
49,50.32,111.242996,16.0,0.28,72.198068,0.42,121.241270,0.27,134.222772,0.00,...,0.000000,0.0000,0.640000,0.000000,0.0000,0.640000,0.000000,0.0,0.64,0.0
50,202.33,143.753006,112.0,0.06,105.890173,0.16,120.735294,0.31,137.573876,0.28,...,151.408974,22.2563,3.474520,68.136704,0.0000,0.640000,0.000000,0.0,0.64,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2388,25.45,155.882782,46.0,0.07,96.828571,0.00,119.000000,0.06,141.905882,0.50,...,33.187185,7.6350,3.496857,23.613597,0.0000,0.640000,0.000000,0.0,0.64,0.0
2389,218.73,147.699341,272.0,0.03,103.299742,0.11,122.030518,0.28,139.270525,0.39,...,220.480037,28.4349,3.496389,87.925017,0.0000,3.950821,0.000000,0.0,0.64,0.0
2390,8.97,107.026070,1.0,0.59,96.828746,0.37,120.956522,0.00,0.000000,0.00,...,0.000000,0.0000,0.640000,0.000000,0.0000,0.640000,0.000000,0.0,0.64,0.0
2391,143.42,105.816284,26.0,0.56,92.961594,0.34,119.229361,0.06,134.672968,0.01,...,3.330198,0.0000,0.640000,0.000000,0.0000,0.640000,0.000000,0.0,0.64,0.0


In [26]:
# Define the TRIMP function
def trimp(weighted_duration_HR1, weighted_duration_HR2, weighted_duration_HR3, weighted_duration_HR4, weighted_duration_HR5, weighted_duration_HR6, weighted_duration_HR7, a, b, c, d, e, f, g):
    z1 = (weighted_duration_HR1*a)
    z2 = (weighted_duration_HR2*b)
    z3 = (weighted_duration_HR3*c)
    z4 = (weighted_duration_HR4*d)
    z5 = (weighted_duration_HR5*e)
    z6 = (weighted_duration_HR6*f)
    z7 = (weighted_duration_HR7*g)
    return z1 + z2 + z3 + z4 + z5 + z6 + z7

# Define the error function to minimize
def residuals(params):
    a, b, c, d, e, f, g = params
    y_pred = trimp(weighted_duration_HR1, weighted_duration_HR2, weighted_duration_HR3, weighted_duration_HR4, weighted_duration_HR5, weighted_duration_HR6, weighted_duration_HR7, a, b, c, d, e, f, g)
    return y_pred - effort_score

# Define the observed data
heart_rate = np.array(lab['Fréquence cardiaque moyenne'])
effort_score = np.array(lab["Mesure d'effort"])

weighted_duration_HR1 = np.array(lab['weighted_duration_HR1'])
weighted_duration_HR2 = np.array(lab['weighted_duration_HR2'])
weighted_duration_HR3 = np.array(lab['weighted_duration_HR3'])
weighted_duration_HR4 = np.array(lab['weighted_duration_HR4'])
weighted_duration_HR5 = np.array(lab['weighted_duration_HR5'])
weighted_duration_HR6 = np.array(lab['weighted_duration_HR6'])
weighted_duration_HR7 = np.array(lab['weighted_duration_HR7'])


# Perform the optimization
result = least_squares(residuals, x0=[0.2, 0.4, 0.6, 0.7, 0.8, 1, 1.1])
a, b, c, d, e, f, g = result.x

print(f"a={a}, b={b}, c={c}, d={d}, e={e}, f={f}, g={g}")


a=0.15431269224351535, b=0.027420732193484548, c=0.38565644853265013, d=0.5323576919121238, e=0.6270385873082338, f=1.0517240453307226, g=1.0598455728195533


In [19]:
def calculate_zone_weighted(df):
    # define the weighting factor function
    #weighting_factor = lambda x: 0.64*np.exp(1.92*x/190)

    # create a list of tuples, where each tuple contains the zone time and heart rate columns
    zone_columns = [('time_z1', 'avgHR_z1'), ('time_z2', 'avgHR_z2'), ('time_z3', 'avgHR_z3'), ('time_z4', 'avgHR_z4'), ('time_z5', 'avgHR_z5'), ('time_z6', 'avgHR_z6'), ('time_z7', 'avgHR_z7')]

    # iterate over the list of tuples and create a new column for each zone
    for i, (zone_time_col, zone_hr_col) in enumerate(zone_columns):
        # create a new column name for the duration
        duration_col_name = f'duration_HR{i+1}'
        # create a new column for the duration
        df[duration_col_name] = df['Durée de déplacement'] * df[zone_time_col]
        

        # create a new column name for the weighted duration
        weighted_duration_col_name = f'weighted_duration_HR{i+1}'
        # calculate the weighted duration
        df[weighted_duration_col_name] = df[duration_col_name] * df[zone_hr_col] / 190
    
    return df

lab2 = calculate_zone_weighted(lab)

In [21]:
# Define the TRIMP function
def trimp(weighted_duration_HR1, weighted_duration_HR2, weighted_duration_HR3, weighted_duration_HR4, weighted_duration_HR5, weighted_duration_HR6, weighted_duration_HR7, a, b, c, d, e, f, g):
    z1 = (weighted_duration_HR1*0.64*np.exp(1.92*a))
    z2 = (weighted_duration_HR2*0.64*np.exp(1.92*b))
    z3 = (weighted_duration_HR3*0.64*np.exp(1.92*c))
    z4 = (weighted_duration_HR4*0.64*np.exp(1.92*d))
    z5 = (weighted_duration_HR5*0.64*np.exp(1.92*e))
    z6 = (weighted_duration_HR6*0.64*np.exp(1.92*f))
    z7 = (weighted_duration_HR7*0.64*np.exp(1.92*g))
    return z1 + z2 + z3 + z4 + z5 + z6 + z7

# Define the error function to minimize
def residuals(params):
    a, b, c, d, e, f, g = params
    y_pred = trimp(weighted_duration_HR1, weighted_duration_HR2, weighted_duration_HR3, weighted_duration_HR4, weighted_duration_HR5, weighted_duration_HR6, weighted_duration_HR7, a, b, c, d, e, f, g)
    return y_pred - effort_score

# Define the observed data
heart_rate = np.array(lab2['Fréquence cardiaque moyenne'])
effort_score = np.array(lab2["Mesure d'effort"])

weighted_duration_HR1 = np.array(lab2['weighted_duration_HR1'])
weighted_duration_HR2 = np.array(lab2['weighted_duration_HR2'])
weighted_duration_HR3 = np.array(lab2['weighted_duration_HR3'])
weighted_duration_HR4 = np.array(lab2['weighted_duration_HR4'])
weighted_duration_HR5 = np.array(lab2['weighted_duration_HR5'])
weighted_duration_HR6 = np.array(lab2['weighted_duration_HR6'])
weighted_duration_HR7 = np.array(lab2['weighted_duration_HR7'])


# Perform the optimization
result = least_squares(residuals, x0=[0.2, 0.4, 0.6, 0.7, 0.8, 1, 1.1])
a, b, c, d, e, f, g = result.x

print(f"a={a}, b={b}, c={c}, d={d}, e={e}, f={f}, g={g}")


a=-0.48521595533145745, b=-1.2288823977554657, c=0.16509363022992332, d=0.5774574932175552, e=0.7501095390443343, f=0.9820660733757447, g=1.0339472437522992
