#  Combining Garmin and Clue data.
This script uses Garmin and Clue data to visualise running performance at the same time as menstrual data. Although Garmin has it's own menstrual calendar it is not possible to download this data or to see this data alongside performance information. If you want to track your own menstrual data rather than using an app create a csv file with a column of the dates you've had your period instead.

You need three files for this script to run.
* `Activities.csv` The CSV downloaded from Garmin connect when all activities are selected.
* `Activities_running_only.csv` The CSV downloaded from Garmin connect when only running is slected. This will export more of the running statistics.
* `clue_measurements.json` This is the `measurements.json` file created when you download data from the menstrual tracking app Clue. If you aren't using clue then you will need `period_dates.csv`, enter the dates in the format `YYYY-MM-DD`.

To download from Garmin:
1. Login to Garmin Connect.
2. Sidebar select `Activities>All activities`.  
3. Select either running or all activites. You will need both.
4. Scroll down as far as the dates you want to include.
5. Click `Export csv`.
6. Move your csv file to the same place you've chosen your data (probably a directory where this script is saved.)

To download from Clue:
1. Open the Clue app
2. Go to the More Menu (the = in the top-right corner of your Cycle View)
3. Tap Settings
4. Tap Download my data
5. Tap Request data
6. A screen will appear with a unique password to download the data file - copy this. You will probably want to send this to yourself as it's likely you'll run this script on a computer rather than phone. So paste->send->copy again.
7. Open the email from Clue that was sent to your Clue email address
8. The email will include a link to download the data file, which expires after 72 hours
9. Tap Download data
10. Extract the zip to the desired location.
11. When you open the file enter the password. Save the `measurements` file as `clue_measurements`. The others can now be deleted.

In [413]:
import pandas as pd
import altair as alt
import numpy as np
import datetime as dt
from IPython.display import display, Markdown
from scipy import stats
from datetime import timedelta
import json
import os

# Your inputs
Input your HR zones and the name of the directory containing your data.

## Heart rate zones
To access your heart rate (HR) zones in the Garmin Connect app, you can do the following:
1. Open the app
2. Select More in the bottom right corner
3. Select Garmin Devices
4. Select your device
5. Select User Settings, User Profile, or My Stats
6. Select Heart Rate Zones or Heart Rate
7. Customize your HR zones
8. Select Done

## Data directory
* If your data directory is saved in the same place as this script then you can enter the name inside \" \". Include "/" at the end.
* Prefix the directory with "../" if it's in the directory above. Use this as many times as you need to go up. 
* If your data isn't in a separate folder and is in the same place as the script then set to \"\".
* If your data is in a completely different part of your system you can use an absolute path for example \"C:/User/name/data/", this is NOT advised if you are going to be sharing this script. Include "/" at the end.

In [414]:
hr_zones = [120,148, 165, 176, 185, 213]
dir_name = "my_data/" # The name and path of the directory you have the data saved in.

# Setup the Functions
## Plotting functions

In [415]:
display(Markdown("Function - scatter plot without period data but with upper and lower bounds."))
def scatter_plot_no_p(data_frame, col1, col2, low = 0, high = 1.0):
    # Plot the points labelled Success first then others
    data_crop = data_frame[[col1, col2]].dropna(how = "any")
    data_crop["Band"] = None
    data_crop.loc[data_crop[col2]< low, "Band"] = "low"
    data_crop.loc[data_crop[col2]> high, "Band"] = "high"
    data_crop.loc[(data_crop[col2]>=low) & (data_crop[col2]<= high), "Band"] = "Mid"
    if col1 == "Date": xvar = col1+":T"
    else: xvar = col1+":Q"
    points = alt.Chart(data_crop).mark_square(size=20).encode(
            x=xvar,
            y=alt.Y(col2+":Q").scale(zero=False),
            color = 'Band'+":N",
            tooltip = [col1, col2]
        ).properties(
    width=800,
    height=300
).interactive()
    line = alt.Chart(data_crop).mark_line(
            color='blue',
            size=1
        ).transform_window(
            rolling_mean='mean('+col2+')',
            frame=[-5, 5]
        ).encode(
        x = xvar,
        y = alt.Y('rolling_mean:Q').scale(zero=False)
    ).properties(
    width=800,
    height=300
)
    return(points+line)


Function - scatter plot without period data but with upper and lower bounds.

In [416]:
def scatter_plot(data_frame, col1, col2):
    # Plot the points labelled Success first then others
    data_crop = data_frame[[col1, col2, "Period"]].dropna(how = "any")
    if col1 == "Date": xvar = col1+":T"
    else: xvar = col1+":Q"
    points = alt.Chart(data_crop).mark_circle(size=20).encode(
            x=xvar,
            y=alt.Y(col2+":Q").scale(zero=False),
            color='Period'+":N",
            tooltip = [col1, col2, "Period"]
        ).properties(
    width=800,
    height=300
).interactive()
    line = alt.Chart(data_crop).mark_line(
            color='blue',
            size=1
        ).transform_window(
            rolling_mean='mean('+col2+')',
            frame=[-5, 5]
        ).encode(
        x = xvar,
        y = alt.Y('rolling_mean:Q').scale(zero=False)
    ).properties(
    width=800,
    height=300
)
    return(points+line)

## Function for numbering Heartrate zones

In [417]:
def set_hr_zone(row, hr_zones):
    hr = row["Avg HR"]
    if hr <hr_zones[0]: return 1
    elif hr_zones[0] <= hr < hr_zones[1]: return 2
    elif hr_zones[1] <= hr < hr_zones[2]: return 3
    elif hr_zones[2]<= hr < hr_zones[3]: return 4
    elif hr_zones[3] <= hr < hr_zones[4]: return 5
    elif hr_zones[4]<= hr < hr_zones[5]: return 6
    elif hr_zones[5]<= hr : return 6
    else: return 1

## Function to convert `Elapsed Time` and `Duration` to integer number of seconds.

In [418]:
def make_del(row):
    if "Elapsed Time" in row.index:
        entry = row["Elapsed Time"]
    elif "Duration" in row.index:
        entry = row["Duration"]
    else: raise("Duration column is missing, check csv files.")
    splits = entry.split(':')
    if len(splits)>2:
        h, m, s = splits
        if "." in s:
          s, _ = s.split('.')
    elif len(splits)==2:
        h = 0
        m = splits[0]
        if "." in s:
          s, _ = s.split('.')
    else: return dt.timedelta(hours=int(0), minutes=int(0), seconds=int(0)).total_seconds()
    return dt.timedelta(hours=int(h), minutes=int(m), seconds=int(s)).total_seconds()

# Load Data
## Load the activities data

In [419]:
try: os.path.isfile(dir_name+ "Activities.csv")
except: print("Activities.csv is not in the data directory. Check location and filename")
activities_df = pd.read_csv(dir_name+"Activities.csv", header = 0, parse_dates=["Date"])
activities_df.head(-5)

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Max Resp,Stress Change,Stress Start,Stress End,Avg Stress,Max Stress,Moving Time,Elapsed Time,Min Elevation,Max Elevation
0,Running,2024-07-21 08:00:25,False,Bath and North East Somerset Running,15.83,1030,01:42:18,176,188,4.4,...,41,--,--,--,--,--,01:40:13,02:02:12,15,186
1,Indoor Cycling,2024-07-17 20:59:15,False,Indoor Cycling,0.00,58,00:20:06,88,124,0.2,...,17,--,--,--,--,--,00:00:00,00:20:06,--,--
2,Indoor Cycling,2024-07-17 17:59:00,False,Indoor Cycling,0.00,427,00:45:58,145,182,3.3,...,43,--,--,--,--,--,00:00:00,00:45:58,--,--
3,Cycling,2024-07-17 17:32:09,False,Bath and North East Somerset Cycling,4.75,131,00:20:13,117,145,1.1,...,--,--,--,--,--,--,00:18:22,00:20:13,25,51
4,Cycling,2024-07-17 11:34:19,False,Bath and North East Somerset Cycling,4.12,103,00:18:21,108,141,0.5,...,--,--,--,--,--,--,00:17:31,03:00:05,26,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
590,Walking,2023-08-22 17:03:03,False,Maldon Walking,2.61,155,00:40:42,81,108,0.4,...,28,--,--,--,--,--,00:37:58,00:40:42,22,35
591,Walking,2023-08-20 19:16:42,False,Maldon Walking,1.01,72,00:17:25,100,131,0.3,...,29,--,--,--,--,--,00:15:02,00:18:34,18,36
592,Running,2023-08-20 17:41:19,False,Maldon Running,9.52,662,01:27:29,141,166,3.0,...,37,--,--,--,--,--,01:19:33,01:27:29,15,63
593,Stand Up Paddleboarding,2023-08-19 13:56:30,False,Maldon Stand Up Paddleboarding,2.67,279,00:58:16,111,135,1.0,...,--,--,--,--,--,--,00:42:36,00:58:16,--,--


## Load the running only activities data

In [420]:
try: os.path.isfile(dir_name+ "Activities_running_only.csv")
except: raise("Activities_running_only.csv is not in the data directory. Check location and filename")
running_df = pd.read_csv(dir_name+"Activities_running_only.csv", header = 0, parse_dates=["Date"])
running_df.head(-5)

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Best Lap Time,Number of Laps,Max Temp,Avg Resp,Min Resp,Max Resp,Moving Time,Elapsed Time,Min Elevation,Max Elevation
0,Running,2024-07-21 08:00:25,False,Bath and North East Somerset Running,15.83,1030,01:42:18,176,188,4.4,...,00:04:53.2,16,29.0,34,17,41,01:40:13,02:02:12,15,186
1,Running,2024-07-16 18:37:07,False,Bath and North East Somerset Running,4.75,287,00:37:30,138,169,2.3,...,00:05:50.6,5,31.0,26,16,34,00:30:21,00:39:24,23,50
2,Running,2024-07-14 08:14:53,False,Bath and North East Somerset Running,8.79,527,00:54:05,155,170,3.3,...,00:04:31.6,9,28.0,26,16,35,00:52:48,01:15:26,12,37
3,Running,2024-07-12 19:16:48,False,Wiltshire Running,9.66,620,00:57:05,169,188,3.6,...,00:03:54.3,10,23.0,32,21,41,00:56:59,00:57:05,52,158
4,Running,2024-07-11 19:30:12,False,Bath and North East Somerset Running,4.38,297,00:34:38,148,171,2.3,...,00:02:05.8,5,28.0,26,11,35,00:34:22,00:35:27,34,189
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,Running,2023-08-18 09:03:35,False,Maldon Running,1.48,103,00:11:27,147,163,2.0,...,00:03:39.9,2,30.0,32,14,37,00:11:23,00:11:27,27,37
211,Running,2023-08-15 18:49:21,False,Maldon - HR efforts,5.87,395,00:43:24,155,179,3.1,...,00:00:51.0,12,29.0,31,21,38,00:43:22,00:43:24,15,27
212,Running,2023-08-13 11:51:14,False,Bath and North East Somerset - 12 miles + WU +...,23.71,1607,03:00:43,164,183,4.6,...,00:00:15.1,37,31.0,33,21,43,03:00:16,03:00:51,19,44
213,Running,2023-08-11 09:01:31,False,Bath and North East Somerset - Base,5.02,342,00:38:49,147,165,2.6,...,00:01:49.4,6,30.0,29,20,37,00:38:18,00:39:01,30,55


## Load period data from Clue - JSON

In [421]:
try: period_df = pd.read_json(dir_name + "clue_measurements.json")
except: 
    try: period_df = pd.read_csv(dir_name + "period_dates.csv")
    except: print("No period data found. is not in the data directory. Check location and filename")
period_dates = period_df[period_df.type=='period'].date

## Set the data-type for the Date columns

In [422]:
activities_df["Date"] = pd.to_datetime(activities_df["Date"], dayfirst= True)
running_df["Date"] = pd.to_datetime(running_df["Date"], dayfirst= True)
period_dates = pd.to_datetime(period_dates, dayfirst= True)

# Combine and restucture the data
## Add period data to activities and running dataframes.

In [423]:
running_df['Period'] = 0
activities_df['Period'] = 0
running_df.loc[running_df.Date.dt.date.isin(period_dates.dt.date),'Period'] = 1
activities_df.loc[activities_df.Date.dt.date.isin(period_dates.dt.date),'Period'] = 1


## Get the running data
### Extract data from the strings.

In [424]:
running_df.rename(columns = {"Time": "Duration"}, inplace= True)
running_df["Distance"] = pd.to_numeric(running_df["Distance"])
running_df[['L_GCT Balance', 'R_GCT Balance']] = running_df["Avg GCT Balance"].str.split(" / ", expand= True)
running_df.loc[:,"L_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
running_df.loc[:, "R_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
col_list = ['Avg Stride Length', 'Avg Vertical Ratio', 'Avg Vertical Oscillation', 'Avg Run Cadence', 
            'Avg Ground Contact Time', "L_GCT Balance", "R_GCT Balance"]
running_df[col_list] = running_df[col_list].apply(pd.to_numeric, errors='coerce')
running_df["L_GCT Balance"] = running_df["L_GCT Balance"] / 1000
running_df["R_GCT Balance"] = running_df["R_GCT Balance"] / 1000
running_df["Date"] = pd.to_datetime(running_df["Date"], dayfirst=True, errors = "coerce")
running_df.head()

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Duration,Avg HR,Max HR,Aerobic TE,...,Avg Resp,Min Resp,Max Resp,Moving Time,Elapsed Time,Min Elevation,Max Elevation,Period,L_GCT Balance,R_GCT Balance
0,Running,2024-07-21 08:00:25,False,Bath and North East Somerset Running,15.83,1030,01:42:18,176,188,4.4,...,34,17,41,01:40:13,02:02:12,15,186,0,0.507,0.507
1,Running,2024-07-16 18:37:07,False,Bath and North East Somerset Running,4.75,287,00:37:30,138,169,2.3,...,26,16,34,00:30:21,00:39:24,23,50,0,0.5,0.5
2,Running,2024-07-14 08:14:53,False,Bath and North East Somerset Running,8.79,527,00:54:05,155,170,3.3,...,26,16,35,00:52:48,01:15:26,12,37,0,0.506,0.506
3,Running,2024-07-12 19:16:48,False,Wiltshire Running,9.66,620,00:57:05,169,188,3.6,...,32,21,41,00:56:59,00:57:05,52,158,0,0.519,0.519
4,Running,2024-07-11 19:30:12,False,Bath and North East Somerset Running,4.38,297,00:34:38,148,171,2.3,...,26,11,35,00:34:22,00:35:27,34,189,1,0.498,0.498


## Collect activities by date
### Edit to match your own heart rate zones

In [425]:
start_date = min(running_df.Date.min(),activities_df.Date.min())

### Calculate the total duration in seconds for further calculations.

In [426]:
running_df["Duration_seconds"]=running_df.apply(make_del, axis=1)
activities_df["Duration_seconds"]=activities_df.apply(make_del, axis=1)

### Set the heartrate zones

In [427]:
activities_df["Avg HR"] = pd.to_numeric(activities_df["Avg HR"], errors='coerce')
activities_df["hr_zone"] = activities_df.apply(set_hr_zone, hr_zones = hr_zones, axis = 1)

### Day totals
Calculate the total activity per day and the training load corresponding.
Trianing load calculated using: $$\text{load} = \text{minutes of activity} * \text{average heart rate}.$$

In [428]:

#running_df.Date.dt.normalize(),start_date.date()
date_format = "%Y/%m/%d"
ndays = (dt.datetime.today()-start_date).days
by_date = {}
for i in range(ndays):
    new_date = (start_date+timedelta(days=i))
    runs = running_df.loc[running_df.Date.dt.date==pd.Timestamp(new_date.date()).date()]
    activities = activities_df[activities_df.Date.dt.date==pd.Timestamp(new_date.date()).date()]
    if len(activities)==0:
        tot_dist=0
        duration=0
        load=0
    else:
        tot_dist = runs["Distance"].sum()
        duration = dt.timedelta(seconds=activities["Duration_seconds"].sum())
        load = (activities["Duration_seconds"]%60 * activities.hr_zone).sum()
    if len(activities)>0:
        period = activities.Period.mean()
    else: period = 0
    by_date[new_date.date()] = {'run_dist': tot_dist,
                         'duration': duration,
                         'duration_seconds': activities["Duration_seconds"].sum(),
                         'load': load,
                         'Period': period}
overall_by_date_df = pd.DataFrame(by_date).T
overall_by_date_df.run_dist = pd.to_numeric(overall_by_date_df.run_dist)
overall_by_date_df.duration_seconds = pd.to_numeric(overall_by_date_df.duration_seconds)
overall_by_date_df.load = pd.to_numeric(overall_by_date_df.load)
overall_by_date_df.head()

Unnamed: 0,run_dist,duration,duration_seconds,load,Period
2023-07-22,0.0,0.0,0.0,0.0,0.0
2023-07-23,0.0,0.0,0.0,0.0,0.0
2023-07-24,0.0,0.0,0.0,0.0,0.0
2023-07-25,0.0,0.0,0.0,0.0,0.0
2023-07-26,0.0,0.0,0.0,0.0,0.0


## Plot the training load


In [429]:
overall_by_date_df["Date"] = pd.to_datetime(overall_by_date_df.index)
overall_by_date_df.reset_index(inplace=True, drop=True)
overall_by_date_df.head()

Unnamed: 0,run_dist,duration,duration_seconds,load,Period,Date
0,0.0,0.0,0.0,0.0,0.0,2023-07-22
1,0.0,0.0,0.0,0.0,0.0,2023-07-23
2,0.0,0.0,0.0,0.0,0.0,2023-07-24
3,0.0,0.0,0.0,0.0,0.0,2023-07-25
4,0.0,0.0,0.0,0.0,0.0,2023-07-26


In [430]:
training_load = scatter_plot(overall_by_date_df, "Date", "load")
(training_load)

In [431]:
display(Markdown("## Calculate the rolling totals and percentages."))
overall_by_date_df["Run KMs - Rolling weekly AVG"] = overall_by_date_df.run_dist.rolling(window = 7, axis=0).mean()
overall_by_date_df["Time - Rolling weekly AVG"] = overall_by_date_df.duration_seconds.rolling(window = 7, axis=0).mean()
overall_by_date_df["Load - Rolling weekly AVG"] = overall_by_date_df.load.rolling(window = 7, axis=0).mean()
overall_by_date_df.head()

    

## Calculate the rolling totals and percentages.

  overall_by_date_df["Run KMs - Rolling weekly AVG"] = overall_by_date_df.run_dist.rolling(window = 7, axis=0).mean()
  overall_by_date_df["Time - Rolling weekly AVG"] = overall_by_date_df.duration_seconds.rolling(window = 7, axis=0).mean()
  overall_by_date_df["Load - Rolling weekly AVG"] = overall_by_date_df.load.rolling(window = 7, axis=0).mean()


Unnamed: 0,run_dist,duration,duration_seconds,load,Period,Date,Run KMs - Rolling weekly AVG,Time - Rolling weekly AVG,Load - Rolling weekly AVG
0,0.0,0.0,0.0,0.0,0.0,2023-07-22,,,
1,0.0,0.0,0.0,0.0,0.0,2023-07-23,,,
2,0.0,0.0,0.0,0.0,0.0,2023-07-24,,,
3,0.0,0.0,0.0,0.0,0.0,2023-07-25,,,
4,0.0,0.0,0.0,0.0,0.0,2023-07-26,,,


In [432]:
training_load_rolling = scatter_plot(overall_by_date_df, "Date", "Load - Rolling weekly AVG")
time_rolling = scatter_plot(overall_by_date_df, "Date", "Time - Rolling weekly AVG")
run_kms_rolling = scatter_plot(overall_by_date_df, "Date", "Run KMs - Rolling weekly AVG")
(training_load_rolling & time_rolling) | (run_kms_rolling)

## Calculate the percentage of the weekly distance and the increase from the previous week.

In [None]:
overall_by_date_df["Run KMs - Rolling total"] = 7*overall_by_date_df["Run KMs - Rolling weekly AVG"]
overall_by_date_df["Percent_distance_tot"] = overall_by_date_df.run_dist/ (7*overall_by_date_df["Run KMs - Rolling weekly AVG"])
running_df["Distance_Percent_increase"] = overall_by_date_df["Run KMs - Rolling total"].diff(periods=7)
running_df["Period"] = None
running_df["Success_status"] = None
running_df["Long_run_percent"] = None

In [433]:
display(Markdown("## Plot the distances and percentages"))
upper_bound = 0.33
lower_bound = 0.1
date_percent_dist = scatter_plot_no_p(overall_by_date_df, "Date", "Percent_distance_tot", lower_bound, upper_bound)
date_rolling_dist = scatter_plot_no_p(overall_by_date_df, "Date", "Run KMs - Rolling weekly AVG", 30, 80)
date_dist_incre = scatter_plot_no_p(overall_by_date_df, "Date", "Distance_percent_increase", 0, 0.1)
date_long_per = scatter_plot_no_p(overall_by_date_df, "Date", "Long_run_percent", 0.15, 0.5)

## Plot the distances and percentages

NameError: name 'overall_by_date_df_df' is not defined

In [None]:
(date_percent_dist) & (date_rolling_dist) & (date_dist_incre) & (date_long_per)

In [None]:
display(Markdown("### Plots of distances and percentages tracked by period"))
(date_percent_dist_period) & (date_rolling_dist_period) & (date_dist_incre_period) & (date_long_per_period)

### Plots of distances and percentages tracked by period

In [None]:
date_stride_leng = scatter_plot_no_p(running_df, "Date", 'Avg Stride Length')
date_vert_rat = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Ratio')
date_cadence = scatter_plot_no_p(running_df, "Date", 'Avg Run Cadence')
date_vert_osc = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Oscillation')
date_grd_cont = scatter_plot_no_p(running_df, "Date", 'Avg Ground Contact Time')
date_l_balance = scatter_plot_no_p(running_df, "Date","L_GCT Balance",0.45, 0.55)
date_r_balance = scatter_plot_no_p(running_df, "Date","R_GCT Balance", 0.45, 0.55)

(date_stride_leng | date_vert_rat) & (date_cadence | date_vert_osc) & (date_grd_cont ) & (date_l_balance | date_r_balance)

In [None]:
def get_rtype_by_weekday(run_date):
    if (run_date.weekday() == 0):
        r_day = "Mon"
        r_type = "Gym"
    elif (run_date.weekday() == 1):
        r_day = "Tue"
        r_type = "Intervals"
    elif (run_date.weekday() == 2):
        r_day = "Wed"
        r_type = "rest"
    elif (run_date.weekday() == 3):
        r_day = "Thur"
        r_type = "Intervals"
    elif (run_date.weekday() == 4):
        r_day = "Fri"
        r_type = "Gym"
    elif (run_date.weekday() == 5):
        r_day = "Sat"
        r_type = "rest"
    elif (run_date.weekday() == 6):
        r_day = "Sun"
        r_type = "Long_run"
    return ({"day" : r_day, "r_type": r_type})


In [None]:
def get_dist_by_type(r_type, week_dist, long_dist):
    if r_type == "Long_run": 
        r_dist = long_dist
    elif r_type == "Intervals":
        r_dist = (week_dist - long_dist)/2
    else: r_dist = 0
    return(r_dist)

In [None]:
last_date = running_df.loc[running_df.Date.idxmax(),"Date"]
second_last_date = last_date - timedelta(days = 7)
third_last_date = second_last_date - timedelta(days = 7)
fourth_last_date = third_last_date - timedelta(days = 7)
last_week_rows = running_df[(running_df["Date"]>=second_last_date) & (running_df["Date"]<=last_date)]
second_week_rows = running_df[(running_df["Date"]<=second_last_date) & (running_df["Date"]>=third_last_date)]
third_week_rows = running_df[(running_df["Date"]>=fourth_last_date) & (running_df["Date"]<=third_last_date)]
last_dist = last_week_rows.Distance.sum()
second_tot = second_week_rows.Distance.sum()
third_tot = third_week_rows.Distance.sum()
last_month_date = last_date - timedelta(days = 30)
last_month_rows = last_week_rows = running_df[(running_df["Date"]>=last_month_date) & (running_df["Date"]<=last_date)]
print(last_dist, second_tot, third_tot)

50.2 42.43000000000001 28.95


In [None]:
def reset_date_next_mon(date):
    day = date.weekday()
    day_week = day["day"]
    if day_week == "Mon":
        return(date)
    elif day_week == "Tue": return(date+timedelta(days=6))
    elif day_week == "Wed": return(date+timedelta(days=5))
    elif day_week == "Thur": return(date+timedelta(days=4))
    elif day_week == "Fri": return(date+timedelta(days=3))
    elif day_week == "Sat": return(date+timedelta(days=2))
    elif day_week == "Sun": return(date+timedelta(days=1))
    return

In [None]:
init_week_dist = 1.1*(third_tot + second_tot + last_dist)/3
init_long_dist = last_month_rows.Distance.max()
init_date = last_date + timedelta(days = 1)
check_day = get_rtype_by_weekday(init_date)
init_date = reset_date_next_mon(init_date)
r_type = check_day["r_type"]
r_dist = get_dist_by_type(r_type,init_week_dist, init_long_dist)
init_week_num = 0
init_dict = {
    "Week_No": init_week_num,
    "Day": check_day["day"],
    "Date": init_date,
    "Week_tot": init_week_dist,
    "Run_type": r_type,
    "Distance": r_dist,
    "Down_week": "N",
    "Percent_week_tot": 100*r_dist/init_week_dist
}
init_row = pd.DataFrame(index = [0],data=init_dict)

TypeError: 'int' object is not subscriptable

In [None]:
tot_w = 52
new_week_dist = init_week_dist
long_dist = init_long_dist
new_date = init_date
week_no = init_week_num
down_week_count = 1
DW = "N"
plan_df = pd.DataFrame(columns = ["Week_No", "Day","Date", "Distance", "Run_type", "Down_week", "Week_tot", "Percent_week_tot"])
plan_df = pd.concat([plan_df])
week_day_bound = 12
long_run_bound = 35
week_bound = 100
for nd in range(tot_w):
    check_day = get_rtype_by_weekday(new_date)
    # Get the weekly totalts
    down_week_count +=1
    down_week_count = down_week_count%4
    if down_week_count == 0:
        mul = 0.9
        DW = "Y"
    else:
        mul = 1.1
        DW = "N"
    if new_week_dist * mul >= week_bound:
        new_week_dist = week_bound
        week_bound *= 1.1
        long_run_bound *= 1.1
        week_day_bound *= 1.1
        mul = 1.0
    new_week_dist *= mul
    long_dist *= mul
    # Set default distances for the overflow days
    mon_dist = 0
    wed_dist = 0
    fri_dist = 0
    sat_dist = 0
    # Add Sun
    sun_date = new_date +timedelta(days=6)
    if long_dist > min(0.5*new_week_dist, long_run_bound):
        sun_dist = min(0.5*new_week_dist, long_run_bound)
        extra_dist = long_dist - sun_dist
        if extra_dist <= week_day_bound:
            sat_dist = extra_dist
        elif extra_dist > week_day_bound and extra_dist <= 2*week_day_bound:
            wed_dist += 0.5*extra_dist
            sat_dist += 0.5*extra_dist
        elif extra_dist >= 2*week_day_bound and extra_dist < 3*week_day_bound:
            wed_dist += extra_dist/3
            fri_dist += extra_dist/3
            sat_dist += extra_dist/3
        elif extra_dist >= 3*week_day_bound and extra_dist < 4*week_day_bound:
            mon_dist += extra_dist/4
            wed_dist += extra_dist/4
            fri_dist += extra_dist/4
            sat_dist += extra_dist/4
        else:
            mon_dist += week_day_bound
            wed_dist += week_day_bound
            fri_dist += week_day_bound
            sat_dist += week_day_bound
    else: sun_dist = long_dist
    # Set the Workout distances 
    # - Tues
    tue_date = new_date + timedelta(days=1)
    tue_dist = 0.5*(new_week_dist - sun_dist)
    if tue_dist > week_day_bound:
        extra_dist_t = tue_dist - week_day_bound
        mon_dist += min(extra_dist_t, week_day_bound)
        tue_dist = week_day_bound
    # - Thur
    thur_date = new_date +timedelta(days=3)
    thur_dist = 0.5*(new_week_dist - sun_dist)
    if thur_dist > week_day_bound:
        extra_dist_th = thur_dist - week_day_bound
        fri_dist += min(extra_dist_th, week_day_bound)
        thur_dist = week_day_bound
    # - Cap the overflow distances
    mon_dist = min(mon_dist, week_day_bound)
    wed_dist = min(wed_dist, week_day_bound)
    fri_dist = min(fri_dist, week_day_bound)
    sat_dist = min(sat_dist, week_day_bound)
    # - Set the overflow dates
    wed_date = new_date +timedelta(days=2)
    fri_date = new_date +timedelta(days=4)
    sat_date = new_date +timedelta(days=5)
    new_week_dist = sum([mon_dist, tue_dist, wed_dist, thur_dist, fri_dist, sat_dist, sun_dist])
    mon_dict =  {
        "Week_No": week_no,
        "Day": "Mon",
        "Date": new_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": mon_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*mon_dist/new_week_dist
        }
    tue_dict =  {
        "Week_No": week_no,
        "Day": "Tue",
        "Date": tue_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": tue_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*tue_dist/new_week_dist
        }
    wed_dict =  {
        "Week_No": week_no,
        "Day": "Wed",
        "Date": wed_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": wed_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*wed_dist/new_week_dist
        }
    thur_dict =  {
        "Week_No": week_no,
        "Day": "Thur",
        "Date": thur_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": thur_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*thur_dist/new_week_dist
        }
    fri_dict =  {
        "Week_No": week_no,
        "Day": "Fri",
        "Date": fri_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": fri_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*fri_dist/new_week_dist
        }
    sat_dict =  {
        "Week_No": week_no,
        "Day": "Sat",
        "Date": sat_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": sat_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sat_dist/new_week_dist
        }
    sun_dict =  {
        "Week_No": week_no,
        "Day": "Sun",
        "Date": sun_date,
        "Week_tot": new_week_dist,
        "Run_type": "Long_run",
        "Distance": sun_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sun_dist/new_week_dist
        }
    new_row = pd.DataFrame(index = [nd],data= mon_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+1],data= tue_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+2],data= wed_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+3],data= thur_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+4],data= fri_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+5],data= sat_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+6],data= sun_dict)
    plan_df = pd.concat([plan_df, new_row])
    nd +=7
    week_no+=1
    new_date +=timedelta(days = 7)
plan_df
    
    

Unnamed: 0,Week_No,Day,Date,Distance,Run_type,Down_week,Week_tot,Percent_week_tot
0,0,Mon,2024-05-20 17:09:53,3.663633,Top-up/ rest,N,49.037267,7.471121
1,0,Tue,2024-05-21 17:09:53,12.000000,Workout,N,49.037267,24.471185
2,0,Wed,2024-05-22 17:09:53,0.000000,Top-up/ rest,N,49.037267,0.000000
3,0,Thur,2024-05-23 17:09:53,12.000000,Workout,N,49.037267,24.471185
4,0,Fri,2024-05-24 17:09:53,3.663633,Top-up/ rest,N,49.037267,7.471121
...,...,...,...,...,...,...,...,...
53,51,Wed,2025-05-14 17:09:53,0.000000,Top-up/ rest,N,179.944525,0.000000
54,51,Thur,2025-05-15 17:09:53,23.384605,Workout,N,179.944525,12.995452
55,51,Fri,2025-05-16 17:09:53,23.384605,Top-up/ rest,N,179.944525,12.995452
56,51,Sat,2025-05-17 17:09:53,18.201006,Top-up/ rest,N,179.944525,10.114787


In [None]:
weeks_df = pd.DataFrame(columns = [
    "Dates", "Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun",
    "Week_tot", "Percent_week_tot", "Down_week"])
week_nums = plan_df.Week_No.unique()

for wn in week_nums:
    per_week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Percent_week_tot"].sum()
    week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Week_tot"].sum()
    dw = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Down_week"].sum()
    week_date_st = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon")]["Date"].item().date()
    week_date_end = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun")]["Date"].item().date()
    week_date = str(week_date_st) + " -- " + str(week_date_end)
    new_dict = {
        "Dates": week_date,
        "Mon" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon"),"Distance"].sum(),
        "Tue" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Tue"),"Distance"].sum(),
        "Wed" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Wed"),"Distance"].sum(),
        "Thur" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Thur"),"Distance"].sum(),
        "Fri" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Fri"),"Distance"].sum(),
        "Sat" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sat"),"Distance"].sum(),
        "Sun" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Distance"].sum(),
        "Week_tot" : week_tot,
        "Percent_week_tot" : per_week_tot,
        "Down_week" : dw
    }
    new_row = pd.DataFrame(index = [wn], data= new_dict)
    weeks_df = pd.concat([weeks_df, new_row])
weeks_df

Unnamed: 0,Dates,Mon,Tue,Wed,Thur,Fri,Sat,Sun,Week_tot,Percent_week_tot,Down_week
0,2024-05-20 -- 2024-05-26,3.663633,12.0,0.0,12.0,3.663633,0.0,17.71,49.037267,36.11539,N
1,2024-05-27 -- 2024-06-02,5.229997,12.0,0.0,12.0,5.229997,0.0,19.481,53.940993,36.11539,N
2,2024-06-03 -- 2024-06-09,3.506997,12.0,0.0,12.0,3.506997,0.0,17.5329,48.546894,36.11539,Y
3,2024-06-10 -- 2024-06-16,5.057697,12.0,0.0,12.0,5.057697,0.0,19.28619,53.401583,36.11539,N
4,2024-06-17 -- 2024-06-23,6.763466,12.0,0.0,12.0,6.763466,0.0,21.214809,58.741742,36.11539,N
5,2024-06-24 -- 2024-06-30,8.639813,12.0,0.0,12.0,8.639813,0.0,23.33629,64.615916,36.11539,N
6,2024-07-01 -- 2024-07-07,6.575832,12.0,0.0,12.0,6.575832,0.0,21.002661,58.154324,36.11539,Y
7,2024-07-08 -- 2024-07-14,8.433415,12.0,0.0,12.0,8.433415,0.0,23.102927,63.969757,36.11539,N
8,2024-07-15 -- 2024-07-21,10.476756,12.0,0.0,12.0,10.476756,0.0,25.41322,70.366732,36.11539,N
9,2024-07-22 -- 2024-07-28,12.0,12.0,0.0,12.0,12.0,0.0,27.954542,75.954542,36.804306,N


In [None]:
thresh_dates = plan_df.loc[plan_df.Week_tot > 100, "Date"]
indices = thresh_dates.index
mara_date = thresh_dates[indices[0]]
print("Marathon ready by "+ str(mara_date))
print("Earliest race "+ str(mara_date + timedelta(weeks = 3)))

Marathon ready by 2024-11-04 17:09:53
Earliest race 2024-11-25 17:09:53


In [None]:
dt.date(2023, 5, 21) + timedelta(weeks=45 + 3)

datetime.date(2024, 4, 21)

# Load the activities data

In [None]:
activities_df = pd.read_csv("Activities.csv", header = 0, parse_dates=["Date"])
activities_df.head()

  activities_df = pd.read_csv("Activities.csv", header = 0, parse_dates=["Date"])


Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Max Resp,Stress Change,Stress Start,Stress End,Avg Stress,Max Stress,Moving Time,Elapsed Time,Min Elevation,Max Elevation
0,Indoor Cycling,2024-07-17 20:59:00,False,Indoor Cycling,0.0,58,00:20:06,88,124,0.2,...,17,--,--,--,--,--,00:00:00,00:20:06,--,--
1,Indoor Cycling,2024-07-17 17:59:00,False,Indoor Cycling,0.0,427,00:45:58,145,182,3.3,...,43,--,--,--,--,--,00:00:00,00:45:58,--,--
2,Cycling,2024-07-17 17:32:00,False,Bath and North East Somerset Cycling,4.75,131,00:20:13,117,145,1.1,...,--,--,--,--,--,--,00:18:22,00:20:13,25,51
3,Cycling,2024-07-17 11:34:00,False,Bath and North East Somerset Cycling,4.12,103,00:18:21,108,141,0.5,...,--,--,--,--,--,--,00:17:31,03:00:05,26,53
4,Running,2024-07-16 18:37:00,False,Bath and North East Somerset Running,4.75,287,00:37:30,138,169,2.3,...,34,--,--,--,--,--,00:30:21,00:39:24,23,50


In [None]:
activities_df["Date"] = pd.to_datetime(activities_df["Date"], dayfirst= True)
activities_df.head()

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Stress End,Avg Stress,Max Stress,Moving Time,Elapsed Time,Min Elevation,Max Elevation,Period,Duration_seconds,hr_zone
0,Running,2024-07-21 08:00:25,False,Bath and North East Somerset Running,15.83,1030,01:42:18,176.0,188,4.4,...,--,--,--,01:40:13,02:02:12,15,186,0,7332.0,5
1,Indoor Cycling,2024-07-17 20:59:15,False,Indoor Cycling,0.0,58,00:20:06,88.0,124,0.2,...,--,--,--,00:00:00,00:20:06,--,--,0,1206.0,1
2,Indoor Cycling,2024-07-17 17:59:00,False,Indoor Cycling,0.0,427,00:45:58,145.0,182,3.3,...,--,--,--,00:00:00,00:45:58,--,--,0,2758.0,2
3,Cycling,2024-07-17 17:32:09,False,Bath and North East Somerset Cycling,4.75,131,00:20:13,117.0,145,1.1,...,--,--,--,00:18:22,00:20:13,25,51,0,1213.0,1
4,Cycling,2024-07-17 11:34:19,False,Bath and North East Somerset Cycling,4.12,103,00:18:21,108.0,141,0.5,...,--,--,--,00:17:31,03:00:05,26,53,0,10805.0,1


## Get the running data
### Extract data from the strings.

In [None]:
#running_df.rename(columns = {"Time": "Duration"}, inplace= True)
running_df.loc[:,"Distance"] = pd.to_numeric(running_df["Distance"])
running_df.loc[:,['L_GCT Balance', 'R_GCT Balance']] = running_df["Avg GCT Balance"].str.split(" / ", expand= True)
running_df.loc[:,"L_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
running_df.loc[:, "R_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
col_list = ['Avg Stride Length', 'Avg Vertical Ratio', 'Avg Vertical Oscillation', 'Avg Run Cadence', 
            'Avg Ground Contact Time', "L_GCT Balance", "R_GCT Balance"]
for col in col_list:
    col_df = running_df[col]
    running_df[col] = pd.to_numeric(col_df, errors='coerce')
running_df.loc[:,"L_GCT Balance"] = running_df["L_GCT Balance"] / 1000
running_df.loc[:,"R_GCT Balance"] = running_df["R_GCT Balance"] / 1000
running_df.loc[:,"Date"] = pd.to_datetime(running_df["Date"], dayfirst=True, errors = "coerce")
running_df["Date"].dtype
#running_df.columns

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  running_df.rename(columns = {"Time": "Duration"}, inplace= True)


AttributeError: Can only use .str accessor with string values!

In [None]:
display(Markdown("## Calculate the rolling totals and percentages."))
first_date = df.Date.min()
print(first_date)
for ind in running_df.index:
    row = running_df.loc[ind]
    row_date = row["Date"]
    start_date = row_date - timedelta(days = 7)
    before_start_date = start_date - timedelta(days = 7)
    # Get the activities from the last 7 days
    week_rows = running_df[(running_df["Date"]>=start_date) & (running_df["Date"]<=row_date)]
    running_df.loc[ind,"Rolling_distance_tot"] = week_rows.Distance.sum()
    running_df.loc[ind,"Percent_distance_tot"] = running_df.loc[ind,"Distance"] / running_df.loc[ind,"Rolling_distance_tot"] 
    long_dist = running_df.loc[week_rows.Distance.idxmax(), "Distance"]
    tot_dist = running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]
    if (long_dist is not None and tot_dist is not None):
        running_df.loc[ind,"Long_run_percent"] = running_df.loc[week_rows.Distance.idxmax(), "Distance"] / running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]
    # Get the activities from the 7 days before the last
    week_before_rows = running_df[(running_df["Date"]>=before_start_date) & (running_df["Date"]<=start_date)]
    before_tot = week_before_rows.Distance.sum()
    if before_tot > 10:
        running_df.loc[ind,"Distance_percent_increase"] = (running_df.loc[ind,"Rolling_distance_tot"]-before_tot) / before_tot
    # Add the Period and Success status data
    #if row_date >= first_date:
    #    df_row = df[(row_date.date()<df.Date.dt.date) and (df.Date.dt.date < row_date.date()+timedelta(days=1))]
    #    print(row_date.date()<df.Date.dt.date < row_date.date()+timedelta(days=1))
    #    print(df_row)
    #    ind_2 = df_row.index[0]
    #    running_df.loc[ind, "Period"] = df_row.loc[ind_2,"Period"]
    #    running_df.loc[ind,"Success_status"] = df_row.loc[ind_2,"Success_status"]
running_df.head()
    

## Calculate the rolling totals and percentages.

2023-11-01 00:00:00


  running_df.loc[ind,"Percent_distance_tot"] = running_df.loc[ind,"Distance"] / running_df.loc[ind,"Rolling_distance_tot"]
  running_df.loc[ind,"Long_run_percent"] = running_df.loc[week_rows.Distance.idxmax(), "Distance"] / running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]


Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Duration,Avg HR,Max HR,Aerobic TE,...,Max Elevation,Rolling_distance_tot,Percent_distance_tot,Distance_Percent_increase,Period,Success_status,Long_run_percent,L_GCT Balance,R_GCT Balance,Distance_percent_increase
0,Running,2024-05-19 17:09:53,False,Bath and North East Somerset Running,3.19,190,00:20:02,154,164,2.2,...,33,50.2,0.063546,,,,,0.523,0.523,0.183125
1,Running,2024-05-19 16:30:37,False,Bath and North East Somerset Running,1.21,73,00:07:18.9,150,168,1.6,...,38,47.01,0.025739,,,,,0.514,0.514,0.107942
2,Running,2024-05-19 15:17:30,False,Wiltshire Running,1.7,94,00:10:29,147,164,1.5,...,202,45.8,0.037118,,,,,0.504,0.504,0.079425
3,Running,2024-05-19 14:30:55,False,Bath and North East Somerset Running,1.33,133,00:12:01,176,187,2.3,...,204,44.1,0.030159,,,,,0.503,0.503,0.039359
4,Running,2024-05-19 13:45:01,False,Bath and North East Somerset Running,1.21,73,00:07:33.7,152,162,1.3,...,38,42.77,0.028291,,,,,0.512,0.512,0.008013


In [None]:
display(Markdown("Function - scatter plot without perid data but with upper and lower bounds."))
def scatter_plot_no_p(data_frame, col1, col2, low = 0, high = 1.0):
    # Plot the points labelled Success first then others
    data_crop = data_frame[[col1, col2]].dropna(how = "any")
    data_crop["Band"] = None
    data_crop.loc[data_crop[col2]< low, "Band"] = "low"
    data_crop.loc[data_crop[col2]> high, "Band"] = "high"
    data_crop.loc[(data_crop[col2]>=low) & (data_crop[col2]<= high), "Band"] = "Mid"
    if col1 == "Date": xvar = col1+":T"
    else: xvar = col1+":Q"
    points = alt.Chart(data_crop).mark_square(size=20).encode(
            x=xvar,
            y=alt.Y(col2+":Q").scale(zero=False),
            color = 'Band'+":N",
            tooltip = [col1, col2]
        ).properties(
    width=800,
    height=300
).interactive()
    line = alt.Chart(data_crop).mark_line(
            color='blue',
            size=1
        ).transform_window(
            rolling_mean='mean('+col2+')',
            frame=[-5, 5]
        ).encode(
        x = xvar,
        y = alt.Y('rolling_mean:Q').scale(zero=False)
    ).properties(
    width=800,
    height=300
)
    return(points+line)


Function - scatter plot without perid data but with upper and lower bounds.

In [None]:
display(Markdown("## Plot the distances and percentages"))
upper_bound = 0.33
lower_bound = 0.1
date_percent_dist = scatter_plot_no_p(running_df, "Date", "Percent_distance_tot", lower_bound, upper_bound)
date_rolling_dist = scatter_plot_no_p(running_df, "Date", "Rolling_distance_tot", 30, 80)
date_dist_incre = scatter_plot_no_p(running_df, "Date", "Distance_percent_increase", 0, 0.1)
date_long_per = scatter_plot_no_p(running_df, "Date", "Long_run_percent", 0.15, 0.5)
date_percent_dist_period = scatter_plot(running_df, "Date", "Percent_distance_tot")
date_rolling_dist_period = scatter_plot(running_df, "Date", "Rolling_distance_tot")
date_dist_incre_period = scatter_plot(running_df, "Date", "Distance_percent_increase")
date_long_per_period = scatter_plot(running_df, "Date", "Long_run_percent")

## Plot the distances and percentages

In [None]:
(date_percent_dist) & (date_rolling_dist) & (date_dist_incre) & (date_long_per)

In [None]:
display(Markdown("### Plots of distances and percentages tracked by period"))
(date_percent_dist_period) & (date_rolling_dist_period) & (date_dist_incre_period) & (date_long_per_period)

### Plots of distances and percentages tracked by period



In [None]:
date_stride_leng = scatter_plot_no_p(running_df, "Date", 'Avg Stride Length')
date_vert_rat = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Ratio')
date_cadence = scatter_plot_no_p(running_df, "Date", 'Avg Run Cadence')
date_vert_osc = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Oscillation')
date_grd_cont = scatter_plot_no_p(running_df, "Date", 'Avg Ground Contact Time')
date_l_balance = scatter_plot_no_p(running_df, "Date","L_GCT Balance",0.45, 0.55)
date_r_balance = scatter_plot_no_p(running_df, "Date","R_GCT Balance", 0.45, 0.55)

(date_stride_leng | date_vert_rat) & (date_cadence | date_vert_osc) & (date_grd_cont ) & (date_l_balance | date_r_balance)

In [None]:
def get_rtype_by_weekday(run_date):
    if (run_date.weekday() == 0):
        r_day = "Mon"
        r_type = "Gym"
    elif (run_date.weekday() == 1):
        r_day = "Tue"
        r_type = "Intervals"
    elif (run_date.weekday() == 2):
        r_day = "Wed"
        r_type = "rest"
    elif (run_date.weekday() == 3):
        r_day = "Thur"
        r_type = "Intervals"
    elif (run_date.weekday() == 4):
        r_day = "Fri"
        r_type = "Gym"
    elif (run_date.weekday() == 5):
        r_day = "Sat"
        r_type = "rest"
    elif (run_date.weekday() == 6):
        r_day = "Sun"
        r_type = "Long_run"
    return ({"day" : r_day, "r_type": r_type})


In [None]:
def get_dist_by_type(r_type, week_dist, long_dist):
    if r_type == "Long_run": 
        r_dist = long_dist
    elif r_type == "Intervals":
        r_dist = (week_dist - long_dist)/2
    else: r_dist = 0
    return(r_dist)

In [None]:
last_date = running_df.loc[running_df.Date.idxmax(),"Date"]
second_last_date = last_date - timedelta(days = 7)
third_last_date = second_last_date - timedelta(days = 7)
fourth_last_date = third_last_date - timedelta(days = 7)
last_week_rows = running_df[(running_df["Date"]>=second_last_date) & (running_df["Date"]<=last_date)]
second_week_rows = running_df[(running_df["Date"]<=second_last_date) & (running_df["Date"]>=third_last_date)]
third_week_rows = running_df[(running_df["Date"]>=fourth_last_date) & (running_df["Date"]<=third_last_date)]
last_dist = last_week_rows.Distance.sum()
second_tot = second_week_rows.Distance.sum()
third_tot = third_week_rows.Distance.sum()
last_month_date = last_date - timedelta(days = 30)
last_month_rows = last_week_rows = running_df[(running_df["Date"]>=last_month_date) & (running_df["Date"]<=last_date)]
print(last_dist, second_tot, third_tot)

50.2 42.43000000000001 28.95


In [None]:
def reset_date_next_mon(date):
    day = date.weekday()
    day_week = day["day"]
    if day_week == "Mon":
        return(date)
    elif day_week == "Tue": return(date+timedelta(days=6))
    elif day_week == "Wed": return(date+timedelta(days=5))
    elif day_week == "Thur": return(date+timedelta(days=4))
    elif day_week == "Fri": return(date+timedelta(days=3))
    elif day_week == "Sat": return(date+timedelta(days=2))
    elif day_week == "Sun": return(date+timedelta(days=1))
    return

In [None]:
init_week_dist = 1.1*(third_tot + second_tot + last_dist)/3
init_long_dist = last_month_rows.Distance.max()
init_date = last_date + timedelta(days = 1)
check_day = get_rtype_by_weekday(init_date)
init_date = reset_date_next_mon(init_date)
r_type = check_day["r_type"]
r_dist = get_dist_by_type(r_type,init_week_dist, init_long_dist)
init_week_num = 0
init_dict = {
    "Week_No": init_week_num,
    "Day": check_day["day"],
    "Date": init_date,
    "Week_tot": init_week_dist,
    "Run_type": r_type,
    "Distance": r_dist,
    "Down_week": "N",
    "Percent_week_tot": 100*r_dist/init_week_dist
}
init_row = pd.DataFrame(index = [0],data=init_dict)

TypeError: 'int' object is not subscriptable

In [None]:
tot_w = 52
new_week_dist = init_week_dist
long_dist = init_long_dist
new_date = init_date
week_no = init_week_num
down_week_count = 1
DW = "N"
plan_df = pd.DataFrame(columns = ["Week_No", "Day","Date", "Distance", "Run_type", "Down_week", "Week_tot", "Percent_week_tot"])
plan_df = pd.concat([plan_df])
week_day_bound = 12
long_run_bound = 35
week_bound = 100
for nd in range(tot_w):
    check_day = get_rtype_by_weekday(new_date)
    # Get the weekly totalts
    down_week_count +=1
    down_week_count = down_week_count%4
    if down_week_count == 0:
        mul = 0.9
        DW = "Y"
    else:
        mul = 1.1
        DW = "N"
    if new_week_dist * mul >= week_bound:
        new_week_dist = week_bound
        week_bound *= 1.1
        long_run_bound *= 1.1
        week_day_bound *= 1.1
        mul = 1.0
    new_week_dist *= mul
    long_dist *= mul
    # Set default distances for the overflow days
    mon_dist = 0
    wed_dist = 0
    fri_dist = 0
    sat_dist = 0
    # Add Sun
    sun_date = new_date +timedelta(days=6)
    if long_dist > min(0.5*new_week_dist, long_run_bound):
        sun_dist = min(0.5*new_week_dist, long_run_bound)
        extra_dist = long_dist - sun_dist
        if extra_dist <= week_day_bound:
            sat_dist = extra_dist
        elif extra_dist > week_day_bound and extra_dist <= 2*week_day_bound:
            wed_dist += 0.5*extra_dist
            sat_dist += 0.5*extra_dist
        elif extra_dist >= 2*week_day_bound and extra_dist < 3*week_day_bound:
            wed_dist += extra_dist/3
            fri_dist += extra_dist/3
            sat_dist += extra_dist/3
        elif extra_dist >= 3*week_day_bound and extra_dist < 4*week_day_bound:
            mon_dist += extra_dist/4
            wed_dist += extra_dist/4
            fri_dist += extra_dist/4
            sat_dist += extra_dist/4
        else:
            mon_dist += week_day_bound
            wed_dist += week_day_bound
            fri_dist += week_day_bound
            sat_dist += week_day_bound
    else: sun_dist = long_dist
    # Set the Workout distances 
    # - Tues
    tue_date = new_date + timedelta(days=1)
    tue_dist = 0.5*(new_week_dist - sun_dist)
    if tue_dist > week_day_bound:
        extra_dist_t = tue_dist - week_day_bound
        mon_dist += min(extra_dist_t, week_day_bound)
        tue_dist = week_day_bound
    # - Thur
    thur_date = new_date +timedelta(days=3)
    thur_dist = 0.5*(new_week_dist - sun_dist)
    if thur_dist > week_day_bound:
        extra_dist_th = thur_dist - week_day_bound
        fri_dist += min(extra_dist_th, week_day_bound)
        thur_dist = week_day_bound
    # - Cap the overflow distances
    mon_dist = min(mon_dist, week_day_bound)
    wed_dist = min(wed_dist, week_day_bound)
    fri_dist = min(fri_dist, week_day_bound)
    sat_dist = min(sat_dist, week_day_bound)
    # - Set the overflow dates
    wed_date = new_date +timedelta(days=2)
    fri_date = new_date +timedelta(days=4)
    sat_date = new_date +timedelta(days=5)
    new_week_dist = sum([mon_dist, tue_dist, wed_dist, thur_dist, fri_dist, sat_dist, sun_dist])
    mon_dict =  {
        "Week_No": week_no,
        "Day": "Mon",
        "Date": new_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": mon_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*mon_dist/new_week_dist
        }
    tue_dict =  {
        "Week_No": week_no,
        "Day": "Tue",
        "Date": tue_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": tue_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*tue_dist/new_week_dist
        }
    wed_dict =  {
        "Week_No": week_no,
        "Day": "Wed",
        "Date": wed_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": wed_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*wed_dist/new_week_dist
        }
    thur_dict =  {
        "Week_No": week_no,
        "Day": "Thur",
        "Date": thur_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": thur_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*thur_dist/new_week_dist
        }
    fri_dict =  {
        "Week_No": week_no,
        "Day": "Fri",
        "Date": fri_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": fri_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*fri_dist/new_week_dist
        }
    sat_dict =  {
        "Week_No": week_no,
        "Day": "Sat",
        "Date": sat_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": sat_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sat_dist/new_week_dist
        }
    sun_dict =  {
        "Week_No": week_no,
        "Day": "Sun",
        "Date": sun_date,
        "Week_tot": new_week_dist,
        "Run_type": "Long_run",
        "Distance": sun_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sun_dist/new_week_dist
        }
    new_row = pd.DataFrame(index = [nd],data= mon_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+1],data= tue_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+2],data= wed_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+3],data= thur_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+4],data= fri_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+5],data= sat_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+6],data= sun_dict)
    plan_df = pd.concat([plan_df, new_row])
    nd +=7
    week_no+=1
    new_date +=timedelta(days = 7)
plan_df
    
    

Unnamed: 0,Week_No,Day,Date,Distance,Run_type,Down_week,Week_tot,Percent_week_tot
0,0,Mon,2024-05-20 17:09:53,3.663633,Top-up/ rest,N,49.037267,7.471121
1,0,Tue,2024-05-21 17:09:53,12.000000,Workout,N,49.037267,24.471185
2,0,Wed,2024-05-22 17:09:53,0.000000,Top-up/ rest,N,49.037267,0.000000
3,0,Thur,2024-05-23 17:09:53,12.000000,Workout,N,49.037267,24.471185
4,0,Fri,2024-05-24 17:09:53,3.663633,Top-up/ rest,N,49.037267,7.471121
...,...,...,...,...,...,...,...,...
53,51,Wed,2025-05-14 17:09:53,0.000000,Top-up/ rest,N,179.944525,0.000000
54,51,Thur,2025-05-15 17:09:53,23.384605,Workout,N,179.944525,12.995452
55,51,Fri,2025-05-16 17:09:53,23.384605,Top-up/ rest,N,179.944525,12.995452
56,51,Sat,2025-05-17 17:09:53,18.201006,Top-up/ rest,N,179.944525,10.114787


In [None]:
weeks_df = pd.DataFrame(columns = [
    "Dates", "Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun",
    "Week_tot", "Percent_week_tot", "Down_week"])
week_nums = plan_df.Week_No.unique()

for wn in week_nums:
    per_week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Percent_week_tot"].sum()
    week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Week_tot"].sum()
    dw = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Down_week"].sum()
    week_date_st = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon")]["Date"].item().date()
    week_date_end = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun")]["Date"].item().date()
    week_date = str(week_date_st) + " -- " + str(week_date_end)
    new_dict = {
        "Dates": week_date,
        "Mon" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon"),"Distance"].sum(),
        "Tue" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Tue"),"Distance"].sum(),
        "Wed" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Wed"),"Distance"].sum(),
        "Thur" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Thur"),"Distance"].sum(),
        "Fri" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Fri"),"Distance"].sum(),
        "Sat" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sat"),"Distance"].sum(),
        "Sun" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Distance"].sum(),
        "Week_tot" : week_tot,
        "Percent_week_tot" : per_week_tot,
        "Down_week" : dw
    }
    new_row = pd.DataFrame(index = [wn], data= new_dict)
    weeks_df = pd.concat([weeks_df, new_row])
weeks_df

Unnamed: 0,Dates,Mon,Tue,Wed,Thur,Fri,Sat,Sun,Week_tot,Percent_week_tot,Down_week
0,2024-05-20 -- 2024-05-26,3.663633,12.0,0.0,12.0,3.663633,0.0,17.71,49.037267,36.11539,N
1,2024-05-27 -- 2024-06-02,5.229997,12.0,0.0,12.0,5.229997,0.0,19.481,53.940993,36.11539,N
2,2024-06-03 -- 2024-06-09,3.506997,12.0,0.0,12.0,3.506997,0.0,17.5329,48.546894,36.11539,Y
3,2024-06-10 -- 2024-06-16,5.057697,12.0,0.0,12.0,5.057697,0.0,19.28619,53.401583,36.11539,N
4,2024-06-17 -- 2024-06-23,6.763466,12.0,0.0,12.0,6.763466,0.0,21.214809,58.741742,36.11539,N
5,2024-06-24 -- 2024-06-30,8.639813,12.0,0.0,12.0,8.639813,0.0,23.33629,64.615916,36.11539,N
6,2024-07-01 -- 2024-07-07,6.575832,12.0,0.0,12.0,6.575832,0.0,21.002661,58.154324,36.11539,Y
7,2024-07-08 -- 2024-07-14,8.433415,12.0,0.0,12.0,8.433415,0.0,23.102927,63.969757,36.11539,N
8,2024-07-15 -- 2024-07-21,10.476756,12.0,0.0,12.0,10.476756,0.0,25.41322,70.366732,36.11539,N
9,2024-07-22 -- 2024-07-28,12.0,12.0,0.0,12.0,12.0,0.0,27.954542,75.954542,36.804306,N


In [None]:
thresh_dates = plan_df.loc[plan_df.Week_tot > 100, "Date"]
indices = thresh_dates.index
mara_date = thresh_dates[indices[0]]
print("Marathon ready by "+ str(mara_date))
print("Earliest race "+ str(mara_date + timedelta(weeks = 3)))

Marathon ready by 2024-11-04 17:09:53
Earliest race 2024-11-25 17:09:53


In [None]:
dt.date(2023, 5, 21) + timedelta(weeks=45 + 3)

datetime.date(2024, 4, 21)

In [None]:

activities_df["Date"] = pd.to_datetime(activities_df["Date"], dayfirst= True)
activities_df.head()

Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Time,Avg HR,Max HR,Aerobic TE,...,Max Resp,Stress Change,Stress Start,Stress End,Avg Stress,Max Stress,Moving Time,Elapsed Time,Min Elevation,Max Elevation
0,Indoor Cycling,2024-07-17 20:59:00,False,Indoor Cycling,0.0,58,00:20:06,88,124,0.2,...,17,--,--,--,--,--,00:00:00,00:20:06,--,--
1,Indoor Cycling,2024-07-17 17:59:00,False,Indoor Cycling,0.0,427,00:45:58,145,182,3.3,...,43,--,--,--,--,--,00:00:00,00:45:58,--,--
2,Cycling,2024-07-17 17:32:00,False,Bath and North East Somerset Cycling,4.75,131,00:20:13,117,145,1.1,...,--,--,--,--,--,--,00:18:22,00:20:13,25,51
3,Cycling,2024-07-17 11:34:00,False,Bath and North East Somerset Cycling,4.12,103,00:18:21,108,141,0.5,...,--,--,--,--,--,--,00:17:31,03:00:05,26,53
4,Running,2024-07-16 18:37:00,False,Bath and North East Somerset Running,4.75,287,00:37:30,138,169,2.3,...,34,--,--,--,--,--,00:30:21,00:39:24,23,50


In [None]:
activities_df.columns

Index(['Activity Type', 'Date', 'Favorite', 'Title', 'Distance', 'Calories',
       'Time', 'Avg HR', 'Max HR', 'Aerobic TE', 'Avg Bike Cadence',
       'Max Bike Cadence', 'Avg Speed', 'Max Speed', 'Total Ascent',
       'Total Descent', 'Avg Stride Length', 'Avg Vertical Ratio',
       'Avg Vertical Oscillation', 'Avg Ground Contact Time',
       'Avg GCT Balance', 'Avg Bike Cadence.1', 'Max Bike Cadence.1',
       'Normalized Power® (NP®)', 'Training Stress Score®',
       'Max Avg Power (20 min)', 'Avg Power', 'Max Power', 'Grit', 'Flow',
       'Avg. Swolf', 'Avg Stroke Rate', 'Total Reps', 'Total Sets', 'Min Temp',
       'Decompression', 'Best Lap Time', 'Number of Laps', 'Max Temp',
       'Avg Resp', 'Min Resp', 'Max Resp', 'Stress Change', 'Stress Start',
       'Stress End', 'Avg Stress', 'Max Stress', 'Moving Time', 'Elapsed Time',
       'Min Elevation', 'Max Elevation'],
      dtype='object')

## Get the running data
### Extract data from the strings.

In [None]:
running_df = activities_df[activities_df["Activity Type"] == "Running"]
running_df.rename(columns = {"Time": "Duration"}, inplace= True)
running_df["Rolling_distance_tot"] = None
running_df["Percent_distance_tot"] = None
running_df["Distance_Percent_increase"] = None
running_df["Period"] = None
running_df["Success_status"] = None
running_df["Long_run_percent"] = None
running_df["Distance"] = pd.to_numeric(running_df["Distance"])
running_df[['L_GCT Balance', 'R_GCT Balance']] = running_df["Avg GCT Balance"].str.split(" / ", expand= True)
running_df.loc[:,"L_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
running_df.loc[:, "R_GCT Balance"] = running_df.loc[:,"L_GCT Balance"].str.replace(r'\D', '', regex=True)
col_list = ['Avg Stride Length', 'Avg Vertical Ratio', 'Avg Vertical Oscillation', 'Avg Run Cadence', 
            'Avg Ground Contact Time', "L_GCT Balance", "R_GCT Balance"]
for col in col_list:
    col_df = running_df[col]
    running_df[col] = pd.to_numeric(col_df, errors='coerce')
running_df["L_GCT Balance"] = running_df["L_GCT Balance"] / 1000
running_df["R_GCT Balance"] = running_df["R_GCT Balance"] / 1000
running_df["Date"] = pd.to_datetime(running_df["Date"], dayfirst=True, errors = "coerce")
running_df["Date"].dtype
#running_df.columns

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  running_df.rename(columns = {"Time": "Duration"}, inplace= True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  running_df["Rolling_distance_tot"] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  running_df["Percent_distance_tot"] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = valu

KeyError: 'Avg Run Cadence'

In [None]:
display(Markdown("## Calculate the rolling totals and percentages."))
first_date = df.Date.min()
print(first_date)
for ind in running_df.index:
    row = running_df.loc[ind]
    row_date = row["Date"]
    start_date = row_date - timedelta(days = 7)
    before_start_date = start_date - timedelta(days = 7)
    # Get the activities from the last 7 days
    week_rows = running_df[(running_df["Date"]>=start_date) & (running_df["Date"]<=row_date)]
    running_df.loc[ind,"Rolling_distance_tot"] = week_rows.Distance.sum()
    running_df.loc[ind,"Percent_distance_tot"] = running_df.loc[ind,"Distance"] / running_df.loc[ind,"Rolling_distance_tot"] 
    long_dist = running_df.loc[week_rows.Distance.idxmax(), "Distance"]
    tot_dist = running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]
    if (long_dist is not None and tot_dist is not None):
        running_df.loc[ind,"Long_run_percent"] = running_df.loc[week_rows.Distance.idxmax(), "Distance"] / running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]
    # Get the activities from the 7 days before the last
    week_before_rows = running_df[(running_df["Date"]>=before_start_date) & (running_df["Date"]<=start_date)]
    before_tot = week_before_rows.Distance.sum()
    if before_tot > 10:
        running_df.loc[ind,"Distance_percent_increase"] = (running_df.loc[ind,"Rolling_distance_tot"]-before_tot) / before_tot
    # Add the Period and Success status data
    #if row_date >= first_date:
    #    df_row = df[(row_date.date()<df.Date.dt.date) and (df.Date.dt.date < row_date.date()+timedelta(days=1))]
    #    print(row_date.date()<df.Date.dt.date < row_date.date()+timedelta(days=1))
    #    print(df_row)
    #    ind_2 = df_row.index[0]
    #    running_df.loc[ind, "Period"] = df_row.loc[ind_2,"Period"]
    #    running_df.loc[ind,"Success_status"] = df_row.loc[ind_2,"Success_status"]
running_df.head()
    

## Calculate the rolling totals and percentages.

2023-11-01 00:00:00


  running_df.loc[ind,"Percent_distance_tot"] = running_df.loc[ind,"Distance"] / running_df.loc[ind,"Rolling_distance_tot"]
  running_df.loc[ind,"Long_run_percent"] = running_df.loc[week_rows.Distance.idxmax(), "Distance"] / running_df.loc[week_rows.Distance.idxmax(), "Rolling_distance_tot"]


Unnamed: 0,Activity Type,Date,Favorite,Title,Distance,Calories,Duration,Avg HR,Max HR,Aerobic TE,...,Max Elevation,Rolling_distance_tot,Percent_distance_tot,Distance_Percent_increase,Period,Success_status,Long_run_percent,L_GCT Balance,R_GCT Balance,Distance_percent_increase
0,Running,2024-05-19 17:09:53,False,Bath and North East Somerset Running,3.19,190,00:20:02,154,164,2.2,...,33,50.2,0.063546,,,,,0.523,0.523,0.183125
1,Running,2024-05-19 16:30:37,False,Bath and North East Somerset Running,1.21,73,00:07:18.9,150,168,1.6,...,38,47.01,0.025739,,,,,0.514,0.514,0.107942
2,Running,2024-05-19 15:17:30,False,Wiltshire Running,1.7,94,00:10:29,147,164,1.5,...,202,45.8,0.037118,,,,,0.504,0.504,0.079425
3,Running,2024-05-19 14:30:55,False,Bath and North East Somerset Running,1.33,133,00:12:01,176,187,2.3,...,204,44.1,0.030159,,,,,0.503,0.503,0.039359
4,Running,2024-05-19 13:45:01,False,Bath and North East Somerset Running,1.21,73,00:07:33.7,152,162,1.3,...,38,42.77,0.028291,,,,,0.512,0.512,0.008013


In [None]:
display(Markdown("Function - scatter plot without perid data but with upper and lower bounds."))
def scatter_plot_no_p(data_frame, col1, col2, low = 0, high = 1.0):
    # Plot the points labelled Success first then others
    data_crop = data_frame[[col1, col2]].dropna(how = "any")
    data_crop["Band"] = None
    data_crop.loc[data_crop[col2]< low, "Band"] = "low"
    data_crop.loc[data_crop[col2]> high, "Band"] = "high"
    data_crop.loc[(data_crop[col2]>=low) & (data_crop[col2]<= high), "Band"] = "Mid"
    if col1 == "Date": xvar = col1+":T"
    else: xvar = col1+":Q"
    points = alt.Chart(data_crop).mark_square(size=20).encode(
            x=xvar,
            y=alt.Y(col2+":Q").scale(zero=False),
            color = 'Band'+":N",
            tooltip = [col1, col2]
        ).properties(
    width=800,
    height=300
).interactive()
    line = alt.Chart(data_crop).mark_line(
            color='blue',
            size=1
        ).transform_window(
            rolling_mean='mean('+col2+')',
            frame=[-5, 5]
        ).encode(
        x = xvar,
        y = alt.Y('rolling_mean:Q').scale(zero=False)
    ).properties(
    width=800,
    height=300
)
    return(points+line)


Function - scatter plot without perid data but with upper and lower bounds.

In [None]:
display(Markdown("## Plot the distances and percentages"))
upper_bound = 0.33
lower_bound = 0.1
date_percent_dist = scatter_plot_no_p(running_df, "Date", "Percent_distance_tot", lower_bound, upper_bound)
date_rolling_dist = scatter_plot_no_p(running_df, "Date", "Rolling_distance_tot", 30, 80)
date_dist_incre = scatter_plot_no_p(running_df, "Date", "Distance_percent_increase", 0, 0.1)
date_long_per = scatter_plot_no_p(running_df, "Date", "Long_run_percent", 0.15, 0.5)
date_percent_dist_period = scatter_plot(running_df, "Date", "Percent_distance_tot")
date_rolling_dist_period = scatter_plot(running_df, "Date", "Rolling_distance_tot")
date_dist_incre_period = scatter_plot(running_df, "Date", "Distance_percent_increase")
date_long_per_period = scatter_plot(running_df, "Date", "Long_run_percent")

## Plot the distances and percentages

In [None]:
(date_percent_dist) & (date_rolling_dist) & (date_dist_incre) & (date_long_per)

In [None]:
display(Markdown("### Plots of distances and percentages tracked by period"))
(date_percent_dist_period) & (date_rolling_dist_period) & (date_dist_incre_period) & (date_long_per_period)

### Plots of distances and percentages tracked by period



In [None]:
date_stride_leng = scatter_plot_no_p(running_df, "Date", 'Avg Stride Length')
date_vert_rat = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Ratio')
date_cadence = scatter_plot_no_p(running_df, "Date", 'Avg Run Cadence')
date_vert_osc = scatter_plot_no_p(running_df, "Date", 'Avg Vertical Oscillation')
date_grd_cont = scatter_plot_no_p(running_df, "Date", 'Avg Ground Contact Time')
date_l_balance = scatter_plot_no_p(running_df, "Date","L_GCT Balance",0.45, 0.55)
date_r_balance = scatter_plot_no_p(running_df, "Date","R_GCT Balance", 0.45, 0.55)

(date_stride_leng | date_vert_rat) & (date_cadence | date_vert_osc) & (date_grd_cont ) & (date_l_balance | date_r_balance)

In [None]:
def get_rtype_by_weekday(run_date):
    if (run_date.weekday() == 0):
        r_day = "Mon"
        r_type = "Gym"
    elif (run_date.weekday() == 1):
        r_day = "Tue"
        r_type = "Intervals"
    elif (run_date.weekday() == 2):
        r_day = "Wed"
        r_type = "rest"
    elif (run_date.weekday() == 3):
        r_day = "Thur"
        r_type = "Intervals"
    elif (run_date.weekday() == 4):
        r_day = "Fri"
        r_type = "Gym"
    elif (run_date.weekday() == 5):
        r_day = "Sat"
        r_type = "rest"
    elif (run_date.weekday() == 6):
        r_day = "Sun"
        r_type = "Long_run"
    return ({"day" : r_day, "r_type": r_type})


In [None]:
def get_dist_by_type(r_type, week_dist, long_dist):
    if r_type == "Long_run": 
        r_dist = long_dist
    elif r_type == "Intervals":
        r_dist = (week_dist - long_dist)/2
    else: r_dist = 0
    return(r_dist)

In [None]:
last_date = running_df.loc[running_df.Date.idxmax(),"Date"]
second_last_date = last_date - timedelta(days = 7)
third_last_date = second_last_date - timedelta(days = 7)
fourth_last_date = third_last_date - timedelta(days = 7)
last_week_rows = running_df[(running_df["Date"]>=second_last_date) & (running_df["Date"]<=last_date)]
second_week_rows = running_df[(running_df["Date"]<=second_last_date) & (running_df["Date"]>=third_last_date)]
third_week_rows = running_df[(running_df["Date"]>=fourth_last_date) & (running_df["Date"]<=third_last_date)]
last_dist = last_week_rows.Distance.sum()
second_tot = second_week_rows.Distance.sum()
third_tot = third_week_rows.Distance.sum()
last_month_date = last_date - timedelta(days = 30)
last_month_rows = last_week_rows = running_df[(running_df["Date"]>=last_month_date) & (running_df["Date"]<=last_date)]
print(last_dist, second_tot, third_tot)

27.58 34.56 22.97


In [None]:
def reset_date_next_mon(date):
    day = date.weekday()
    day_week = weekday(day)
    if day_week == "Mon":
        return(date)
    elif day_week == "Tue": return(date+timedelta(days=6))
    elif day_week == "Wed": return(date+timedelta(days=5))
    elif day_week == "Thur": return(date+timedelta(days=4))
    elif day_week == "Fri": return(date+timedelta(days=3))
    elif day_week == "Sat": return(date+timedelta(days=2))
    elif day_week == "Sun": return(date+timedelta(days=1))
    return

In [None]:
init_week_dist = 1.1*(third_tot + second_tot + last_dist)/3
init_long_dist = last_month_rows.Distance.max()
init_date = last_date + timedelta(days = 1)
check_day = get_rtype_by_weekday(init_date)
init_date = reset_date_next_mon(init_date)
r_type = check_day["r_type"]
r_dist = get_dist_by_type(r_type,init_week_dist, init_long_dist)
init_week_num = 0
init_dict = {
    "Week_No": init_week_num,
    "Day": check_day["day"],
    "Date": init_date,
    "Week_tot": init_week_dist,
    "Run_type": r_type,
    "Distance": r_dist,
    "Down_week": "N",
    "Percent_week_tot": 100*r_dist/init_week_dist
}
init_row = pd.DataFrame(index = [0],data=init_dict)

NameError: name 'get_rtype_by_weekday' is not defined

In [None]:
tot_w = 52
new_week_dist = init_week_dist
long_dist = init_long_dist
new_date = init_date
week_no = init_week_num
down_week_count = 1
DW = "N"
plan_df = pd.DataFrame(columns = ["Week_No", "Day","Date", "Distance", "Run_type", "Down_week", "Week_tot", "Percent_week_tot"])
plan_df = pd.concat([plan_df])
week_day_bound = 12
long_run_bound = 35
week_bound = 100
for nd in range(tot_w):
    check_day = get_rtype_by_weekday(new_date)
    # Get the weekly totalts
    down_week_count +=1
    down_week_count = down_week_count%4
    if down_week_count == 0:
        mul = 0.9
        DW = "Y"
    else:
        mul = 1.1
        DW = "N"
    if new_week_dist * mul >= week_bound:
        new_week_dist = week_bound
        week_bound *= 1.1
        long_run_bound *= 1.1
        week_day_bound *= 1.1
        mul = 1.0
    new_week_dist *= mul
    long_dist *= mul
    # Set default distances for the overflow days
    mon_dist = 0
    wed_dist = 0
    fri_dist = 0
    sat_dist = 0
    # Add Sun
    sun_date = new_date +timedelta(days=6)
    if long_dist > min(0.5*new_week_dist, long_run_bound):
        sun_dist = min(0.5*new_week_dist, long_run_bound)
        extra_dist = long_dist - sun_dist
        if extra_dist <= week_day_bound:
            sat_dist = extra_dist
        elif extra_dist > week_day_bound and extra_dist <= 2*week_day_bound:
            wed_dist += 0.5*extra_dist
            sat_dist += 0.5*extra_dist
        elif extra_dist >= 2*week_day_bound and extra_dist < 3*week_day_bound:
            wed_dist += extra_dist/3
            fri_dist += extra_dist/3
            sat_dist += extra_dist/3
        elif extra_dist >= 3*week_day_bound and extra_dist < 4*week_day_bound:
            mon_dist += extra_dist/4
            wed_dist += extra_dist/4
            fri_dist += extra_dist/4
            sat_dist += extra_dist/4
        else:
            mon_dist += week_day_bound
            wed_dist += week_day_bound
            fri_dist += week_day_bound
            sat_dist += week_day_bound
    else: sun_dist = long_dist
    # Set the Workout distances 
    # - Tues
    tue_date = new_date + timedelta(days=1)
    tue_dist = 0.5*(new_week_dist - sun_dist)
    if tue_dist > week_day_bound:
        extra_dist_t = tue_dist - week_day_bound
        mon_dist += min(extra_dist_t, week_day_bound)
        tue_dist = week_day_bound
    # - Thur
    thur_date = new_date +timedelta(days=3)
    thur_dist = 0.5*(new_week_dist - sun_dist)
    if thur_dist > week_day_bound:
        extra_dist_th = thur_dist - week_day_bound
        fri_dist += min(extra_dist_th, week_day_bound)
        thur_dist = week_day_bound
    # - Cap the overflow distances
    mon_dist = min(mon_dist, week_day_bound)
    wed_dist = min(wed_dist, week_day_bound)
    fri_dist = min(fri_dist, week_day_bound)
    sat_dist = min(sat_dist, week_day_bound)
    # - Set the overflow dates
    wed_date = new_date +timedelta(days=2)
    fri_date = new_date +timedelta(days=4)
    sat_date = new_date +timedelta(days=5)
    new_week_dist = sum([mon_dist, tue_dist, wed_dist, thur_dist, fri_dist, sat_dist, sun_dist])
    mon_dict =  {
        "Week_No": week_no,
        "Day": "Mon",
        "Date": new_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": mon_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*mon_dist/new_week_dist
        }
    tue_dict =  {
        "Week_No": week_no,
        "Day": "Tue",
        "Date": tue_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": tue_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*tue_dist/new_week_dist
        }
    wed_dict =  {
        "Week_No": week_no,
        "Day": "Wed",
        "Date": wed_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": wed_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*wed_dist/new_week_dist
        }
    thur_dict =  {
        "Week_No": week_no,
        "Day": "Thur",
        "Date": thur_date,
        "Week_tot": new_week_dist,
        "Run_type": "Workout",
        "Distance": thur_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*thur_dist/new_week_dist
        }
    fri_dict =  {
        "Week_No": week_no,
        "Day": "Fri",
        "Date": fri_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": fri_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*fri_dist/new_week_dist
        }
    sat_dict =  {
        "Week_No": week_no,
        "Day": "Sat",
        "Date": sat_date,
        "Week_tot": new_week_dist,
        "Run_type": "Top-up/ rest",
        "Distance": sat_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sat_dist/new_week_dist
        }
    sun_dict =  {
        "Week_No": week_no,
        "Day": "Sun",
        "Date": sun_date,
        "Week_tot": new_week_dist,
        "Run_type": "Long_run",
        "Distance": sun_dist,
        "Down_week": DW,
        "Percent_week_tot": 100*sun_dist/new_week_dist
        }
    new_row = pd.DataFrame(index = [nd],data= mon_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+1],data= tue_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+2],data= wed_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+3],data= thur_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+4],data= fri_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+5],data= sat_dict)
    plan_df = pd.concat([plan_df, new_row])
    new_row = pd.DataFrame(index = [nd+6],data= sun_dict)
    plan_df = pd.concat([plan_df, new_row])
    nd +=7
    week_no+=1
    new_date +=timedelta(days = 7)
plan_df
    
    

NameError: name 'init_week_dist' is not defined

In [None]:
weeks_df = pd.DataFrame(columns = [
    "Dates", "Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun",
    "Week_tot", "Percent_week_tot", "Down_week"])
week_nums = plan_df.Week_No.unique()

for wn in week_nums:
    per_week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Percent_week_tot"].sum()
    week_tot = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Week_tot"].sum()
    dw = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Down_week"].sum()
    week_date_st = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon")]["Date"].item().date()
    week_date_end = plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun")]["Date"].item().date()
    week_date = str(week_date_st) + " -- " + str(week_date_end)
    new_dict = {
        "Dates": week_date,
        "Mon" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Mon"),"Distance"].sum(),
        "Tue" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Tue"),"Distance"].sum(),
        "Wed" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Wed"),"Distance"].sum(),
        "Thur" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Thur"),"Distance"].sum(),
        "Fri" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Fri"),"Distance"].sum(),
        "Sat" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sat"),"Distance"].sum(),
        "Sun" : plan_df.loc[(plan_df.Week_No == wn) & (plan_df.Day == "Sun"),"Distance"].sum(),
        "Week_tot" : week_tot,
        "Percent_week_tot" : per_week_tot,
        "Down_week" : dw
    }
    new_row = pd.DataFrame(index = [wn], data= new_dict)
    weeks_df = pd.concat([weeks_df, new_row])
weeks_df

Unnamed: 0,Dates,Mon,Tue,Wed,Thur,Fri,Sat,Sun,Week_tot,Percent_week_tot,Down_week
0,2024-05-20 -- 2024-05-26,3.663633,12.0,0.0,12.0,3.663633,0.0,17.71,49.037267,36.11539,N
1,2024-05-27 -- 2024-06-02,5.229997,12.0,0.0,12.0,5.229997,0.0,19.481,53.940993,36.11539,N
2,2024-06-03 -- 2024-06-09,3.506997,12.0,0.0,12.0,3.506997,0.0,17.5329,48.546894,36.11539,Y
3,2024-06-10 -- 2024-06-16,5.057697,12.0,0.0,12.0,5.057697,0.0,19.28619,53.401583,36.11539,N
4,2024-06-17 -- 2024-06-23,6.763466,12.0,0.0,12.0,6.763466,0.0,21.214809,58.741742,36.11539,N
5,2024-06-24 -- 2024-06-30,8.639813,12.0,0.0,12.0,8.639813,0.0,23.33629,64.615916,36.11539,N
6,2024-07-01 -- 2024-07-07,6.575832,12.0,0.0,12.0,6.575832,0.0,21.002661,58.154324,36.11539,Y
7,2024-07-08 -- 2024-07-14,8.433415,12.0,0.0,12.0,8.433415,0.0,23.102927,63.969757,36.11539,N
8,2024-07-15 -- 2024-07-21,10.476756,12.0,0.0,12.0,10.476756,0.0,25.41322,70.366732,36.11539,N
9,2024-07-22 -- 2024-07-28,12.0,12.0,0.0,12.0,12.0,0.0,27.954542,75.954542,36.804306,N


In [None]:
thresh_dates = plan_df.loc[plan_df.Week_tot > 100, "Date"]
indices = thresh_dates.index
mara_date = thresh_dates[indices[0]]
print("Marathon ready by "+ str(mara_date))
print("Earliest race "+ str(mara_date + timedelta(weeks = 3)))

Marathon ready by 2024-11-04 17:09:53
Earliest race 2024-11-25 17:09:53


In [None]:
dt.date(2023, 5, 21) + timedelta(weeks=45 + 3)

datetime.date(2024, 4, 21)