# Recreating Zwift ride powerplot

## Import .fit file and convert to pandas dataframe

In [5]:
import os
import datetime
from fitparse import FitFile    # https://github.com/dtcooper/python-fitparse
import pandas as pd
import numpy as np
import streamlit as st
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
from smooth import smooth
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox, AnchoredText, AnchoredOffsetbox)
from matplotlib.text import Annotation


Matplotlib is building the font cache; this may take a moment.


## Title of Streamlit app

In [6]:
st.title('Workout Graph in Zwift Style')

2025-09-02 15:10:52.244 
  command:

    streamlit run /opt/miniconda3/lib/python3.13/site-packages/ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

##  Obtain FTP value from user to determine workout zones in graph

In [7]:
# Ask for the rider's FTP, allowing up to three attempts.
# `ftp` starts as None so that, if every attempt fails, the plotting cell can
# take its "no valid FTP was provided" branch instead of raising NameError.
ftp = None
n = 0   # number of failed attempts so far
while n < 3:
    try:
        ftp = int(input("Enter FTP in watts (whole numbers only):  "))
        print(f"\nYour FTP has been recorded as {ftp} watts.")
        break
    except ValueError:
        # int() rejects blanks, letters and decimal strings alike
        n += 1
        print("\nYour FTP value cannot contain letters, be left blank, or be entered as a decimal value. \n")


Enter FTP in watts (whole numbers only):   109



Your FTP has been recorded as 109 watts.


## Have user enter file name / upload file

The code for importing .fit files and converting them to a pandas dataframe is from http://johannesjacob.com/analyze-your-cycling-data-python/.
To install the Python packages used in this notebook, type 'pip install pandas numpy fitparse matplotlib tqdm streamlit' on the command line.


In [9]:
# Ask for the path of the .fit file to analyse.
# Surrounding whitespace is stripped because a pasted or dragged-in path often
# carries a stray leading/trailing space, which would make the open fail.
filename = input("Type filename, including .fit extension:  ").strip()
# 2021-10-05-10-54-32.fit

fitfile = FitFile(filename)

Type filename, including .fit extension:   /Users/ginadurante/Downloads/2025-08-24-18-47-20.fit


#### From Johannes Jacob's blog post (http://johannesjacob.com/2019/03/13/analyze-your-cycling-data-python/):  
_"Now we are ready to import the workout file and transform the data into a 
pandas dataframe. Unfortunately we have to use an ugly hack with this "while" 
loop to avoid timing issues. Then we are looping through the file, append 
the records to a list and convert the list to a pandas dataframe."_

In [10]:
def parse_fitfile(uploaded_file):
    """Load a .fit file and return its 'record' messages as a DataFrame.

    Every 'record' message becomes one row; every field inside a message
    becomes a column named after the field.

    Parameters
    ----------
    uploaded_file
        Whatever ``FitFile`` accepts as its source (the notebook passes a path).

    Returns
    -------
    pandas.DataFrame
        One row per 'record' message.
    """
    fit = FitFile(uploaded_file)

    # Workaround from the blog post quoted above: keep touching `messages`
    # until the access stops raising KeyError.
    ready = False
    while not ready:
        try:
            fit.messages
        except KeyError:
            continue
        ready = True

    rows = [
        {field.name: field.value for field in message}
        for message in fit.get_messages('record')
    ]
    return pd.DataFrame(rows)

In [11]:
df = parse_fitfile(filename)

In [12]:
df

Unnamed: 0,accumulated_power,activity_type,altitude,ball_speed,cadence,cadence256,calories,combined_pedal_smoothness,compressed_accumulated_power,compressed_speed_distance,...,time_from_course,timestamp,total_cycles,total_hemoglobin_conc,total_hemoglobin_conc_max,total_hemoglobin_conc_min,vertical_oscillation,vertical_speed,zone,unknown_139
0,,,16.0,,88,,,,,,...,,2025-08-25 01:47:24,,,,,,,,
1,,,16.0,,88,,,,,,...,,2025-08-25 01:47:25,,,,,,,,
2,,,16.0,,90,,,,,,...,,2025-08-25 01:47:26,,,,,,,,
3,,,16.0,,89,,,,,,...,,2025-08-25 01:47:27,,,,,,,,
4,,,16.0,,89,,,,,,...,,2025-08-25 01:47:28,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3122,,,18.6,,0,,,,,,...,,2025-08-25 02:39:26,,,,,,,,
3123,,,18.6,,0,,,,,,...,,2025-08-25 02:39:27,,,,,,,,
3124,,,18.6,,0,,,,,,...,,2025-08-25 02:39:28,,,,,,,,
3125,,,18.6,,0,,,,,,...,,2025-08-25 02:39:29,,,,,,,,


In [13]:
column_list = list(df.columns)
column_list

['accumulated_power',
 'activity_type',
 'altitude',
 'ball_speed',
 'cadence',
 'cadence256',
 'calories',
 'combined_pedal_smoothness',
 'compressed_accumulated_power',
 'compressed_speed_distance',
 'core_data_quality',
 'core_reserved',
 'cycle_length',
 'cycles',
 'device_index',
 'distance',
 'enhanced_altitude',
 'enhanced_speed',
 'fractional_cadence',
 'gps_accuracy',
 'grade',
 'heart_rate',
 'heat_strain_index',
 'left_pedal_smoothness',
 'left_right_balance',
 'left_torque_effectiveness',
 'position_lat',
 'position_long',
 'power',
 'resistance',
 'right_pedal_smoothness',
 'right_torque_effectiveness',
 'saturated_hemoglobin_percent',
 'saturated_hemoglobin_percent_max',
 'saturated_hemoglobin_percent_min',
 'skin_temperature',
 'speed',
 'speed_1s',
 'stance_time',
 'stance_time_percent',
 'stroke_type',
 'target_power',
 'temperature',
 'time128',
 'time_from_course',
 'timestamp',
 'total_cycles',
 'total_hemoglobin_conc',
 'total_hemoglobin_conc_max',
 'total_hemoglob

##  Remove unnecessary columns and remove null values

In [14]:
def df_clean_trim(df):
    """Return a trimmed copy of the workout data with missing values zero-filled.

    Keeps only 'power' (renamed to 'watts'), 'timestamp' and, when present in
    `df`, 'heart_rate'. A leading 'data_points' column (0..len(df)-1) is added
    so the max heart rate and watts can later be selected by index. The input
    frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Parsed workout records; must contain 'power' and 'timestamp'.

    Returns
    -------
    pandas.DataFrame
        Columns: data_points, [heart_rate,] watts, timestamp.
    """
    # Decide from the frame that was actually passed in, not from the
    # notebook-level `column_list`, so the function works on any frame.
    has_heart_rate = 'heart_rate' in df.columns

    wanted = ['power', 'timestamp']
    fill_values = {'watts': 0}
    if has_heart_rate:
        wanted.insert(0, 'heart_rate')
        fill_values['heart_rate'] = 0

    df_cleaned = df[wanted].copy()
    # Insert a column 'data_points' to enable selection of max hr and watts by index
    df_cleaned.insert(loc=0, column='data_points', value=np.arange(len(df)))
    df_cleaned = df_cleaned.rename(columns={'power': 'watts'})
    df_cleaned = df_cleaned.fillna(fill_values)

    return df_cleaned

In [15]:
df_cleaned = df_clean_trim(df)
df_cleaned

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned['watts'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_cleaned['heart_rate'].fillna(0, inplace=True)


Unnamed: 0,data_points,heart_rate,watts,timestamp
0,0,113,39,2025-08-25 01:47:24
1,1,113,84,2025-08-25 01:47:25
2,2,113,85,2025-08-25 01:47:26
3,3,113,63,2025-08-25 01:47:27
4,4,113,52,2025-08-25 01:47:28
...,...,...,...,...
3122,3122,132,0,2025-08-25 02:39:26
3123,3123,132,0,2025-08-25 02:39:27
3124,3124,132,0,2025-08-25 02:39:28
3125,3125,132,0,2025-08-25 02:39:29


## Get date of workout and length of workout in seconds/minutes 

In [16]:
def workout_date_time_freq(df):
    """Derive the workout date, length and recording frequency from timestamps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'timestamp' column of datetimes, in recording order.

    Returns
    -------
    tuple
        (date_str, num_datapoints, workout_minutes, rec_freq, freq) where
        date_str is the first timestamp's date as 'YYYY-MM-DD',
        num_datapoints is the number of rows,
        workout_minutes is the span from first to last timestamp in minutes,
        rec_freq is the whole number of seconds between rows (at least 1), and
        freq is the number of rows per minute (60 / rec_freq).

    Raises
    ------
    ValueError
        If `df` has no rows.
    """
    timestamps = df['timestamp']
    num_datapoints = len(timestamps)
    if num_datapoints == 0:
        raise ValueError("Cannot compute workout date and length: the workout contains no records.")

    # Positional access so a frame with a non-default index still works.
    first = pd.Timestamp(timestamps.iloc[0])
    last = pd.Timestamp(timestamps.iloc[-1])

    # Get date
    date_str = first.strftime('%Y-%m-%d')

    # Get workout length in minutes
    workout_seconds = int((last - first).total_seconds())
    workout_minutes = workout_seconds / 60

    # Recording interval = elapsed seconds / number of gaps between rows.
    # N rows have N-1 gaps; dividing by N under-estimates short recordings.
    intervals = num_datapoints - 1
    rec_freq = round(workout_seconds / intervals) if intervals > 0 else 1
    # Never let the interval round down to 0, which would divide by zero below.
    rec_freq = max(rec_freq, 1)
    freq = 60 / rec_freq

    return date_str, num_datapoints, workout_minutes, rec_freq, freq


In [17]:
date_str, num_datapoints, workout_minutes, rec_freq, freq = workout_date_time_freq(df_cleaned)

In [18]:
date_str, num_datapoints, workout_minutes, rec_freq, freq

('2025-08-25', 3127, 52.1, 1, 60.0)

In [19]:
df_cleaned

Unnamed: 0,data_points,heart_rate,watts,timestamp
0,0,113,39,2025-08-25 01:47:24
1,1,113,84,2025-08-25 01:47:25
2,2,113,85,2025-08-25 01:47:26
3,3,113,63,2025-08-25 01:47:27
4,4,113,52,2025-08-25 01:47:28
...,...,...,...,...
3122,3122,132,0,2025-08-25 02:39:26
3123,3123,132,0,2025-08-25 02:39:27
3124,3124,132,0,2025-08-25 02:39:28
3125,3125,132,0,2025-08-25 02:39:29


## Convert dataframe to NumPy array

In [20]:
def convert_to_arr(df_cleaned, rows_per_minute=None):
    """Convert the cleaned frame to NumPy arrays and locate the power/HR peaks.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Must contain 'data_points' and 'watts'; 'heart_rate' is optional.
    rows_per_minute : number, optional
        How many rows make up one minute of workout time. When omitted, the
        notebook-level `freq` value is used.

    Returns
    -------
    tuple
        (watts, max_watts, minutes, max_watts_timestamp,
         hr, max_hr, max_hr_timestamp). Timestamps are in minutes from the
        first row. When there is no 'heart_rate' column, the three heart rate
        values are 0, matching what `max_hr_stamp` returns in that case.
    """
    if rows_per_minute is None:
        rows_per_minute = freq  # notebook-level value set in an earlier cell

    workout_data = df_cleaned.to_records(index=False)
    watts = workout_data['watts']
    max_watts = max(watts)

    # Find maximum power value and time stamp
    minutes = workout_data['data_points'] / rows_per_minute
    max_watts_idx = np.argmax(watts)
    max_watts_timestamp = minutes[max_watts_idx]

    # Find maximum heart rate value and time stamp (if the file has HR data)
    if 'heart_rate' in (workout_data.dtype.names or ()):
        hr = workout_data['heart_rate']
        max_hr = max(hr)
        max_hr_idx = np.argmax(hr)
        max_hr_timestamp = minutes[max_hr_idx]
    else:
        hr = 0
        max_hr = 0
        max_hr_timestamp = 0

    # The original returned the undefined name `max_pwr_timestamp` here.
    return watts, max_watts, minutes, max_watts_timestamp, hr, max_hr, max_hr_timestamp


In [21]:
workout_data = df_cleaned.to_records(index=False)

In [22]:
workout_data

rec.array([(   0, 113, 39, '2025-08-25T01:47:24.000000000'),
           (   1, 113, 84, '2025-08-25T01:47:25.000000000'),
           (   2, 113, 85, '2025-08-25T01:47:26.000000000'), ...,
           (3124, 132,  0, '2025-08-25T02:39:28.000000000'),
           (3125, 132,  0, '2025-08-25T02:39:29.000000000'),
           (3126, 133,  0, '2025-08-25T02:39:30.000000000')],
          dtype=[('data_points', '<i8'), ('heart_rate', '<i8'), ('watts', '<i8'), ('timestamp', '<M8[ns]')])

In [23]:
watts = workout_data['watts']

In [24]:
watts

array([39, 84, 85, ...,  0,  0,  0], shape=(3127,))

In [25]:
len(watts)

3127

In [26]:
max_watts = int(max(watts))
max_watts

424

In [27]:
np.argmax(watts)

np.int64(1064)

## Smooth power curve

In [28]:
# using helper function 'smooth.py'

watts_smoothed = smooth(watts, window_len=10)
print(len(watts_smoothed))

3127


In [29]:
watts_smoothed

array([39.        , 53.57913848, 62.97422493, ...,  2.24725346,
        0.36393087,  0.        ], shape=(3127,))

In [30]:
max(watts_smoothed)

np.float64(357.27602228090404)

In [31]:
watts_smoothed[2134]

np.float64(126.2837222495319)

##  Convert workout x-axis time values to minutes

In [32]:
# converting recording data into minutes  
# freq represents how many rows of data are contained in 1 minute of workout time
# For example, if data is recorded every 5 seconds, then there will be 12 rows of data 
# per every one minute of workout time

freq = 60 / rec_freq
freq


60.0

In [33]:
minutes = workout_data['data_points']/freq
print(minutes)

[0.00000000e+00 1.66666667e-02 3.33333333e-02 ... 5.20666667e+01
 5.20833333e+01 5.21000000e+01]


## Find maximum power value and time stamp

In [34]:
max_pwr_idx = np.argmax(workout_data['watts'])

In [35]:
max_pwr_idx

np.int64(1064)

In [36]:
max_pwr_timestamp = round(minutes[max_pwr_idx], ndigits=3)
max_pwr_timestamp

np.float64(17.733)

In [37]:
workout_data[max_pwr_idx]

np.record((1064, 137, 424, '2025-08-25T02:05:08.000000000'), dtype=[('data_points', '<i8'), ('heart_rate', '<i8'), ('watts', '<i8'), ('timestamp', '<M8[ns]')])

## Find maximum heart rate value and time stamp

Note: if the file contains no heart rate data, the heart rate values in this section are set to 0 and the heart rate graphing section is skipped.

In [38]:
# Function to find max HR & time stamp, if applicable

def max_hr_stamp(workout_data):
    """Find the maximum heart rate and when it occurred, if HR data exists.

    Parameters
    ----------
    workout_data : numpy record array
        The workout records; a 'heart_rate' field is optional.

    Returns
    -------
    tuple
        (hr, max_hr, max_hr_idx, max_hr_timestamp). With HR data: the heart
        rate array, its maximum as an int, the index of that maximum, and the
        matching entry of the notebook-level `minutes` array. Without HR data:
        four zeros.
    """
    # Inspect the records that were passed in rather than the notebook-level
    # `column_list`, so the answer always matches the data being processed.
    has_heart_rate = 'heart_rate' in (workout_data.dtype.names or ())

    if has_heart_rate:
        print('File contains HR data')
        hr = workout_data['heart_rate']
        max_hr = int(max(hr))
        max_hr_idx = np.argmax(hr)
        # `minutes` is the notebook-level x-axis array computed in an earlier cell
        max_hr_timestamp = minutes[max_hr_idx]
    else:
        print('File does not contain HR data')
        hr = 0
        max_hr = 0
        max_hr_idx = 0
        max_hr_timestamp = 0
    return hr, max_hr, max_hr_idx, max_hr_timestamp


In [39]:
hr, max_hr, max_hr_idx, max_hr_timestamp = max_hr_stamp(workout_data)

File contains HR data


In [40]:
hr

array([113, 113, 113, ..., 132, 132, 133], shape=(3127,))

In [41]:
max_hr

174

In [42]:
max_hr_idx

np.int64(2383)

In [43]:
max_hr_timestamp

np.float64(39.71666666666667)

## Plot data

In [None]:
import matplotlib  # repeats the top-of-notebook import; harmless, keeps this cell runnable on its own
matplotlib.use('qtagg')  # select the 'qtagg' backend before the figure is created

# Draw the Zwift-style power graph. Uses values computed in earlier cells:
# ftp, watts, watts_smoothed, minutes, max_watts, max_pwr_timestamp, date_str,
# column_list and, when heart rate is present, hr, max_hr, max_hr_timestamp.
if ftp is not None:
    figsize = (28, 12)
    img, ax1 = plt.subplots(figsize=figsize)
    ax1.set_facecolor(color='#252525')
    ax1.set_xlabel("Minutes", fontsize=22.0)
    ax1.set_ylabel("Watts", fontsize=22.0)
    ax1.tick_params(labelsize=22.0)

    # This expands the top of the graph to 80% beyond max watts
    ax1.set_ylim(top=max(watts)*1.80)

    # logic for color under the graph based on % of FTP (thanks to Jonas Häggqvist for this code)
    ax1.grid(which='major', axis='y', alpha=0.1, linewidth=1)
    # (fraction of FTP, fill colour), lowest threshold first: each later fill is
    # drawn on top of the earlier ones wherever the power exceeds its threshold.
    zone_colors = [
        (0.00, '#646464'),
        (0.60, '#328bff'),
        (0.75, '#59bf59'),
        (0.90, '#ffcc3f'),
        (1.05, '#ff663a'),
        (1.18, '#ff340c'),
    ]
    for ftp_fraction, zone_color in zone_colors:
        ax1.fill_between(minutes, watts_smoothed,
                         where=watts_smoothed > ftp_fraction*ftp, color=zone_color)

    # Setting workout date annotation (thanks to Phil Daws for the code that helped me get started)
    # Note:  xy for the purposes of workout date label is set using 'data' for coordinates
    # Limits are read after the fills so they reflect the plotted data.
    xmin, xmax = ax1.get_xlim()
    ymin, ymax = ax1.get_ylim()

    # Adding the workout date to the graph
    workout_date = Annotation(f'Workout date: {date_str}', xy=[xmax//2, ymax-(ymax*0.08)],
                              ha='center', color='white', fontweight='bold', fontsize=22.0)
    ax1.add_artist(workout_date)

    # Plot smoothed power, line color, and thickness
    ax1.plot(minutes, watts_smoothed, color='white', linewidth=1.25)

    # Annotate max power
    max_power = Annotation(f'{max_watts}w', xy=(max_pwr_timestamp, max_watts), xytext=(0, 15),
                           textcoords="offset pixels", ha='center', color='white', fontweight='bold',
                           fontsize=22.0, arrowprops=dict(arrowstyle='wedge', color='yellow'))
    ax1.add_artist(max_power)

    # Vertical marker from the x-axis up to the max power value
    ax1.vlines(x=max_pwr_timestamp, ymin=0, ymax=max_watts, color='white', linewidth=1.5)

    # Add HR data to graph
    if 'heart_rate' in column_list:
        # Instantiate second y axis for heart rate graph
        ax2 = ax1.twinx()
        ax2.set_ylabel("Heart Rate", fontsize=22.0)
        ax2.set_ylim(top=max(hr)*1.20)
        ax2.tick_params(labelsize=22.0)

        # Plot heart rate
        ax2.plot(minutes, hr, color='red', linewidth=1.2)

        # Annotate max heart rate
        max_hr_annt = Annotation(f'{max_hr}bpm', xy=(max_hr_timestamp, max_hr), xytext=(0, 15),
                                 textcoords="offset pixels", ha='center', color='white', fontweight='bold',
                                 fontsize=22.0, arrowprops=dict(arrowstyle='wedge', color='red'))
        ax2.add_artist(max_hr_annt)

    plt.show()

else:
    print("\nThe graph cannot be drawn; no valid FTP was provided.")
    print("If you wish to try again, please have your FTP value ready and then reload this page.")


  plt.show()
