# Recreating Zwift ride powerplot

## Import .fit file and convert to pandas dataframe

The code for importing .fit files and converting to a pandas dataframe is from http://johannesjacob.com/analyze-your-cycling-data-python/.
To install the python packages, type 'pip install pandas numpy fitparse matplotlib tqdm' on the command line.


In [1]:
import os
import datetime
from fitparse import FitFile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from smooth import smooth
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox, AnchoredText, AnchoredOffsetbox)
from matplotlib.text import Annotation


In [2]:
filename = input("Type filename, including .fit extension:  ")
# 2021-10-05-10-54-32.fit

fitfile = FitFile(filename)

Type filename, including .fit extension:  2021-10-05-10-54-32.fit


From Johannes Jacob's blog post:  
_"Now we are ready to import the workout file and transform the data into a 
pandas dataframe. Unfortunately we have to use an ugly hack with this "while" 
loop to avoid timing issues. Then we are looping through the file, append 
the records to a list and convert the list to a pandas dataframe."_

In [3]:
while True:
    try:
        fitfile.messages
        break
    except KeyError:
        continue
workout = []
for record in fitfile.get_messages('record'):
    r = {}
    for record_data in record:
        r[record_data.name] = record_data.value
    workout.append(r)
df = pd.DataFrame(workout)

In [4]:
df

Unnamed: 0,altitude,cadence,compressed_speed_distance,cycle_length,distance,enhanced_altitude,enhanced_speed,grade,heart_rate,position_lat,position_long,power,resistance,speed,temperature,time_from_course,timestamp
0,5.0,108,,,,5.0,9.948,,165,-138819701,1991832102,280,,9.948,,,2021-10-05 18:05:18
1,5.0,109,,,,5.0,10.150,,166,-138820063,1991831076,248,,10.150,,,2021-10-05 18:05:19
2,5.0,110,,,,5.0,10.311,,167,-138820372,1991830018,245,,10.311,,,2021-10-05 18:05:20
3,5.0,108,,,,5.0,10.464,,168,-138820865,1991828979,248,,10.464,,,2021-10-05 18:05:21
4,4.8,107,,,,4.8,10.771,,168,-138821057,1991827881,234,,10.771,,,2021-10-05 18:05:22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4671,-5.4,0,,,,-5.4,1.061,,143,-138956810,1991928451,0,,1.061,,,2021-10-05 19:23:09
4672,-5.4,0,,,,-5.4,0.330,,142,-138956881,1991928423,0,,0.330,,,2021-10-05 19:23:10
4673,-5.4,0,,,,-5.4,0.000,,142,-138956889,1991928420,0,,0.000,,,2021-10-05 19:23:11
4674,-5.4,0,,,,-5.4,0.000,,141,-138956888,1991928421,0,,0.000,,,2021-10-05 19:23:12


In [5]:
df.tail(1)

Unnamed: 0,altitude,cadence,compressed_speed_distance,cycle_length,distance,enhanced_altitude,enhanced_speed,grade,heart_rate,position_lat,position_long,power,resistance,speed,temperature,time_from_course,timestamp
4675,-5.4,0,,,,-5.4,0.0,,141,-138956888,1991928421,0,,0.0,,,2021-10-05 19:23:13


## Get date of workout from column 'timestamp' 

In [6]:
timestamp = df['timestamp'].tail(1).values
timestamp

array(['2021-10-05T19:23:13.000000000'], dtype='datetime64[ns]')

In [7]:
date = np.datetime_as_string(timestamp, unit='D')
date

array(['2021-10-05'], dtype='<U28')

In [8]:
date_str = str(date)
type(date_str)
print(date_str)


['2021-10-05']


In [9]:
date_str = date_str.strip("[")
date_str = date_str.strip("]")
date_str = date_str.strip("'")
print(date_str)

2021-10-05


##  Remove unnecessary columns

In [10]:
df.columns

Index(['altitude', 'cadence', 'compressed_speed_distance', 'cycle_length',
       'distance', 'enhanced_altitude', 'enhanced_speed', 'grade',
       'heart_rate', 'position_lat', 'position_long', 'power', 'resistance',
       'speed', 'temperature', 'time_from_course', 'timestamp'],
      dtype='object')

Keep the following columns:
* power


In [69]:
df_power = pd.DataFrame(df, columns=['power'])
df_power

Unnamed: 0,power
0,280
1,248
2,245
3,248
4,234
...,...
4671,0
4672,0
4673,0
4674,0


In [70]:
df_power['power'].isna().value_counts()

False    4676
Name: power, dtype: int64

In [71]:
len(df_power)

4676

##  Insert a column 'time_unit' 

_**Note:  Zwift records workout data once every second.  Using .fit files with data recorded more or less frequently will result in an incorrect number of minutes on the x-axis of the graph.**_

In [72]:
df_power.insert(loc=0, column='time_unit', value=np.arange(len(df_power)))

In [75]:
df_power.loc[df_power['power'] == df_power['power'].max()]

Unnamed: 0,time_unit,power
2134,2134,294


In [20]:
df_power.rename(columns = {'power':'watts'}, inplace = True)

In [21]:
df_power

Unnamed: 0,time_unit,watts
0,0,280
1,1,248
2,2,245
3,3,248
4,4,234
...,...,...
4671,4671,0
4672,4672,0
4673,4673,0
4674,4674,0


In [22]:
df_power['watts'].max()

294

In [23]:
max_w_t = df_power['time_unit'].loc[df_power['watts'] == df_power['watts'].max()]
max_w_t

2134    2134
Name: time_unit, dtype: int64

In [25]:
df_power.loc[df_power['watts'] == "NaN"]

Unnamed: 0,time_unit,watts


In [26]:
df_power['watts'].fillna(0, inplace=True)

##  Obtain FTP value from user to determine workout zones in graph

In [27]:
ftp = None

# set up try / except loop:
n = 0
while n < 3: 
    try:
        ftp = int(input("Enter FTP in watts (whole numbers only):  "))
        print(f"\nYour FTP has been recorded as {ftp} watts.")
        break
    except ValueError:
        n += 1
        print("\nYour FTP value cannot contain letters, be left blank, or be entered as a decimal value. \n")


Enter FTP in watts (whole numbers only):  126

Your FTP has been recorded as 126 watts.


In [28]:
# convert df to numpy array

workout_data = df_power.to_records(index=False)

In [29]:
workout_data

rec.array([(   0, 280), (   1, 248), (   2, 245), ..., (4673,   0),
           (4674,   0), (4675,   0)],
          dtype=[('time_unit', '<i8'), ('watts', '<i8')])

In [30]:
watts = workout_data['watts']
time = workout_data['time_unit']

In [31]:
workout_data['watts'].max()

294

In [32]:
watts

array([280, 248, 245, ...,   0,   0,   0])

In [33]:
max_watts = max(watts)
max_watts

294

In [34]:
y_top = max(watts)*1.05
y_top

308.7

In [None]:
plt.figure(figsize=(18,8))
plt.plot(time, watts)
plt.plot(max_watts, 'r+')
plt.annotate(f'Max power: {max_watts}', max_watts, xytext=(0, 20), arrowprops={}, textcoords='offset points');

In [36]:
watts_smoothed = smooth(watts, window_len=20)
print(len(watts_smoothed))
watts_smoothed

4676


array([280.        , 268.64838868, 257.4518006 , ...,   0.        ,
         0.        ,   0.        ])

##  Give user the opportunity to enter how often .fit file data is recorded, in seconds (default is once per second, as on Zwift)

In [37]:
# Workout .fit file recorded by Zwift?

zwift_or_not = input("Was your .fit file recorded by Zwift, and/or did you device record the workout in 1-second increments?  \nEnter 'y' for yes or 'n' for no. ")

Was your .fit file recorded by Zwift, and/or did you device record the workout in 1-second increments?  
Enter 'y' for yes or 'n' for no. 


In [38]:
zwift_or_not

''

In [39]:
if zwift_or_not=='y' or zwift_or_not=='':
    rec_freq = 1
    print(f"\nThe default recording frequency has been set to {rec_freq} second.")

    
# If .fit file not recorded by Zwift, how frequently was data recorded, in seconds?

if zwift_or_not=='n':
    # default recording frequency to start with:
    rec_freq = 1
    
    # set up try / except loop:
    n = 0
    while n < 3: 
        try:
            rec_freq = int(input("Please enter the frequency that your workout data was recorded, in seconds.  \nEntry must be in numbers >0 and <=60, e.g., '1' for once per second, '5' to represent data recorded once every 5 seconds, '10' to signify once every 10 seconds, etc.   "))
            print(f"\nThe recording frequency has been set to {rec_freq} second(s).")
            break
        except ValueError:
            n += 1
            print()
        if n == 3:
            print(f"\nThe recording frequency has been set to {rec_freq} second(s).")



The default recording frequency has been set to 1 second.


In [40]:
rec_freq

1

##  Convert workout x-axis time values to minutes

In [41]:
# converting recording data into minutes  
# freq represents how many rows of data are contained in 1 minute of workout time
# For example, if data is recorded every 5 seconds, then there will be 12 rows of data 
# per every one minute of workout time

freq = 60 / rec_freq
freq


60.0

In [42]:
minutes = workout_data['time_unit']/freq
print(minutes)
print(type(minutes))
print(minutes.dtype)

[0.00000000e+00 1.66666667e-02 3.33333333e-02 ... 7.78833333e+01
 7.79000000e+01 7.79166667e+01]
<class 'numpy.ndarray'>
float64


In [43]:
type(workout_data['watts'])

numpy.ndarray

In [44]:
workout_data['watts'].dtype

dtype('int64')

In [None]:
workout_data_minutes = np.multiply()

In [46]:
new_arr = np.stack([minutes, watts], axis=1)

In [53]:
new_arr

array([[0.00000000e+00, 2.80000000e+02],
       [1.66666667e-02, 2.48000000e+02],
       [3.33333333e-02, 2.45000000e+02],
       ...,
       [7.78833333e+01, 0.00000000e+00],
       [7.79000000e+01, 0.00000000e+00],
       [7.79166667e+01, 0.00000000e+00]])

In [54]:
new_arr.shape

(4676, 2)

In [55]:
new_arr

array([[0.00000000e+00, 2.80000000e+02],
       [1.66666667e-02, 2.48000000e+02],
       [3.33333333e-02, 2.45000000e+02],
       ...,
       [7.78833333e+01, 0.00000000e+00],
       [7.79000000e+01, 0.00000000e+00],
       [7.79166667e+01, 0.00000000e+00]])

In [143]:
new_arr[2:12].max()

248.0

In [151]:
new_arr[4000:].max()

229.0

## Find time where max power occurred

In [80]:
workout_data

rec.array([(   0, 280), (   1, 248), (   2, 245), ..., (4673,   0),
           (4674,   0), (4675,   0)],
          dtype=[('time_unit', '<i8'), ('watts', '<i8')])

In [85]:
workout_data.dtype

dtype((numpy.record, [('time_unit', '<i8'), ('watts', '<i8')]))

In [115]:
workout_data[:][:][1].max()

TypeError: cannot perform reduce with flexible type

In [100]:
workout_data.ndim

1

In [98]:
workout_data[0].dtype

dtype((numpy.record, [('time_unit', '<i8'), ('watts', '<i8')]))

In [92]:
workout_data[2134]

(2134, 294)

In [61]:
max_watts_t = np.where(new_arr[1, :].max())
max_watts_t

(array([0]),)

## Graph workout

In [None]:
if ftp != None:
    figsize = (18, 8)    
    img, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(color='#252525')
    ax.set_xlabel("Time in Minutes", fontsize='large')
    ax.set_ylabel("Watts", fontsize='large')
    ax.tick_params(labelsize='large')

    # This expands the top of the graph to 20% beyond max watts
    ax.set_ylim(top=max(watts)*1.20)

    # logic for color under the graph based on % of FTP (thanks to Jonas Häggqvist for this code)
    ax.grid(which='major', axis='y', alpha=0.1, linewidth=1)
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 0.00*ftp, color='#646464')
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 0.60*ftp, color='#328bff')
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 0.75*ftp, color='#59bf59')
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 0.90*ftp, color='#ffcc3f')
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 1.05*ftp, color='#ff663a')
    plt.fill_between(minutes, watts_smoothed, where=watts_smoothed > 1.18*ftp, color='#ff340c')

    # Setting the image and location (thanks to Phil Daws for the code that helped me get started)
    # Note:  xy for the purposes of workout date label is set using 'data' for coordinates 
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()
    xy = [xmax-(xmax*0.05), ymax-(ymax*0.05)]
    
    # Adding the workout date to the graph
    workout_date = Annotation(f'Workout date: {date_str}', xy=[xmax//2, ymax-(ymax*0.08)], 
                              ha='center', color='white', fontweight='bold', fontsize='large')
    ax.add_artist(workout_date)
    
    # Annotating max power
    max_watts_point = Annotation(f'Max power: {max_watts}', xy=[])
    

    # Setting plot line color and thickness
    
    plt.plot(minutes, watts_smoothed, color='white', linewidth=1.0)

    plt.show()

else:
    print(f"\nThe graph cannot be drawn; no valid FTP was provided.")
    print(f"If you wish to try again, please have your FTP value ready and then reload this page.")
