# Strava Data Analysis

Analysis of my Strava data

In [2]:
#pandas import
import pandas as pd
#matplotlib import
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm #for colors
#and numpy
import numpy as np

# Silence pandas' SettingWithCopyWarning for the rest of the notebook
# (the option defaults to 'warn').
pd.set_option("mode.chained_assignment", None)


In [3]:
# Load the Strava activity export into a DataFrame.
# The path is relative to the notebook's working directory.
activities_csv = "../../strava_data/activities.csv"
df = pd.read_csv(activities_csv)

In [4]:
# List every column name available in the raw export.
df.columns

Index(['Activity ID', 'Activity Date', 'Activity Name', 'Activity Type',
       'Activity Description', 'Elapsed Time', 'Distance', 'Max Heart Rate',
       'Relative Effort', 'Commute', 'Activity Private Note', 'Activity Gear',
       'Filename', 'Athlete Weight', 'Bike Weight', 'Elapsed Time.1',
       'Moving Time', 'Distance.1', 'Max Speed', 'Average Speed',
       'Elevation Gain', 'Elevation Loss', 'Elevation Low', 'Elevation High',
       'Max Grade', 'Average Grade', 'Average Positive Grade',
       'Average Negative Grade', 'Max Cadence', 'Average Cadence',
       'Max Heart Rate.1', 'Average Heart Rate', 'Max Watts', 'Average Watts',
       'Calories', 'Max Temperature', 'Average Temperature',
       'Relative Effort.1', 'Total Work', 'Number of Runs', 'Uphill Time',
       'Downhill Time', 'Other Time', 'Perceived Exertion', 'Type',
       'Start Time', 'Weighted Average Power', 'Power Count',
       'Prefer Perceived Exertion', 'Perceived Relative Effort', 'Commute.1',
    

In [5]:
# Drop the columns this analysis does not use.
UNUSED_COLUMNS = [
    'Activity ID', 'Relative Effort', 'Commute', 'Activity Private Note',
    'Filename', 'Athlete Weight', 'Bike Weight',
    'Max Grade', 'Average Grade', 'Average Positive Grade',
    'Average Negative Grade', 'Max Cadence', 'Average Cadence',
    'Max Watts', 'Average Watts',
    'Relative Effort.1', 'Total Work', 'Number of Runs', 'Uphill Time',
    'Downhill Time', 'Other Time', 'Perceived Exertion',
    'Weighted Average Power', 'Power Count',
    'Prefer Perceived Exertion', 'Perceived Relative Effort',
    'Total Weight Lifted', 'From Upload', 'Grade Adjusted Distance',
    'Weather Observation Time', 'Weather Condition', 'Weather Temperature',
    'Apparent Temperature', 'Dewpoint', 'Humidity', 'Weather Pressure',
    'Wind Speed', 'Wind Gust', 'Wind Bearing', 'Precipitation Intensity',
    'Sunrise Time', 'Sunset Time', 'Moon Phase', 'Bike',
    'Precipitation Probability', 'Precipitation Type', 'Cloud Cover',
    'Weather Visibility', 'UV Index', 'Weather Ozone', 'Jump Count',
    'Total Grit', 'Average Flow', 'Flagged',
    'Dirt Distance', 'Newly Explored Distance',
    'Newly Explored Dirt Distance', 'Activity Count',
    'Carbon Saved', 'Pool Length', 'Training Load',
    'Average Grade Adjusted Pace', 'Timer Time', 'Total Cycles', 'Media', 'Distance.1',
    'Elapsed Time.1', 'Commute.1', 'Start Time', 'Type', 'Intensity', 'Max Heart Rate.1', 'Max Temperature',
]

# Reassign instead of using inplace=True, and ignore labels that are already
# absent, so the cell can be re-run without raising KeyError on columns that a
# previous run removed. NOTE: errors='ignore' also hides misspelled names in
# the list above, so check the output columns after editing it.
df = df.drop(columns=UNUSED_COLUMNS, errors='ignore')

# Show the most recent activities as a sanity check on the remaining columns.
df.tail(5)

Unnamed: 0,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,Activity Gear,Moving Time,Max Speed,...,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Average Heart Rate,Calories,Average Temperature,Gear,Average Elapsed Speed,Total Steps
553,"Oct 1, 2024, 2:33:12 PM",Morning Run,Run,,2104,6.38,173.0,blue-yellow 2,2081.0,4.098307,...,63.0,61.0,24.799999,78.199997,153.013962,515.0,21.0,18129028.0,3.031844,6162.0
554,"Oct 3, 2024, 6:11:54 PM",Lunch Run,Run,,3386,10.21,180.0,Ghost Blue-orange,3257.0,4.491659,...,71.0,74.0,7.6,60.799999,161.860748,856.0,19.0,15462145.0,3.017407,9528.0
555,"Oct 5, 2024, 4:33:12 PM",Morning Run,Run,,4160,12.17,179.0,Ghost Blue-orange,4087.0,4.648507,...,133.0,116.0,4.4,107.800003,157.222717,992.0,21.0,15462145.0,2.925365,11982.0
556,"Oct 7, 2024, 1:21:03 PM",Morning Run,Run,,2218,6.32,170.0,Ghost Blue-orange,2172.0,4.378336,...,83.0,57.0,11.8,58.799999,148.512436,511.0,20.0,15462145.0,2.852692,6370.0
557,"Oct 11, 2024, 12:03:56 AM",Before I become a little less wise tomorrow…,Run,🦷🦷,2836,8.65,176.0,Ghost Blue-orange,2832.0,4.22167,...,66.0,68.0,5.2,35.200001,153.563965,688.0,19.0,15462145.0,3.050388,8174.0


In [6]:
# Keep only the activities recorded as runs.
run_mask = df['Activity Type'].isin(['Run'])

# Take an explicit copy: later cells add and overwrite columns on run_df, and
# assigning into a filtered slice of df is what triggers pandas'
# SettingWithCopyWarning. An independent copy makes those writes unambiguous.
run_df = df[run_mask].copy()

In [7]:
# Preview the first five runs.
run_df.head(n=5)

Unnamed: 0,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,Activity Gear,Moving Time,Max Speed,...,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Average Heart Rate,Calories,Average Temperature,Gear,Average Elapsed Speed,Total Steps
0,"Dec 2, 2021, 2:35:00 AM",Evening Run,Run,Watch decided to quit the workout app somewher...,1766,5.65,,,1742.0,5.829807,...,31.542353,27.642401,6.2,25.4,,579.666992,,,3.200994,
1,"Dec 3, 2021, 11:37:11 PM",Afternoon Run,Run,,3963,12.18,,,3800.0,6.470929,...,122.065285,114.565002,6.3,71.199997,,1271.583862,,,3.075138,
3,"Dec 9, 2021, 2:35:58 AM",Evening Run,Run,,2290,7.16,184.0,,2212.0,5.181694,...,75.169441,74.169403,25.4,67.300003,160.6875,744.039062,,,3.130288,
4,"Dec 11, 2021, 2:04:02 AM",Evening Run,Run,,2837,8.71,180.0,,2765.0,4.975095,...,101.592628,103.792999,16.200001,97.5,156.538239,904.015808,,,3.070377,
5,"Dec 13, 2021, 2:31:06 PM",Morning Run,Run,"A cold, wet start to the week.",3244,9.93,182.0,,3206.0,7.838587,...,104.995361,103.394997,16.200001,88.0,161.34729,1029.512939,,,3.063205,


In [8]:
# Inspect the column dtypes before any conversion; 'Activity Date' is still a
# plain object (string) column at this point.
run_df.dtypes

Activity Date             object
Activity Name             object
Activity Type             object
Activity Description      object
Elapsed Time               int64
Distance                 float64
Max Heart Rate           float64
Activity Gear             object
Moving Time              float64
Max Speed                float64
Average Speed            float64
Elevation Gain           float64
Elevation Loss           float64
Elevation Low            float64
Elevation High           float64
Average Heart Rate       float64
Calories                 float64
Average Temperature      float64
Gear                     float64
Average Elapsed Speed    float64
Total Steps              float64
dtype: object

In [9]:
# Convert the stored values into the forms the analysis needs.

# Parse the activity timestamps as UTC and convert them to Pacific time.
# Most (but not all) events were in Pacific time, so this is close enough.
activity_date_local = (
    pd.to_datetime(run_df['Activity Date'], utc=True)
    .dt.tz_convert('US/Pacific')
)

# Build a fresh frame instead of assigning into the filtered slice, and renumber
# the rows 0..n-1 (later cells look runs up by this renumbered label).
# Start time and the calendar fields are derived from the already-localized
# 'Activity Date', so they reflect Pacific time rather than UTC.
run_df = (
    run_df
    .assign(**{'Activity Date': activity_date_local})
    .reset_index(drop=True)
    .assign(**{
        'Start Time': lambda runs: runs['Activity Date'].dt.time,
        'Month': lambda runs: runs['Activity Date'].dt.month,
        'Year': lambda runs: runs['Activity Date'].dt.year,
        'Day of Year': lambda runs: runs['Activity Date'].dt.dayofyear,
    })
)



In [10]:
# Re-check the dtypes after the conversion cell; 'Activity Date' should now be
# a timezone-aware datetime column.
run_df.dtypes

Activity Date            datetime64[ns, US/Pacific]
Activity Name                                object
Activity Type                                object
Activity Description                         object
Elapsed Time                                  int64
Distance                                    float64
Max Heart Rate                              float64
Activity Gear                                object
Moving Time                                 float64
Max Speed                                   float64
Average Speed                               float64
Elevation Gain                              float64
Elevation Loss                              float64
Elevation Low                               float64
Elevation High                              float64
Average Heart Rate                          float64
Calories                                    float64
Average Temperature                         float64
Gear                                        float64
Average Elap

In [11]:
# Look at one run in full, selected by its row label.
# NOTE(review): 342 is a hard-coded label; presumably the most recent run in
# this export - confirm it still points at the intended row when the data changes.
run_df.loc[342]

Activity Date                               2024-10-10 17:03:56-07:00
Activity Name            Before I become a little less wise tomorrow…
Activity Type                                                     Run
Activity Description                                               🦷🦷
Elapsed Time                                                     2836
Distance                                                         8.65
Max Heart Rate                                                  176.0
Activity Gear                                       Ghost Blue-orange
Moving Time                                                    2832.0
Max Speed                                                     4.22167
Average Speed                                                3.054696
Elevation Gain                                                   66.0
Elevation Loss                                                   68.0
Elevation Low                                                     5.2
Elevation High      