<a href="https://colab.research.google.com/github/cmannnn/2021_strava/blob/main/2021_strava.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# plt.style.use('ggplot')
# sns.set_style('darkgrid')

In [2]:
# mounting google drive
# Makes the Drive contents available under /content/drive, which is the root
# of the CSV paths defined further down in this notebook.
# NOTE(review): the google.colab import presumably only resolves inside a
# Colab runtime - confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Directory on the mounted Drive that holds the 2021_strava CSV files.
# Defined once so that relocating the data means editing a single line.
STRAVA_EXPORT_DIR = "/content/drive/MyDrive/Colabby Krabby Paddy/2021_strava/2021_strava"

# setting the path of each interesting CSV (plain strings, same values as before)
activities_path = f"{STRAVA_EXPORT_DIR}/activities.csv"
local_legends_segments_path = f"{STRAVA_EXPORT_DIR}/local_legend_segments.csv"
kudos_path = f"{STRAVA_EXPORT_DIR}/kudos.csv"
segments_path = f"{STRAVA_EXPORT_DIR}/segments.csv"

In [4]:
# reading in activities data
# Dates are deliberately not parsed at read time: `parse_dates=True` would only
# ask pandas to parse the index (here the default integer index), leaving
# 'Activity Date' as strings. That column is converted explicitly with
# pd.to_datetime in a later cell.
activities_df = pd.read_csv(activities_path)

In [5]:
def _untranslated(label):
    """Return the raw CSV header used for a column whose title failed to translate.

    These headers are an HTML ``<span class="translation_missing">`` wrapper
    around the readable label. The key at the end of the ``title`` attribute is
    the label lower-cased with spaces replaced by underscores.
    """
    key = label.lower().replace(' ', '_')
    return (
        '<span class="translation_missing" title="translation missing: '
        f'en-US.lib.export.portability_exporter.activities.horton_values.{key}">'
        f'{label}</span>'
    )


# columns to drop from activities_df (order kept as originally listed)
cols_to_drop = [
    'Activity Description', 'Max Heart Rate', 'Relative Effort', 'Commute',
    'Activity Gear', 'Filename', 'Athlete Weight', 'Bike Weight',
    'Elapsed Time.1', 'Moving Time', 'Distance.1',
    'Average Positive Grade', 'Average Negative Grade',
    'Max Cadence', 'Average Cadence', 'Max Heart Rate.1', 'Average Heart Rate',
    'Max Watts', 'Average Watts', 'Max Temperature', 'Average Temperature',
    'Relative Effort.1', 'Total Work', 'Number of Runs',
    'Uphill Time', 'Downhill Time', 'Other Time',
    _untranslated('Start Time'),
    'Weighted Average Power', 'Power Count', 'Prefer Perceived Exertion',
    'Commute.1', 'Total Weight Lifted', 'From Upload', 'Grade Adjusted Distance',
    'Weather Observation Time', 'Weather Condition', 'Weather Temperature',
    'Apparent Temperature', 'Dewpoint', 'Humidity', 'Weather Pressure',
    'Wind Speed', 'Wind Gust', 'Wind Bearing', 'Precipitation Intensity',
    'Sunset Time', 'Moon Phase', 'Bike', 'Gear', 'Precipitation Probability',
    'Precipitation Type', 'Cloud Cover', 'Weather Visibility', 'UV Index',
    'Weather Ozone', 'Sunrise Time', 'Activity Name',
    _untranslated('Type'),
    _untranslated('Jump Count'),
    _untranslated('Total Grit'),
    _untranslated('Avg Flow'),
    _untranslated('Flagged'),
    _untranslated('Avg Elapsed Speed'),
    _untranslated('Dirt Distance'),
    _untranslated('Newly Explored Distance'),
    _untranslated('Newly Explored Dirt Distance'),
]

In [6]:
# dropping the columns
# `columns=` already names the axis, so no separate axis argument is needed,
# and the result is rebound instead of mutating the frame with inplace=True.
# drop() raises KeyError if any listed column is absent, which is what happens
# when this cell is re-run without reloading the CSV first.
activities_df = activities_df.drop(columns=cols_to_drop)

In [7]:
# checking correct columns were dropped
# (a bare last expression is displayed by the notebook; no print() needed)
activities_df.columns

Index(['Activity ID', 'Activity Date', 'Activity Type', 'Elapsed Time',
       'Distance', 'Max Speed', 'Average Speed', 'Elevation Gain',
       'Elevation Loss', 'Elevation Low', 'Elevation High', 'Max Grade',
       'Average Grade', 'Calories', 'Perceived Exertion',
       'Perceived Relative Effort'],
      dtype='object')


In [8]:
# peek at the 'Activity Date' values as loaded, before any datetime conversion
activities_df['Activity Date'].head()

0    Jun 28, 2020, 5:03:05 PM
1    Jun 29, 2020, 9:42:34 PM
2    Jun 30, 2020, 9:01:11 PM
3     Jul 1, 2020, 8:38:04 PM
4     Jul 3, 2020, 1:44:58 PM
Name: Activity Date, dtype: object


In [9]:
# Convert the 'Activity Date' strings into pandas datetimes and store them
# back in the same column.
activity_dates = pd.to_datetime(activities_df['Activity Date'])
activities_df['Activity Date'] = activity_dates

In [10]:
# checking the data types of each column
# (a bare last expression is displayed by the notebook; no print() needed)
activities_df.dtypes

Activity ID                           int64
Activity Date                datetime64[ns]
Activity Type                        object
Elapsed Time                          int64
Distance                             object
Max Speed                           float64
Average Speed                       float64
Elevation Gain                      float64
Elevation Loss                      float64
Elevation Low                       float64
Elevation High                      float64
Max Grade                           float64
Average Grade                       float64
Calories                            float64
Perceived Exertion                  float64
Perceived Relative Effort           float64
dtype: object


In [11]:
# confirm 'Activity Date' now holds datetime values
activities_df['Activity Date'].head()

0   2020-06-28 17:03:05
1   2020-06-29 21:42:34
2   2020-06-30 21:01:11
3   2020-07-01 20:38:04
4   2020-07-03 13:44:58
Name: Activity Date, dtype: datetime64[ns]


In [12]:
# Keep only activities dated 2021-01-01 or later.
# `>=` keeps an activity that starts exactly at midnight on Jan 1, which a
# strict `>` would drop. .copy() makes the result an independent frame so that
# later column assignments do not trigger SettingWithCopyWarning.
from_2021_onwards = activities_df['Activity Date'] >= '2021-01-01'
activities_df = activities_df.loc[from_2021_onwards].copy()

In [13]:
# Index the frame by activity timestamp. The result is rebound rather than
# mutated with inplace=True, which keeps the call chainable.
activities_df = activities_df.set_index('Activity Date')

In [14]:
# peek at 'Elapsed Time' before it is rescaled
activities_df['Elapsed Time'].head()

Activity Date
2021-01-01 19:06:13    8439
2021-01-03 18:17:59    3473
2021-01-10 20:19:44    1756
2021-01-11 20:40:51    1935
2021-01-12 20:35:44    2065
Name: Elapsed Time, dtype: int64


In [15]:
# Scale 'Elapsed Time' down by a factor of 60.
# NOTE(review): presumably this converts seconds to minutes - confirm against
# the export format. Running this cell more than once divides the column again.
activities_df['Elapsed Time'] = activities_df['Elapsed Time'].div(60)

In [17]:
# Preview the cleaned frame. A bare expression gets the notebook's table
# rendering, whereas print() elides the middle columns with '...'.
activities_df.head()

                     Activity ID  ... Perceived Relative Effort
Activity Date                     ...                          
2021-01-01 19:06:13   4551165027  ...                      67.0
2021-01-03 18:17:59   4562526489  ...                      87.0
2021-01-10 20:19:44   4600905750  ...                      29.0
2021-01-11 20:40:51   4605651974  ...                      46.0
2021-01-12 20:35:44   4610990847  ...                      34.0

[5 rows x 15 columns]


In [18]:
# list the remaining columns now that 'Activity Date' has become the index
activities_df.columns

Index(['Activity ID', 'Activity Type', 'Elapsed Time', 'Distance', 'Max Speed',
       'Average Speed', 'Elevation Gain', 'Elevation Loss', 'Elevation Low',
       'Elevation High', 'Max Grade', 'Average Grade', 'Calories',
       'Perceived Exertion', 'Perceived Relative Effort'],
      dtype='object')


In [10]:
# Load the local-legend segments CSV and preview its first rows.
# The preview is a bare last expression so the notebook renders it as a table.
local_legends_segments_df = pd.read_csv(local_legends_segments_path)
local_legends_segments_df.head()

   Segment ID      Segment Name
0      784795  Trafton Rd Climb
1     5265111       Sheddsville
2     6548500           college
3     8396145             Kings
4     8691292  Cypress St Climb
