In [113]:
import numpy as np
import pandas as pd
import glob
from datetime import datetime
import matplotlib.pyplot as plt
#Seaborn is a data visualization library.
#import seaborn as sns

In [114]:
strava = pd.read_json('../observations/strava/activities.json')
sugarwod = pd.read_csv('../observations/sugarwod/workouts.csv')

Create new Strava Dataframe columns 'date' and 'start_time' from 'start_date_local.'<br/>
This makes it easier to merge SugarWOD and Strava data on a common column.<br/>
We can drop the 'start_date_local' column afterward, we won't need it anymore.

In [117]:
strava['start_date_local'] = pd.to_datetime(strava['start_date_local'])
strava['start_time'] = strava['start_date_local'].dt.strftime('%H:%M:%S')
strava['date'] = strava['start_date_local'].dt.strftime('%Y-%m-%d')
strava = strava.drop(columns=['start_date_local'])

In [118]:
# Convert sugarwod data format to datetime format
sugarwod['date'] = pd.to_datetime(sugarwod['date'])
sugarwod['date'] = sugarwod['date'].dt.strftime('%Y-%m-%d')

In [136]:
# Test: Merge Strava and SugarWOD files together by date field using slimmed down dataframes for testing
strav_test=strava[['date', 'average_heartrate', 'max_heartrate']].copy()
sugar_test=sugarwod[['date', 'score_type', 'pr']].copy()

In [137]:
test = pd.concat([strav_test, sugar_test])

In [138]:
# verify there are some rows where there is data for all fields
test.loc[test['date'] == '2022-07-20']

Unnamed: 0,date,average_heartrate,max_heartrate,score_type,pr
57,2022-07-20,158.1,171.0,,
60,2022-07-20,,,Time,PR


In [140]:
# create a new DataFrame by combining rows with same date values
# as_index 
df_new = test.groupby(test['date'], as_index=False).aggregate('first')

# verify the rows are merged appropriately
df_new.loc[df_new['date'] == '2022-07-20']

Unnamed: 0,date,average_heartrate,max_heartrate,score_type,pr
154,2022-07-20,158.1,171.0,Time,PR


In [79]:
# concatenation appears to merge rows with common 'Date' field correctly
# only after the above cell's modification
pd.concat([wodify_lifts, wodify_metcons])

Unnamed: 0,Date,Component,Affiliate Name,Class Name,Result,Is Personal Record,Personal Record Description,Performance Result Type,Comment,From Weightlifting Total,Component Description,Component(2),Fully Formatted Result,Is Rx,Is Rx Plus,Result Type Label,Full Comment
0,2023-02-08,Back Squat,CrossFit Cove,Cove Fitness: 5:45 PM,1 x 1 @ 245 lbs,False,,Weight,,False,,,,,,,
1,2023-02-07,Deadlift,CrossFit Cove,Cove Fitness: 5:45 PM,1 x 10 @ 155 lbs,False,,Weight,,False,,,,,,,
2,2023-01-18,Deadlift,CrossFit Cove,Cove Fitness: Noon,1 x 8 @ 215 lbs,False,,Weight,,False,,,,,,,
3,2023-01-16,Hang Power Snatch,CrossFit Cove,Cove Fitness: 5:45 PM,1 x 1 @ 135 lbs,False,,Weight,,False,,,,,,,
4,2023-01-10,Deadlift,CrossFit Cove,Cove Fitness: Noon,1 x 5 @ 275 lbs,True,PR by 20 lbs vs. 255 on 11/02/2022,Weight,,False,,,,,,,
5,2023-01-09,Hang Power Clean,CrossFit Cove,Cove Fitness: Noon,1 x 1 @ 165 lbs,False,,Weight,,False,,,,,,,
6,2023-01-05,Bench Press,CrossFit Cove,Cove Fitness: Noon,1 x 10 @ 155 lbs,False,,Weight,,False,,,,,,,
7,2022-11-28,Overhead Squat,CrossFit Cove,Cove Fitness: 7:00 AM,1 x 1 @ 135 lbs,False,,Weight,,False,,,,,,,
8,2022-11-08,Strict Press,CrossFit Cove,Cove Fitness: 7:00 AM,1 x 5 @ 115 lbs,False,,Weight,,False,,,,,,,
9,2022-11-02,Deadlift,CrossFit Cove,Cove Fitness: 7:00 AM,1 x 3 @ 255 lbs,False,,Weight,,False,,,,,,,


Merge all Wodify files together into one Dataframe

In [141]:
wodify_lifts = pd.read_excel('../observations/wodify/PerformanceResults.xlsx')
wodify_metcons = pd.read_excel('../observations/wodify/PerformanceResultsMetcons.xlsx')
wodify_prs = pd.read_excel('../observations/wodify/PerformanceResultsPRs.xlsx')

In [151]:
wodify_lifts['date'] = wodify_lifts['Date'].dt.strftime('%Y-%m-%d')
wodify_lifts = wodify_lifts.drop(columns=['Date'])
wodify_metcons['date'] = wodify_metcons['Date'].dt.strftime('%Y-%m-%d')
wodify_metcons = wodify_metcons.drop(columns=['Date'])
wodify_prs['Peformance Result Date'] = pd.to_datetime(wodify_prs['Performance Result Date'])
wodify_prs['date'] = wodify_prs['Performance Result Date'].dt.date
wodify_prs = wodify_prs.drop(columns=['Performance Result Date'])

### TODO: how to merge Dataframes with the same Column names and preserve the data in each?

#### Notice how merging dataframes with overlapping columns results in data loss

In [172]:
# Test: Create slimmed down Dataframes for testing purposes

lift_test = wodify_lifts[['date', 'Component', 'Affiliate Name']].copy()
metcon_test = wodify_metcons[['date', 'Component', 'Affiliate Name']].copy()
prs_test = wodify_prs[['date', 'Component Name', 'Result']].copy()
wodify_test = pd.concat([lift_test, metcon_test, prs_test])
prs_test

Unnamed: 0,date,Component Name,Result
0,2023-01-10,Deadlift,1 x 5 @ 275 lbs


In [173]:
# verify there are some rows where there is data for all fields
#
# Use date '2023-01-10' because it is the only date for which 
# there is an entry in all three Dataframes

wodify_test.loc[wodify_test['date'] == '2023-01-10']

Unnamed: 0,date,Component,Affiliate Name,Component Name,Result
4,2023-01-10,Deadlift,CrossFit Cove,,
14,2023-01-10,Marston,CrossFit Cove,,


#### Where as if I selected non-overlapping columns...

In [182]:
lift_test2 = wodify_lifts[['date', 'From Weightlifting Total']].copy()
metcon_test2 = wodify_metcons[['date', 'Is Rx']].copy()
prs_test2 = wodify_prs[['date', 'Rep Scheme']].copy()
wodify_test2 = pd.concat([lift_test2, metcon_test2, prs_test2])

In [183]:
wodify_test2.loc[wodify_test2['date'] == '2023-01-10']

Unnamed: 0,date,From Weightlifting Total,Is Rx,Rep Scheme
4,2023-01-10,False,,
14,2023-01-10,,True,


In [181]:
wodify_prs

Unnamed: 0,Component Name,Result,Rep Scheme,Performance Result Comment,Class Name,Personal Record Text,Peformance Result Date,date
0,Deadlift,1 x 5 @ 275 lbs,Build to a heavy set of 5,,Cove Fitness: Noon,PR by 20 lbs vs. 255 on 11/02/2022,2023-01-10,2023-01-10
