In [1]:
import pandas as pd
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [34]:
# Read the per-set workout log; every later cell aggregates this frame.
raw_sets_path = "../data/raw/workout_2024-02-05.csv"
df_normalized_sets = pd.read_csv(raw_sets_path)

In [35]:
# Show the loaded per-set records (one row per DATE / EXERCISE / SET).
df_normalized_sets

Unnamed: 0,DATE,WORKOUT,EXERCISE,MUSCLE,SET,NB_REPS,WEIGHT
0,2023-03-27,Legs,Barbell Squat,Legs,1,10,40.0
1,2023-03-27,Legs,Barbell Squat,Legs,2,8,50.0
2,2023-03-27,Legs,Barbell Squat,Legs,3,8,60.0
3,2023-03-27,Legs,Barbell Squat,Legs,4,8,50.0
4,2023-03-27,Legs,Leg Press,Legs,1,10,60.0
...,...,...,...,...,...,...,...
1719,2024-02-04,Legs,Leg Extensions,Legs,2,8,89.0
1720,2024-02-04,Legs,Leg Extensions,Legs,3,7,89.0
1721,2024-02-04,Legs,Smith machine standing calf raise,Legs,1,12,110.0
1722,2024-02-04,Legs,Smith machine standing calf raise,Legs,2,11,110.0


In [36]:
def weighted_mean(x):
    """Return the mean of ``x`` weighted by the number of repetitions.

    Intended as an aggregation callable for ``groupby(...).agg``: ``x`` is the
    slice of one column belonging to a single group. The weights are not passed
    in; they are looked up by index label in the notebook-level
    ``df_normalized_sets["NB_REPS"]``, so ``x`` must share that frame's index.

    Parameters
    ----------
    x : pandas.Series
        Values to average (e.g. the ``WEIGHT`` column of one group).

    Returns
    -------
    float
        ``sum(x * reps) / sum(reps)`` over the rows where both the value and
        the repetition count are present. ``NaN`` is returned when no such row
        exists or when the repetitions sum to zero.
    """
    values = x.to_numpy(dtype=float, na_value=np.nan)
    reps = df_normalized_sets.loc[x.index, "NB_REPS"].to_numpy(dtype=float, na_value=np.nan)

    # Skip rows with a missing value or missing rep count, mirroring the
    # NaN-skipping behaviour of the "mean"/"max" aggregations used alongside.
    usable = ~(np.isnan(values) | np.isnan(reps))
    values, reps = values[usable], reps[usable]

    # np.average raises ZeroDivisionError when the weights sum to zero, which
    # would abort the whole groupby; report "undefined" for that group instead.
    if reps.sum() == 0:
        return np.nan
    return np.average(values, weights=reps)

In [37]:
# Workout days dataframe: one row per (DATE, WORKOUT, EXERCISE) with the number
# of sets logged for it. sort=False keeps the groups in order of first appearance.
df_workout_days = (
    df_normalized_sets
    .groupby(["DATE", "WORKOUT", "EXERCISE"], sort=False)
    # "size" is the built-in row count; it replaces a Python-level len() call per group.
    .agg(SETS=("SET", "size"))
    .reset_index()
)

In [38]:
# Show the per-date, per-exercise set counts.
df_workout_days

Unnamed: 0,DATE,WORKOUT,EXERCISE,SETS
0,2023-03-27,Legs,Barbell Squat,4
1,2023-03-27,Legs,Leg Press,4
2,2023-03-27,Legs,Leg Extensions,4
3,2023-03-28,Back / Shoulders,Lat pull-down,4
4,2023-03-28,Back / Shoulders,Pull-up,1
...,...,...,...,...
547,2024-02-04,Legs,Barbell Squat,3
548,2024-02-04,Legs,Leg Press,3
549,2024-02-04,Legs,Seated Leg Curl,3
550,2024-02-04,Legs,Leg Extensions,3


In [39]:
# Per-exercise summary: muscle, mean reps, heaviest weight, and the
# weight averaged with weighted_mean (i.e. weighted by NB_REPS).
exercise_aggregations = {
    "MUSCLE": ("MUSCLE", 'first'),
    "AVERAGE_REPS": ("NB_REPS", "mean"),
    "MAX_WEIGHT": ("WEIGHT", "max"),
    "AVERAGE_WEIGHT": ("WEIGHT", weighted_mean),
}
df_exercises = (
    df_normalized_sets
    .groupby("EXERCISE")
    .agg(**exercise_aggregations)
    .reset_index()
)

In [40]:
# Show the per-exercise summary table.
df_exercises

Unnamed: 0,EXERCISE,MUSCLE,AVERAGE_REPS,MAX_WEIGHT,AVERAGE_WEIGHT
0,Barbell Curl,Biceps,8.25,30.0,23.919192
1,Barbell Squat,Legs,7.743243,105.0,78.094241
2,Behind-the-head skullcrusher,Triceps,8.8,35.0,23.518939
3,Bench press,Chest,7.916667,65.0,55.326316
4,Bent Over Barbell Row,Back,8.942308,65.0,56.163441
5,Bent-over dumbbell rear delt fly,Shoulders,10.888889,8.0,6.938776
6,Cable cross-over,Chest,9.37931,23.0,16.893382
7,Cable overhead triceps extension,Triceps,7.933333,29.0,25.453782
8,Cable rope hammer curl,Biceps,10.111111,41.0,37.509158
9,Close-grip bench press,Triceps,9.333333,50.0,47.380952


In [41]:
# Per-(exercise, set number) summary: best and mean reps, heaviest weight,
# and the weight averaged with weighted_mean (i.e. weighted by NB_REPS).
set_aggregations = {
    "MAX_REPS": ("NB_REPS", "max"),
    "AVERAGE_REPS": ("NB_REPS", "mean"),
    "MAX_WEIGHT": ("WEIGHT", "max"),
    "AVERAGE_WEIGHT": ("WEIGHT", weighted_mean),
}
df_sets = (
    df_normalized_sets
    .groupby(["EXERCISE", "SET"])
    .agg(**set_aggregations)
    .reset_index()
)

In [42]:
# Show the per-exercise, per-set-number summary table.
df_sets

Unnamed: 0,EXERCISE,SET,MAX_REPS,AVERAGE_REPS,MAX_WEIGHT,AVERAGE_WEIGHT
0,Barbell Curl,1,14,10.000000,26.0,22.866667
1,Barbell Curl,2,10,8.500000,30.0,27.529412
2,Barbell Curl,3,10,9.500000,30.0,25.789474
3,Barbell Curl,4,3,3.000000,26.0,26.000000
4,Barbell Curl,5,10,10.000000,18.0,18.000000
...,...,...,...,...,...,...
182,Triceps Pushdown,3,9,7.764706,36.0,24.121212
183,Triceps dip,1,12,9.500000,10.0,4.210526
184,Triceps dip,2,11,8.750000,10.0,4.000000
185,Triceps dip,3,10,7.750000,10.0,4.516129


In [43]:
# Workout label per date: a Series indexed by DATE holding the first
# WORKOUT value recorded on that date.
workouts_by_date = (
    df_normalized_sets
    .groupby("DATE")["WORKOUT"]
    .first()
)

In [44]:
# Show the DATE -> WORKOUT Series.
workouts_by_date

DATE
2023-03-27                           Legs
2023-03-28               Back / Shoulders
2023-04-03              Chest / Shoulders
2023-04-04               Back / Shoulders
2023-04-06                           Legs
                         ...             
2024-01-27    Chest / Shoulders / Triceps
2024-01-28                           Legs
2024-02-02                  Back / Biceps
2024-02-03    Chest / Shoulders / Triceps
2024-02-04                           Legs
Name: WORKOUT, Length: 103, dtype: object

In [45]:
# Number of dates on which each workout was performed, most frequent first.
# Built in a single chain so the name is bound once, to the final result.
workout_occurrences = (
    workouts_by_date
    .reset_index()
    .groupby("WORKOUT")
    # "size" is the built-in row count; it replaces a Python-level len() call per group.
    .agg(OCCURRENCES=("DATE", "size"))
    # A stable sort keeps workouts with equal counts in the group-key order
    # produced by groupby, so the ranking of ties is deterministic.
    .sort_values(by="OCCURRENCES", ascending=False, kind="stable")
    .reset_index()
)

In [46]:
# Show how many dates each workout was performed on, ordered by OCCURRENCES descending.
workout_occurrences

Unnamed: 0,WORKOUT,OCCURRENCES
0,Chest / Shoulders / Triceps,34
1,Legs,23
2,Back / Shoulders / Biceps,18
3,Back / Biceps,13
4,Arms,4
5,Back / Shoulders,4
6,Chest / Shoulders,4
7,Arms / Shoulders,1
8,Biceps / Abs,1
9,Chest,1
