In [9]:
import pandas as pd
import numpy as np

In [31]:
# Read the raw workout export (dated 2024-02-05 in its filename) into a DataFrame.
df_normalized_sets = pd.read_csv(
    filepath_or_buffer="../data/raw/workout_2024-02-05.csv",
)

In [32]:
# Render the loaded sets table; a bare last expression is displayed by the notebook.
df_normalized_sets

Unnamed: 0,DATE,WORKOUT,EXERCISE,MUSCLE,SET,NB_REPS,WEIGHT
0,2023-03-27,Legs,Squat,Legs,1,10,40.0
1,2023-03-27,Legs,Squat,Legs,2,8,50.0
2,2023-03-27,Legs,Squat,Legs,3,8,60.0
3,2023-03-27,Legs,Squat,Legs,4,8,50.0
4,2023-03-27,Legs,Presse à cuisses,Legs,1,10,60.0
...,...,...,...,...,...,...,...
1719,2024-02-04,Legs,Leg extension,Legs,2,8,89.0
1720,2024-02-04,Legs,Leg extension,Legs,3,7,89.0
1721,2024-02-04,Legs,Extension mollet debout,Legs,1,12,110.0
1722,2024-02-04,Legs,Extension mollet debout,Legs,2,11,110.0


In [29]:
def weighted_mean(x, weights=None):
    """Return the average of ``x`` weighted by repetition counts.

    Written to be used as a ``groupby(...).agg`` callback: each value in ``x``
    is weighted by the entry of ``weights`` that carries the same index label.

    Parameters
    ----------
    x : pandas.Series
        Values to average. Its index labels are used to look up the weights.
    weights : pandas.Series, optional
        Weights addressable by the labels in ``x.index``. When omitted, the
        ``NB_REPS`` column of the notebook-level ``df_normalized_sets`` frame
        is used, so that frame must already be loaded.

    Returns
    -------
    float
        The weighted average, or NaN when the selected weights sum to zero.
    """
    weight_source = df_normalized_sets["NB_REPS"] if weights is None else weights
    # Pick the weights row-for-row by label so they line up with the group values.
    group_weights = weight_source.loc[x.index]

    # np.average raises ZeroDivisionError when the weights sum to zero; inside
    # groupby.agg that would abort the whole aggregation, so report NaN for
    # that group instead.
    if float(np.sum(np.asarray(group_weights, dtype=float))) == 0.0:
        return np.nan

    return np.average(x, weights=group_weights)

In [12]:
# One row per (DATE, WORKOUT, EXERCISE) with the number of sets logged for it.
# sort=False keeps the groups in their order of first appearance.
df_workout_days = (
    df_normalized_sets
    .groupby(["DATE", "WORKOUT", "EXERCISE"], sort=False)
    .agg(SETS=("SET", len))
    .reset_index()
)

In [13]:
# Render the set counts per (DATE, WORKOUT, EXERCISE).
df_workout_days

Unnamed: 0,DATE,WORKOUT,EXERCISE,SETS
0,2023-03-27,Legs,Squat,4
1,2023-03-27,Legs,Presse à cuisses,4
2,2023-03-27,Legs,Leg extension,4
3,2023-03-28,Back / Shoulders,Tirage vertical,4
4,2023-03-28,Back / Shoulders,Tractions pronation,1
...,...,...,...,...
548,2024-02-04,Legs,Squat,3
549,2024-02-04,Legs,Presse à cuisses,3
550,2024-02-04,Legs,Leg curl replié,3
551,2024-02-04,Legs,Leg extension,3


In [16]:
# Per-exercise summary: first recorded muscle, plain mean of reps, heaviest
# weight, and the average weight as computed by weighted_mean.
df_exercises = (
    df_normalized_sets
    .groupby("EXERCISE")
    .agg(
        MUSCLE=("MUSCLE", 'first'),
        AVERAGE_REPS=("NB_REPS", "mean"),
        MAX_WEIGHT=("WEIGHT", "max"),
        AVERAGE_WEIGHT=("WEIGHT", weighted_mean),
    )
    .reset_index()
)

In [17]:
# Render the per-exercise summary table.
df_exercises

Unnamed: 0,EXERCISE,MUSCLE,AVERAGE_REPS,MAX_WEIGHT,AVERAGE_WEIGHT
0,Adducteurs,Legs,7.0,66.0,61.0
1,Barre au front derriere la tête,Triceps,8.8,35.0,23.518939
2,Barre au front derrière la tête incliné,Triceps,12.0,25.0,22.5
3,Cable chest flies,Chest,9.0,18.0,15.777778
4,Cable crossover,Chest,9.37931,23.0,16.893382
5,Cable lat pullovers,Back,10.179487,45.0,33.400504
6,Cable rear delt fly,Shoulders,10.179487,16.0,12.604534
7,Crunch,Abs,7.0,0.0,0.0
8,Curl barre,Biceps,8.25,30.0,23.919192
9,Curl haltères,Biceps,7.25,14.0,11.816092


In [19]:
# Per-(EXERCISE, SET) summary: max and mean reps, heaviest weight, and the
# average weight as computed by weighted_mean.
df_sets = (
    df_normalized_sets
    .groupby(["EXERCISE", "SET"])
    .agg(
        MAX_REPS=("NB_REPS", "max"),
        AVERAGE_REPS=("NB_REPS", "mean"),
        MAX_WEIGHT=("WEIGHT", "max"),
        AVERAGE_WEIGHT=("WEIGHT", weighted_mean),
    )
    .reset_index()
)

In [20]:
# Render the per-(EXERCISE, SET) summary table.
df_sets

Unnamed: 0,EXERCISE,SET,MAX_REPS,AVERAGE_REPS,MAX_WEIGHT,AVERAGE_WEIGHT
0,Adducteurs,1,10,10.000000,59.0,59.000000
1,Adducteurs,2,4,4.000000,66.0,66.000000
2,Barre au front derriere la tête,1,16,10.300000,35.0,23.148867
3,Barre au front derriere la tête,2,14,8.482759,35.0,23.743902
4,Barre au front derriere la tête,3,10,7.620690,35.0,24.321267
...,...,...,...,...,...,...
192,Élévation latérale à la poulie,3,8,7.166667,16.0,13.418605
193,Élévations latérales,1,14,10.750000,14.0,11.383721
194,Élévations latérales,2,11,8.718750,14.0,11.566308
195,Élévations latérales,3,10,7.406250,14.0,11.510549


In [25]:
# Series indexed by DATE holding the first WORKOUT label found for that date.
workouts_by_date = (
    df_normalized_sets
    .groupby("DATE")["WORKOUT"]
    .first()
)

In [26]:
# Render the workout label selected for each date.
workouts_by_date

DATE
2023-03-27                           Legs
2023-03-28               Back / Shoulders
2023-04-03              Chest / Shoulders
2023-04-04               Back / Shoulders
2023-04-06                           Legs
                         ...             
2024-01-27    Chest / Shoulders / Triceps
2024-01-28                           Legs
2024-02-02                  Back / Biceps
2024-02-03    Chest / Shoulders / Triceps
2024-02-04                           Legs
Name: WORKOUT, Length: 103, dtype: object

In [27]:
# Count how many dates each workout label was assigned to, most frequent first.
workout_occurrences = (
    workouts_by_date
    .reset_index()
    .groupby("WORKOUT")
    .agg(OCCURRENCES=("DATE", len))
    .sort_values(by="OCCURRENCES", ascending=False)
    .reset_index()
)

In [28]:
# Render the workout labels with their occurrence counts, sorted in descending order.
workout_occurrences

Unnamed: 0,WORKOUT,OCCURRENCES
0,Chest / Shoulders / Triceps,34
1,Legs,23
2,Back / Shoulders / Biceps,18
3,Back / Biceps,13
4,Arms,4
5,Back / Shoulders,4
6,Chest / Shoulders,4
7,Arms / Shoulders,1
8,Biceps / Abs,1
9,Chest,1
