In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt

sb.set()

### Gym Members Dataset

#### Import Gym Members Dataset

In [2]:
exercise_df = pd.read_csv('../datasets/gym_members_exercise_tracking.csv')

exercise_df.rename(columns={'Weight (kg)':'Weight',
                            'Height (m)':'Height',
                            'Session_Duration (hours)':'Duration',
                            'Water_Intake (liters)':'Water_Intake',
                            'Workout_Frequency (days/week)':'Frequency'},
                            inplace=True)

exercise_df.head()

Unnamed: 0,Age,Gender,Weight,Height,Max_BPM,Avg_BPM,Resting_BPM,Duration,Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake,Frequency,Experience_Level,BMI
0,56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.2
1,46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0
2,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71
3,25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41
4,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39


In [3]:
print('Data Type: ', type(exercise_df))
print('Data Dimension: ', exercise_df.shape)

Data Type:  <class 'pandas.core.frame.DataFrame'>
Data Dimension:  (973, 15)


In [4]:
exercise_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 973 entries, 0 to 972
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Age               973 non-null    int64  
 1   Gender            973 non-null    object 
 2   Weight            973 non-null    float64
 3   Height            973 non-null    float64
 4   Max_BPM           973 non-null    int64  
 5   Avg_BPM           973 non-null    int64  
 6   Resting_BPM       973 non-null    int64  
 7   Duration          973 non-null    float64
 8   Calories_Burned   973 non-null    float64
 9   Workout_Type      973 non-null    object 
 10  Fat_Percentage    973 non-null    float64
 11  Water_Intake      973 non-null    float64
 12  Frequency         973 non-null    int64  
 13  Experience_Level  973 non-null    int64  
 14  BMI               973 non-null    float64
dtypes: float64(7), int64(6), object(2)
memory usage: 114.2+ KB


In [5]:
exercise_df.describe()

Unnamed: 0,Age,Weight,Height,Max_BPM,Avg_BPM,Resting_BPM,Duration,Calories_Burned,Fat_Percentage,Water_Intake,Frequency,Experience_Level,BMI
count,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0,973.0
mean,38.683453,73.854676,1.72258,179.883864,143.766701,62.223022,1.256423,905.422405,24.976773,2.626619,3.321686,1.809866,24.912127
std,12.180928,21.2075,0.12772,11.525686,14.345101,7.32706,0.343033,272.641516,6.259419,0.600172,0.913047,0.739693,6.660879
min,18.0,40.0,1.5,160.0,120.0,50.0,0.5,303.0,10.0,1.5,2.0,1.0,12.32
25%,28.0,58.1,1.62,170.0,131.0,56.0,1.04,720.0,21.3,2.2,3.0,1.0,20.11
50%,40.0,70.0,1.71,180.0,143.0,62.0,1.26,893.0,26.2,2.6,3.0,2.0,24.16
75%,49.0,86.0,1.8,190.0,156.0,68.0,1.46,1076.0,29.3,3.1,4.0,2.0,28.56
max,59.0,129.9,2.0,199.0,169.0,74.0,2.0,1783.0,35.0,3.7,5.0,3.0,49.84


#### Data Cleaning and Feature Engineering

In [6]:
# check if everyone has the right BMI based on weight and height values
incorrect_bmi_rows = []

for index, row in exercise_df.iterrows():
    if row['BMI'] != round(row['Weight']/(row['Height']**2),2):
        incorrect_bmi_rows.append(index)

incorrect_bmi_rows

# all the BMI recorded is correct

[]

In [7]:
# dropping experience level as it is an arbitraury value that may not be the same comparison or scale for everyone

exercise_df = exercise_df.drop(columns=['Experience_Level'])

exercise_df.head()

Unnamed: 0,Age,Gender,Weight,Height,Max_BPM,Avg_BPM,Resting_BPM,Duration,Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake,Frequency,BMI
0,56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,30.2
1,46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,32.0
2,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,24.71
3,25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,18.41
4,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,14.39


In [8]:
# rate 1-5 for fitness index score for each of the following category

# Underweight (<18.5) = Score 1
# Normal (18.5 - 24.9) = Score 5
# Overweight (25 - 29.9) = Score 3
# Obesity (30 - 34.9) = Score 2
# Morbid Obesity (>35) = Score 1

for index, row in exercise_df.iterrows():
    if row['BMI'] < 18.5:
        exercise_df.loc[index, 'BMI_score'] = 1
    elif row['BMI'] >= 18.5 or row['BMI'] <= 24.9:
        exercise_df.loc[index, 'BMI_score'] = 5
    elif row['BMI'] >= 25 or row['BMI'] <= 29.9:
        exercise_df.loc[index, 'BMI_score'] = 3
    elif row['BMI'] >= 30 or row['BMI'] <= 34.9:
        exercise_df.loc[index, 'BMI_score'] = 2
    else:
        exercise_df.loc[index, 'BMI_score'] = 1

exercise_df.head()

Unnamed: 0,Age,Gender,Weight,Height,Max_BPM,Avg_BPM,Resting_BPM,Duration,Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake,Frequency,BMI,BMI_score
0,56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,30.2,5.0
1,46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,32.0,5.0
2,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,24.71,5.0
3,25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,18.41,1.0
4,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,14.39,1.0


In [11]:
# age    target heart rate     max heart rate
# 25           98-166                195
# 35           93-157                185
# 45           88-149                175
# 55           83-140                165
# 65           78-132                155

# Healthy resting heart rate is between 50-100 heartbeats per min

for index, row in exercise_df.iterrows():
    if row['Age'] < 35 and (row['Avg_BPM'] < 98 and row['Avg_BPM'] > 166):
        exercise_df.loc[index, 'Avg_BPM_Score'] = 'Unhealthy'

In [17]:
exercise_df.loc[(exercise_df['Age'] < 35) & (exercise_df['Avg_BPM'] < 98) & (exercise_df['Avg_BPM'] > 166)]

Unnamed: 0,Age,Gender,Weight,Height,Max_BPM,Avg_BPM,Resting_BPM,Duration,Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake,Frequency,BMI,BMI_score
