In [152]:
using CSV
using DataFrames
using MLJ

In [153]:
# Read the CSV file into a DataFrame
df = CSV.read("megaGymDataset.csv", DataFrame)

# Display summary statistics of the DataFrame
describe(df)

# Get the number of rows and columns in the DataFrame
nrow(df), ncol(df)

(2918, 9)

In [154]:
# Select the two columns from the original DataFrame
selected_columns = df[:, [:Title,  :BodyPart]]
# Create a new DataFrame with the selected columns
new_df = DataFrame(selected_columns)
nrow(new_df), ncol(new_df)

(2918, 2)

In [155]:
schema(new_df)

┌──────────┬──────────┬──────────┐
│[22m names    [0m│[22m scitypes [0m│[22m types    [0m│
├──────────┼──────────┼──────────┤
│ Title    │ Textual  │ String   │
│ BodyPart │ Textual  │ String15 │
└──────────┴──────────┴──────────┘


In [156]:
using Flux

X = new_df.Title  # Exercise names
y = new_df.BodyPart   # Target body parts

2918-element PooledArrays.PooledVector{String15, UInt32, Vector{UInt32}}:
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 "Abdominals"
 ⋮
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"
 "Triceps"

In [157]:
# Step 2: Feature Extraction
# In this simple example, we'll use one-hot encoding for the exercise names
X_encoded = Flux.onehotbatch(X, sort(unique(X))) |> Matrix{Float32}
X_unique = sort(unique(X))

2909-element Vector{String}:
 "1.5-rep push-up"
 "3/4 sit-up"
 "30 Arms BFR Close-Grip Push-Up"
 "30 Arms BFR Dumbbell Kick-Back"
 "30 Arms BFR High Cable Curl"
 "30 Arms BFR Machine Preacher Curl"
 "30 Arms Barbell Skullcrusher"
 "30 Arms Cable Concentration Curl"
 "30 Arms Cable Rope Hammer Curl"
 "30 Arms Cable Rope Overhead Triceps Extension"
 "30 Arms Cable Rope Push-Down"
 "30 Arms Cable Straight-Bar Curl"
 "30 Arms Cable Straight-Bar Push-Down"
 ⋮
 "Wide-grip bench press"
 "Wide-grip hands-elevated push-up"
 "Windmills"
 "World's greatest stretch"
 "Wrist Circles"
 "Wrist Roller"
 "X-body V-up"
 "Yates Row"
 "Yates Row Reverse Grip"
 "Yoga plex"
 "Yoke Walk"
 "Zercher squat"

In [158]:
using Statistics
# Standardize the input features using z-score normalization
X_standardized = (X_encoded .- mean(X_encoded, dims=1)) ./ std(X_encoded, dims=1)


2909×2918 Matrix{Float32}:
 -0.0185408  -0.0185408  -0.0185408  …  -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408  …  -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408  …  -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.0185408
 -0.0185408  -0.0185408  -0.0185408     -0.0185408  -0.0185408  -0.018540

In [159]:
# Encode target labels as integers
label_mapping = Dict(unique(y) .=> 1:length(unique(y)))
y_encoded = [label_mapping[label] for label in y]

2918-element Vector{Int64}:
  1
  1
  1
  1
  1
  1
  1
  1
  1
  1
  1
  1
  1
  ⋮
 17
 17
 17
 17
 17
 17
 17
 17
 17
 17
 17
 17

In [160]:
# Step 3: Model Training
# Split the data into training and testing sets
data = [(x, y) for (x, y) in zip(eachrow(X_encoded), y_encoded)]
train_size = Int(round(length(data) * 0.8))
train_data = data[1:train_size]
test_data = data[train_size+1:end]

582-element Vector{Tuple{SubArray{Float32, 1, Matrix{Float32}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}}, true}, Int64}}:
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 15)
 ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

In [176]:
# Define the model architecture
input_size = size(X_encoded, 2)
model = Chain(
    Dense(input_size, 64, relu),
    Dense(64, 32, relu),
    Dense(32, length(label_mapping))
)

Chain(
  Dense(2918 => 64, relu),              [90m# 186_816 parameters[39m
  Dense(64 => 32, relu),                [90m# 2_080 parameters[39m
  Dense(32 => 17),                      [90m# 561 parameters[39m
) [90m                  # Total: 6 arrays, [39m189_457 parameters, 740.441 KiB.

In [177]:
# Define the loss function
loss(x, y) = Flux.crossentropy(softmax(model(x)), Flux.onehotbatch(y, 1:length(label_mapping)))
# Define the optimizer
optimizer = Flux.ADAM()

Adam(0.001, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

In [178]:
# Train the model
for epoch in 1:10
    Flux.train!(loss, Flux.params(model), train_data, optimizer)
end

In [179]:
# Step 4: Model Evaluation
# Make predictions on the test set
X_test = [x for (x, _) in test_data]
y_test = [y for (_, y) in test_data]
y_pred = Flux.argmax(model.(X_test), dims=2)

582-element Vector{Int64}:
   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
   ⋮
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582

In [180]:
# Calculate accuracy
accuracy = sum(y_pred .== reshape(y_test, :)') / length(y_test)

println("Accuracy: $accuracy")

Accuracy: 1.0


In [181]:
println("Dimension of X_encoded: ", size(X_encoded))
println("Dimension of X_standardized: ", size(X_standardized))


Dimension of X_encoded: (2909, 2918)
Dimension of X_standardized: (2909, 2918)


In [182]:
# Step 5: Model Deployment
# You can now use the trained model to make predictions on new exercise names

exercise_name = "Wrist Roller" # Example exercise name for prediction
exercise_encoded = Flux.onehotbatch([exercise_name], sort(unique(X))) |> Matrix{Float32}
predicted_body_part = Flux.argmax(model(exercise_encoded), dims=2)[1]

inverse_label_mapping = invert(label_mapping)
predicted_body_part_label = inverse_label_mapping[predicted_body_part]

println("Predicted body part: $predicted_body_part_label")

LoadError: DimensionMismatch: layer Dense(2918 => 64, relu) expects size(input, 1) == 2918, but got 2909×1 Matrix{Float32}