In [1]:
import os
import json
import pickle
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Masking, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [2]:
base_path = r"C:\Users\andjelija.jovanovic\Desktop\movement project\data"

landmarks_path   = os.path.join(base_path, "landmarks.csv")
angles_path      = os.path.join(base_path, "angles.csv")
labels_path      = os.path.join(base_path, "labels.csv")
dist3d_path      = os.path.join(base_path, "calculated_3d_distances.csv")

landmarks = pd.read_csv(landmarks_path)
angles    = pd.read_csv(angles_path)
labels    = pd.read_csv(labels_path)
dist3d    = pd.read_csv(dist3d_path)

print("LANDMARKS:", landmarks.shape)
print("ANGLES:", angles.shape)
print("DIST3D:", dist3d.shape)
print("LABELS:", labels.shape)

landmarks.head()


LANDMARKS: (83922, 101)
ANGLES: (83922, 9)
DIST3D: (83922, 18)
LABELS: (448, 2)


Unnamed: 0,vid_id,frame_order,x_nose,y_nose,z_nose,x_left_eye_inner,y_left_eye_inner,z_left_eye_inner,x_left_eye,y_left_eye,...,z_left_heel,x_right_heel,y_right_heel,z_right_heel,x_left_foot_index,y_left_foot_index,z_left_foot_index,x_right_foot_index,y_right_foot_index,z_right_foot_index
0,0,0,-0.645851,-59.99263,-80.985,0.560464,-62.55525,-76.38421,1.362609,-62.543415,...,42.49331,-4.885307,67.51277,40.333897,5.356711,73.93424,11.78033,-5.852993,73.78203,9.016774
1,0,1,-0.290473,-61.06931,-78.4787,0.881309,-63.67481,-73.719315,1.639633,-63.648945,...,48.48736,-4.753275,64.96957,45.439384,5.492989,73.17727,18.108229,-6.038326,72.70349,14.22201
2,0,2,-0.378156,-61.102,-86.33219,0.968603,-63.431263,-81.922356,1.788657,-63.423435,...,49.983517,-4.517086,64.51098,48.99688,5.433758,72.199036,19.192911,-5.51349,71.79309,17.322145
3,0,3,-0.004211,-61.846817,-98.9491,1.419466,-64.42455,-94.67355,2.102673,-64.361015,...,53.7625,-4.67454,64.720245,53.58178,5.76875,72.69629,23.325266,-5.238461,72.11217,21.887375
4,0,4,0.215262,-59.717796,-96.07627,1.495876,-62.19619,-91.90727,2.157559,-62.149612,...,53.40909,-4.098778,62.49023,52.845634,5.633003,70.438194,23.657516,-5.467475,70.08317,22.496626


In [3]:
data = landmarks.merge(
    angles,
    on=["vid_id", "frame_order"],
    how="inner"
)

data = data.merge(
    dist3d,
    on=["vid_id", "frame_order"],
    how="inner"
)

data = data.merge(
    labels,
    on="vid_id",
    how="inner"
)

print("DATA shape:", data.shape)
data.head()


DATA shape: (83922, 125)


Unnamed: 0,vid_id,frame_order,x_nose,y_nose,z_nose,x_left_eye_inner,y_left_eye_inner,z_left_eye_inner,x_left_eye,y_left_eye,...,right_shoulder_right_ankle,left_hip_right_wrist,right_hip_left_wrist,left_elbow_right_elbow,left_knee_right_knee,left_wrist_right_wrist,left_ankle_right_ankle,left_hip_avg_left_wrist_left_ankle,right_hip_avg_right_wrist_right_ankle,class
0,0,0,-0.645851,-59.99263,-80.985,0.560464,-62.55525,-76.38421,1.362609,-62.543415,...,127.11895,51.88966,34.165817,29.366705,11.997074,34.49884,9.536471,34.766296,34.263794,jumping_jack
1,0,1,-0.290473,-61.06931,-78.4787,0.881309,-63.67481,-73.719315,1.639633,-63.648945,...,126.65802,49.100586,33.16866,28.751902,11.787817,35.02561,9.934409,34.764217,33.39555,jumping_jack
2,0,2,-0.378156,-61.102,-86.33219,0.968603,-63.431263,-81.922356,1.788657,-63.423435,...,132.46251,51.61608,40.597023,27.868805,10.669149,36.084465,9.258501,34.062817,33.561043,jumping_jack
3,0,3,-0.004211,-61.846817,-98.9491,1.419466,-64.42455,-94.67355,2.102673,-64.361015,...,141.82007,59.323757,51.28837,28.674877,10.537262,39.659523,9.53826,34.75624,34.213974,jumping_jack
4,0,4,0.215262,-59.717796,-96.07627,1.495876,-62.19619,-91.90727,2.157559,-62.149612,...,138.79196,58.721348,47.935104,30.19299,10.41341,41.268585,9.376397,30.759424,31.533577,jumping_jack


In [4]:
def normalize_landmarks_df(df):
    df = df.copy()
    req = ["x_left_hip", "y_left_hip", "z_left_hip",
           "x_right_hip", "y_right_hip", "z_right_hip"]
    if not all(c in df.columns for c in req):
        print("Nedostaju hip kolone, preskačem normalizaciju.")
        return df

    df["mid_hip_x"] = (df["x_left_hip"] + df["x_right_hip"]) / 2
    df["mid_hip_y"] = (df["y_left_hip"] + df["y_right_hip"]) / 2
    df["mid_hip_z"] = (df["z_left_hip"] + df["z_right_hip"]) / 2

    coord_cols = [c for c in df.columns if c.startswith(("x_", "y_", "z_"))]

    for c in coord_cols:
        if c.startswith("x_"):
            df[c] = df[c] - df["mid_hip_x"]
        elif c.startswith("y_"):
            df[c] = df[c] - df["mid_hip_y"]
        elif c.startswith("z_"):
            df[c] = df[c] - df["mid_hip_z"]

    df = df.drop(columns=["mid_hip_x", "mid_hip_y", "mid_hip_z"])
    return df

data = data.groupby("vid_id", group_keys=False).apply(normalize_landmarks_df)
data.head()


  data = data.groupby("vid_id", group_keys=False).apply(normalize_landmarks_df)


Unnamed: 0,vid_id,frame_order,x_nose,y_nose,z_nose,x_left_eye_inner,y_left_eye_inner,z_left_eye_inner,x_left_eye,y_left_eye,...,right_shoulder_right_ankle,left_hip_right_wrist,right_hip_left_wrist,left_elbow_right_elbow,left_knee_right_knee,left_wrist_right_wrist,left_ankle_right_ankle,left_hip_avg_left_wrist_left_ankle,right_hip_avg_right_wrist_right_ankle,class
0,0,0,-0.645851,-59.992637,-80.985,0.560464,-62.555257,-76.38421,1.362609,-62.543422,...,127.11895,51.88966,34.165817,29.366705,11.997074,34.49884,9.536471,34.766296,34.263794,jumping_jack
1,0,1,-0.29048,-61.06931,-78.4787,0.881303,-63.67481,-73.719315,1.639626,-63.648945,...,126.65802,49.100586,33.16866,28.751902,11.787817,35.02561,9.934409,34.764217,33.39555,jumping_jack
2,0,2,-0.378149,-61.102,-86.33219,0.96861,-63.431263,-81.922356,1.788664,-63.423435,...,132.46251,51.61608,40.597023,27.868805,10.669149,36.084465,9.258501,34.062817,33.561043,jumping_jack
3,0,3,-0.004218,-61.846824,-98.9491,1.41946,-64.424557,-94.67355,2.102666,-64.361022,...,141.82007,59.323757,51.28837,28.674877,10.537262,39.659523,9.53826,34.75624,34.213974,jumping_jack
4,0,4,0.215262,-59.717803,-96.07627,1.495876,-62.196197,-91.90727,2.157559,-62.149619,...,138.79196,58.721348,47.935104,30.19299,10.41341,41.268585,9.376397,30.759424,31.533577,jumping_jack


In [5]:

yolo_joints = [
    "nose",
    "left_eye", "right_eye",
    "left_ear", "right_ear",
    "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow",
    "left_wrist", "right_wrist",
    "left_hip", "right_hip",
    "left_knee", "right_knee",
    "left_ankle", "right_ankle",
]


coord_cols = [f"{axis}_{name}" for name in yolo_joints for axis in ["x", "y", "z"]]


angle_cols = [
    "right_elbow_right_shoulder_right_hip",
    "left_elbow_left_shoulder_left_hip",
    "right_hip_right_knee_right_ankle",
    "left_hip_left_knee_left_ankle",
    "right_wrist_right_elbow_right_shoulder",
    "left_wrist_left_elbow_left_shoulder",
]


dist_cols = [
    "left_shoulder_left_wrist",
    "right_shoulder_right_wrist",
    "left_hip_left_ankle",
    "right_hip_right_ankle",
    "left_hip_left_wrist",
    "right_hip_right_wrist",
    "left_shoulder_left_ankle",
    "right_shoulder_right_ankle",
]

feature_cols = coord_cols + angle_cols + dist_cols

missing = [c for c in feature_cols if c not in data.columns]
print("Nedostaju kolone:", missing)
print("Ukupno feature-a:", len(feature_cols))
feature_cols[:20]


Nedostaju kolone: []
Ukupno feature-a: 65


['x_nose',
 'y_nose',
 'z_nose',
 'x_left_eye',
 'y_left_eye',
 'z_left_eye',
 'x_right_eye',
 'y_right_eye',
 'z_right_eye',
 'x_left_ear',
 'y_left_ear',
 'z_left_ear',
 'x_right_ear',
 'y_right_ear',
 'z_right_ear',
 'x_left_shoulder',
 'y_left_shoulder',
 'z_left_shoulder',
 'x_right_shoulder',
 'y_right_shoulder']

In [None]:
TARGET_LEN = 120  

def resample_group(group, target_len=TARGET_LEN):
    """
    group: DataFrame jednog videa, sortiran po frame_order
    vraća: np.array shape (target_len, num_features)
    """
    group = group.sort_values("frame_order")
    arr = group[feature_cols].values.astype("float32")
    n = arr.shape[0]

    if n == target_len:
        return arr
    elif n < 2:
      
        return np.repeat(arr, target_len, axis=0)[:target_len]


    idxs = np.linspace(0, n - 1, num=target_len).astype(int)
    return arr[idxs]


In [7]:
videos = []
video_labels = []

for vid, group in data.groupby("vid_id"):
    seq = resample_group(group, TARGET_LEN)
    label = group["class"].iloc[0]

    videos.append(seq)
    video_labels.append(label)

len(videos), len(video_labels), videos[0].shape, video_labels[0]


(448, 448, (120, 65), 'jumping_jack')

In [8]:
X_train_list, X_temp_list, y_train, y_temp = train_test_split(
    videos,
    video_labels,
    test_size=0.3,
    random_state=42,
    stratify=video_labels
)

X_val_list, X_test_list, y_val, y_test = train_test_split(
    X_temp_list,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp
)

len(X_train_list), len(X_val_list), len(X_test_list)


(313, 67, 68)

In [9]:
le = LabelEncoder()

y_train_enc = to_categorical(le.fit_transform(y_train))
y_val_enc   = to_categorical(le.transform(y_val))
y_test_enc  = to_categorical(le.transform(y_test))

num_classes = y_train_enc.shape[1]
num_classes, le.classes_


(5,
 array(['jumping_jack', 'pull_up', 'push_up', 'situp', 'squat'],
       dtype='<U12'))

In [10]:
X_train = np.stack(X_train_list, axis=0)  
X_val   = np.stack(X_val_list, axis=0)
X_test  = np.stack(X_test_list, axis=0)

X_train.shape, X_val.shape, X_test.shape


((313, 120, 65), (67, 120, 65), (68, 120, 65))

In [None]:
seq_len = TARGET_LEN
num_features = X_train.shape[2]

model = Sequential([
    Input(shape=(seq_len, num_features)),
    
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),

    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.3),

    Dense(128, activation="relu"),
    Dropout(0.3),

    Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


In [12]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

checkpoint_path = "exercise_lstm_yolo_v2_best.keras"

callbacks = [
    EarlyStopping(
        monitor="val_loss",
        patience=6,
        restore_best_weights=True
    ),
    ModelCheckpoint(
        filepath=checkpoint_path,
        monitor="val_loss",
        save_best_only=True
    )
]

history = model.fit(
    X_train, y_train_enc,
    epochs=40,
    batch_size=8,
    validation_data=(X_val, y_val_enc),
    callbacks=callbacks,
    verbose=1
)


Epoch 1/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 99ms/step - accuracy: 0.5399 - loss: 1.2195 - val_accuracy: 0.7910 - val_loss: 0.7696
Epoch 2/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 80ms/step - accuracy: 0.7764 - loss: 0.6214 - val_accuracy: 0.7910 - val_loss: 0.6319
Epoch 3/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.8115 - loss: 0.4877 - val_accuracy: 0.6866 - val_loss: 0.8529
Epoch 4/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 72ms/step - accuracy: 0.8403 - loss: 0.4175 - val_accuracy: 0.7463 - val_loss: 0.8284
Epoch 5/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.8786 - loss: 0.3568 - val_accuracy: 0.8657 - val_loss: 0.4601
Epoch 6/40
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 81ms/step - accuracy: 0.8818 - loss: 0.3187 - val_accuracy: 0.7164 - val_loss: 0.7421
Epoch 7/40
[1m40/40[0m [32m━━━━

In [13]:
test_loss, test_acc = model.evaluate(X_test, y_test_enc, verbose=1)
print(f"Test accuracy: {test_acc:.3f}, test loss: {test_loss:.3f}")


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.8676 - loss: 0.5444
Test accuracy: 0.868, test loss: 0.544


In [14]:
y_test_pred_probs = model.predict(X_test)
y_test_pred = np.argmax(y_test_pred_probs, axis=1)
y_test_true = np.argmax(y_test_enc, axis=1)

print("Confusion matrix:")
print(confusion_matrix(y_test_true, y_test_pred))

print("\nKlasifikacioni izveštaj:\n")
print(classification_report(y_test_true, y_test_pred, target_names=le.classes_))


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 315ms/step
Confusion matrix:
[[13  3  0  0  0]
 [ 0 14  1  0  0]
 [ 0  0 15  0  0]
 [ 0  0  1 11  0]
 [ 0  1  1  2  6]]

Klasifikacioni izveštaj:

              precision    recall  f1-score   support

jumping_jack       1.00      0.81      0.90        16
     pull_up       0.78      0.93      0.85        15
     push_up       0.83      1.00      0.91        15
       situp       0.85      0.92      0.88        12
       squat       1.00      0.60      0.75        10

    accuracy                           0.87        68
   macro avg       0.89      0.85      0.86        68
weighted avg       0.89      0.87      0.86        68



In [None]:
def predict_sequence(seq_2d):
    """
    seq_2d: np.array [n_frames, num_features] – NE mora da bude TARGET_LEN
    """
   
    if seq_2d.shape[0] != TARGET_LEN:
        n = seq_2d.shape[0]
        idxs = np.linspace(0, n - 1, num=TARGET_LEN).astype(int)
        seq_2d = seq_2d[idxs]

    seq_2d = seq_2d.astype("float32")
    seq_3d = np.expand_dims(seq_2d, axis=0)

    probs = model.predict(seq_3d, verbose=0)[0]
    idx = int(np.argmax(probs))
    class_name = le.inverse_transform([idx])[0]
    confidence = float(probs[idx])
    return class_name, confidence


test_seq = X_test_list[0]
true_label = y_test[0]
pred_label, conf = predict_sequence(test_seq)

print("STVARNA:", true_label)
print("PREDIKCIJA:", pred_label, f"({conf*100:.1f}%)")


STVARNA: push_up
PREDIKCIJA: push_up (58.5%)


In [None]:
model_name = "exercise_lstm_yolo_v2.keras"


model.save(model_name)


with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)


with open("feature_cols.json", "w") as f:
    json.dump(feature_cols, f)

with open("config.json", "w") as f:
    json.dump({
        "seq_len": int(TARGET_LEN),
        "num_features": int(num_features)
    }, f)

os.listdir()


['best_lstm_angles.keras',
 'best_lstm_model.keras',
 'config.json',
 'data',
 'exercise_lstm_angles.keras',
 'exercise_lstm_final.keras',
 'exercise_lstm_yolo_compatible_best.keras',
 'exercise_lstm_yolo_v2.keras',
 'exercise_lstm_yolo_v2_best.keras',
 'exercise_ltsm_v2_yolo.ipynb',
 'exercise_ltsm_yolo.ipynb',
 'feature_cols.json',
 'label_encoder.pkl',
 'model_training.ipynb',
 'notebooks',
 'pose_capture.ipynb',
 'pose_capture.py',
 'realtime_exercise_cam.py',
 'src',
 'venv',
 'videos',
 'yolov8n-pose.pt']