# 1. Import Dependencies

In [57]:
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import cv2
import mediapipe as mp
from imblearn.over_sampling import SMOTE
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import Callback

# 2. Data Preparation

In [58]:
# Load the labelled dataset; later cells read its 'class' column as the target.
df = pd.read_csv(filepath_or_buffer='push_up_head_position_fix.csv')

In [59]:
# Count the rows carrying each label to check the class balance.
jumlah_kelas_0 = len(df.loc[df['class'] == 0])
jumlah_kelas_1 = len(df.loc[df['class'] == 1])

print("Jumlah data kelas 0:", jumlah_kelas_0)
print("Jumlah data kelas 1:", jumlah_kelas_1)

Jumlah data kelas 0: 2989
Jumlah data kelas 1: 4676


In [60]:
# Split the frame into the target (the 'class' column) and the features (everything else).
y = df['class']
X = df.drop(columns='class')

In [61]:
# Oversample with SMOTE (fixed seed) to even out the class counts.
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

# SMOTE returns the original rows first and appends the synthetic rows after
# them, while Keras' `validation_split` (used when fitting below) holds out the
# *last* rows without shuffling. Without a shuffle the validation set would be
# made almost entirely of synthetic samples from a single class.
# Assumes fit_resample returns pandas objects when given pandas input.
# NOTE(review): the held-out rows still include SMOTE-derived samples; for an
# unbiased estimate, split off the validation data before resampling.
shuffled_order = np.random.default_rng(42).permutation(len(X))
X = X.iloc[shuffled_order].reset_index(drop=True)
y = y.iloc[shuffled_order].reset_index(drop=True)

In [62]:
# Enable memory growth on every GPU TensorFlow can see (no-op when there is none).
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, enable=True)

# 3. Modelling

In [63]:
# Binary classifier: 132 inputs -> 64 -> 32 -> 1 sigmoid unit.
# Both hidden layers use ReLU and an L2 kernel penalty of 0.05.
model = tf.keras.Sequential()
model.add(layers.Dense(units=64, input_dim=132, activation='relu',
                       kernel_regularizer=regularizers.l2(0.05)))
model.add(layers.Dense(units=32, activation='relu',
                       kernel_regularizer=regularizers.l2(0.05)))
model.add(layers.Dense(units=1, activation='sigmoid'))


In [64]:
# Train with Adam (learning rate 0.001) on binary cross-entropy and track accuracy.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [65]:
class AccuracyStopCallback(Callback):
    """Stop training once training AND validation accuracy reach a target.

    Args:
        target_accuracy: Threshold that both the 'accuracy' and the
            'val_accuracy' entries of the epoch logs must meet or exceed.
    """

    def __init__(self, target_accuracy=0.97):
        super().__init__()
        self.target_accuracy = target_accuracy
        # Not read by this class; kept so code that inspects the attribute
        # keeps working.
        self.val_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when the target is met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict for the epoch; may be None.
        """
        # A None default avoids sharing one mutable dict across calls.
        logs = logs or {}
        train_accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')

        # Either metric can be missing (e.g. no validation data, or the model
        # was compiled without the 'accuracy' metric). Comparing None with a
        # float would raise TypeError, so warn and skip the check instead.
        if train_accuracy is None or val_accuracy is None:
            import warnings
            warnings.warn(
                "AccuracyStopCallback needs both 'accuracy' and 'val_accuracy' "
                "in the epoch logs; skipping the early-stop check.",
                RuntimeWarning,
            )
            return

        if train_accuracy >= self.target_accuracy and val_accuracy >= self.target_accuracy:
            print(f"\nReached target accuracy of {self.target_accuracy}, stopping training!")
            self.model.stop_training = True

In [74]:
# Stop early once both training and validation accuracy reach 0.93.
callback = AccuracyStopCallback(0.93)

In [75]:
# Fit for at most 50 epochs; the callback may end training sooner.
# NOTE(review): Keras takes `validation_split` from the tail of X/y without
# shuffling, so the row order of X/y decides what ends up in the validation set.
model.fit(
    x=X,
    y=y,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Reached target accuracy of 0.93, stopping training!


<keras.callbacks.History at 0x1fc4cf80250>

In [76]:
# Convert the trained Keras model to TensorFlow Lite and write the bytes to disk.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('model_head_position.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\ASUS\AppData\Local\Temp\tmpzxlo7po3\assets


INFO:tensorflow:Assets written to: C:\Users\ASUS\AppData\Local\Temp\tmpzxlo7po3\assets


# 4. Testing

In [77]:
# Run the trained model on a video file and overlay its per-frame verdict.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Header names ("class", x1, y1, z1, v1, ..., v33) for the 33 pose landmarks.
# Not used by the loop below.
landmarks = ["class"]
for val in range(1, 33+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

vid_path = "test.mp4"
cap = cv2.VideoCapture(vid_path)
current_stage = ''

# Without this check a wrong path just skips the loop with no hint as to why.
if not cap.isOpened():
    print(f"Error: could not open video '{vid_path}'")

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or an unexpected error escapes the loop.
try:
    # Initiate Pose Model
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # Streaming the video
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of the video (or a read failure).
                break

            # Resize the frame to the desired window size
            frame = cv2.resize(frame, (720, 600))

            # MediaPipe expects RGB, OpenCV delivers BGR
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                      mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                      mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

            # Best effort per frame: report the problem and keep streaming.
            try:
                if results.pose_landmarks:
                    # One row of x, y, z, visibility per landmark, flattened
                    # and given a leading batch axis of 1.
                    row = np.array([[res.x, res.y, res.z, res.visibility]
                                    for res in results.pose_landmarks.landmark]).flatten()
                    row = np.expand_dims(row, axis=0)

                    # verbose=0 keeps Keras from printing a progress bar per frame.
                    prediction = model.predict(row, verbose=0)
                    probability = float(prediction[0][0])
                    print(f'Result: {probability}')

                    # Sigmoid output above 0.5 is treated as class 1 ("Correct").
                    current_stage = 'Correct' if probability > 0.5 else 'Wrong'

                    cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)

                    cv2.putText(image, 'CLASS', (95, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, current_stage, (95, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    cv2.putText(image, 'PROB', (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(probability, 2)), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            except Exception as e:
                print(f"Error: {e}")

            # Stream video result
            cv2.imshow("Raw Cam Feed", image)

            # Press 'q' to stop the video
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()


Result: 0.03934874013066292
Result: 0.030841421335935593
Result: 0.0282343290746212
Result: 0.01638219505548477
Result: 0.009610714390873909
Result: 0.007550488691776991
Result: 0.007844788022339344
Result: 0.010091477073729038
Result: 0.014847731217741966
Result: 0.05221734568476677
Result: 0.05781984701752663
Result: 0.04192974045872688
Result: 0.11135026812553406
Result: 0.3099052906036377
Result: 0.5858107209205627
Result: 0.3959885537624359
Result: 0.5300021171569824
Result: 0.8226948976516724
Result: 0.8293960690498352
Result: 0.8680196404457092
Result: 0.8924304842948914
Result: 0.9077318906784058
Result: 0.9335623979568481
Result: 0.9447581768035889
Result: 0.9421858191490173
Result: 0.9389544725418091
Result: 0.9356670379638672
Result: 0.935415506362915
Result: 0.9334214329719543
Result: 0.9340240359306335
Result: 0.9338906407356262
Result: 0.932273805141449
Result: 0.9309136271476746
Result: 0.927023708820343
Result: 0.9282166957855225
Result: 0.930310845375061
Result: 0.9276

In [70]:
import numpy as np
import cv2
import mediapipe as mp

In [71]:
# Shortcuts to the MediaPipe pose solution and its drawing helpers.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

In [72]:
# Build the header: the label column, then x/y/z/v names for landmarks 1..33.
landmarks = ["class"]
for val in range(1, 33+1):
    landmarks.extend('{}{}'.format(axis, val) for axis in ('x', 'y', 'z', 'v'))

In [73]:
# Run the trained Keras model on a second video and overlay its per-frame verdict.
vid_path = "push_up_wrong_6.mp4"
cap = cv2.VideoCapture(vid_path)
current_stage = ''
# TODO: a rep counter gated on confidence thresholds (0.7 for 'Wrong',
# 0.9 for 'Correct') was sketched here earlier but never enabled.

# Without this check a wrong path just skips the loop with no hint as to why.
if not cap.isOpened():
    print(f"Error: could not open video '{vid_path}'")

# try/finally guarantees the capture and the window are released even when the
# kernel is interrupted or an unexpected error escapes the loop.
try:
    # Initiate Pose Model
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # Streaming the video
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of the video (or a read failure).
                break

            # Resize the frame to the desired window size
            frame = cv2.resize(frame, (720, 600))

            # MediaPipe expects RGB, OpenCV delivers BGR
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                      mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                                      mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2))

            # Extract landmarks and classify; best effort per frame.
            try:
                if results.pose_landmarks:
                    # Deliberately NOT named `X`: that name holds the training
                    # features and must not be overwritten by a single frame.
                    frame_features = np.array([[res.x, res.y, res.z, res.visibility]
                                               for res in results.pose_landmarks.landmark]).flatten()
                    frame_features = np.expand_dims(frame_features, axis=0)

                    # `model` is the Keras network with one sigmoid output, so
                    # there is no predict_proba and predict() returns the
                    # probability of class 1 rather than a class label.
                    # verbose=0 suppresses the per-frame progress bar.
                    prob_class_1 = float(model.predict(frame_features, verbose=0)[0][0])

                    # Rebuild the [P(class 0), P(class 1)] pair and the label
                    # that the rest of this cell works with. A 0.5 tie goes to
                    # class 0, i.e. 'Wrong'.
                    body_language_prob = np.array([1.0 - prob_class_1, prob_class_1])
                    body_language_class = int(np.argmax(body_language_prob))
                    print(body_language_class, body_language_prob)

                    if body_language_class == 0:
                        current_stage = 'Wrong'
                    elif body_language_class == 1:
                        current_stage = 'Correct'

                    cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)

                    cv2.putText(image, 'CLASS', (95, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, current_stage, (95, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Show the confidence of the predicted class.
                    cv2.putText(image, 'PROB', (15, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)], 2)), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            except Exception as e:
                print(f"Error: {e}")

            # Stream video result
            cv2.imshow("Raw Cam Feed", image)

            # Press 'q' to stop the video
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()