In [1]:
import pandas as pd
from tensorflow import keras
from sklearn.model_selection import train_test_split
from data import BodyPart
import tensorflow as tf

In [2]:
def load_csv(csv_path):
    """Loads a landmark CSV and splits it into features, one-hot labels and class names.

    The CSV is expected to contain a `filename` column, a `class_name` column,
    a `class_no` column and numeric feature columns.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        X: DataFrame of the remaining feature columns cast to float64.
        y: one-hot label matrix built from `class_no` by
            `keras.utils.to_categorical`.
        classes: array of class names ordered by their `class_no`, so that
            `classes[i]` names the class behind one-hot column `i` whenever the
            class numbers are contiguous from 0.
    """
    # Work on a new frame instead of mutating the freshly read one in place.
    features = pd.read_csv(csv_path).drop(columns=['filename'])

    # Order the names by class number rather than by first appearance in the
    # file; otherwise a shuffled CSV yields names misaligned with the labels.
    label_table = (
        features[['class_no', 'class_name']]
        .drop_duplicates(subset='class_no')
        .sort_values('class_no')
    )
    classes = label_table['class_name'].to_numpy()

    features = features.drop(columns=['class_name'])
    y = features.pop('class_no')

    X = features.astype('float64')
    y = keras.utils.to_categorical(y)

    return X, y, classes

In [3]:
def preprocess_data(X_train):
    """Converts every row of raw landmark features into a 34-value pose embedding.

    Args:
        X_train: DataFrame whose rows each hold 51 values (reshaped to (1, 51)
            before embedding).

    Returns:
        A tensor stacking one 34-element embedding per input row.
    """
    def _embed_row(position):
        # Each row is embedded as its own batch of one.
        # NOTE(review): get_pose_size gathers index 0 on the batch axis, so
        # batching several rows here would presumably change the result;
        # confirm before vectorizing.
        row = tf.convert_to_tensor(X_train.iloc[position])
        embedding = landmarks_to_embedding(tf.reshape(row, (1, 51)))
        return tf.reshape(embedding, 34)

    row_count = X_train.shape[0]
    embeddings = [_embed_row(position) for position in range(row_count)]
    return tf.convert_to_tensor(embeddings)

In [4]:

def landmarks_to_embedding(landmarks_and_scores):
    """Turns flat landmark data into a flattened, normalized pose embedding.

    The input is reshaped to 17 rows of 3 values; only the first two values of
    each row are kept, normalized with `normalize_pose_landmarks`, and
    flattened again.
    """
    # View the flat input as 17 landmarks with 3 values each.
    per_landmark = keras.layers.Reshape((17, 3))(landmarks_and_scores)

    # Keep the first two values of every landmark (dropping the third) and
    # normalize them.
    coordinates = per_landmark[:, :, :2]
    normalized = normalize_pose_landmarks(coordinates)

    # Collapse the normalized coordinates back into one vector per sample.
    return keras.layers.Flatten()(normalized)


In [5]:

def normalize_pose_landmarks(landmarks):
    """Centers a pose on its hip midpoint and rescales it by its pose size.

    Args:
        landmarks: landmark coordinates; the broadcast below hard-codes 17
            landmarks of 2 coordinates each, i.e. a (batch, 17, 2) layout.

    Returns:
        The landmarks minus the hip midpoint, divided by `get_pose_size` of
        the centered landmarks.
    """
    # The midpoint between the two hips is used as the pose center.
    hip_midpoint = get_center_point(
        landmarks, BodyPart.LEFT_HIP, BodyPart.RIGHT_HIP)

    # Add a landmark axis and tile the center over all 17 landmarks so it can
    # be subtracted element-wise.
    batch_size = tf.size(landmarks) // (17 * 2)
    tiled_center = tf.broadcast_to(
        tf.expand_dims(hip_midpoint, axis=1), [batch_size, 17, 2])
    centered = landmarks - tiled_center

    # Divide by the pose size so poses end up on a common scale.
    return centered / get_pose_size(centered)

In [6]:
def get_center_point(landmarks, left_bodypart, right_bodypart):
    """Returns the midpoint between two landmarks.

    Args:
        landmarks: tensor indexed by landmark along axis 1.
        left_bodypart: object whose `.value` is the axis-1 index of the first
            landmark.
        right_bodypart: object whose `.value` is the axis-1 index of the
            second landmark.

    Returns:
        The equally weighted average of the two selected landmarks.
    """
    def _select(bodypart):
        # Pick a single landmark out of axis 1.
        return tf.gather(landmarks, bodypart.value, axis=1)

    return _select(left_bodypart) * 0.5 + _select(right_bodypart) * 0.5

In [7]:
def get_pose_size(landmarks, torso_size_multiplier=2.5):
    """Calculates pose size.

    Returns the larger of two values:
    * Torso size (norm of shoulders center minus hips center) multiplied by
      `torso_size_multiplier`
    * The largest entry of a norm taken along axis 0 of the first batch
      element's landmark offsets from the hips center

    Args:
        landmarks: landmark coordinates; the broadcast below hard-codes a
            (batch, 17, 2) layout.
        torso_size_multiplier: factor applied to the torso size before the
            comparison.

    NOTE(review): the second value is presumably meant to be the maximum
    distance from the pose center to any landmark, which would be a
    per-landmark norm. Assuming `landmarks` is (batch, 17, 2), `d` is (17, 2)
    and `axis=0` reduces over the 17 landmarks instead of over the x/y
    coordinates. The torso norm is also taken without an axis, and only batch
    element 0 is gathered, so this looks correct only for a batch of one.
    Changing any of this alters the embeddings the model is trained on;
    confirm before touching it.
    """
    # Hips center
    hips_center = get_center_point(landmarks, BodyPart.LEFT_HIP, 
                                 BodyPart.RIGHT_HIP)

    # Shoulders center
    shoulders_center = get_center_point(landmarks, BodyPart.LEFT_SHOULDER,
                                      BodyPart.RIGHT_SHOULDER)

    # Torso size as the minimum body size. No axis is given, so a single norm
    # is taken over every element of the difference.
    torso_size = tf.linalg.norm(shoulders_center - hips_center)
    # Pose center (recomputed; same call as `hips_center` above)
    pose_center_new = get_center_point(landmarks, BodyPart.LEFT_HIP, 
                                     BodyPart.RIGHT_HIP)
    pose_center_new = tf.expand_dims(pose_center_new, axis=1)
    # Broadcast the pose center to the same size as the landmark vector to
    # perform subtraction
    pose_center_new = tf.broadcast_to(pose_center_new,
                                    [tf.size(landmarks) // (17*2), 17, 2])

    # Offsets from the pose center, for the first batch element only
    d = tf.gather(landmarks - pose_center_new, 0, axis=0,
                name="dist_to_pose_center")
    # Largest entry of the norm taken along axis 0 of those offsets
    max_dist = tf.reduce_max(tf.linalg.norm(d, axis=0))

    # Normalize scale
    pose_size = tf.maximum(torso_size * torso_size_multiplier, max_dist)
    return pose_size

In [8]:
def cnn_model(input_shape, num_classes):
    """Builds an uncompiled 1D-convolutional classifier.

    Args:
        input_shape: per-sample input shape, excluding the batch axis. Any
            iterable of dimensions works; it is converted to a tuple.
        num_classes: number of units in the final softmax layer.

    Returns:
        A `keras.Sequential` model: two Conv1D/MaxPooling1D stages, a
        128-unit dense layer with 50% dropout, and a softmax output.
    """
    return keras.Sequential([
        # Declare the input explicitly rather than passing `input_shape=` to
        # the first layer, which Keras 3 warns against.
        keras.Input(shape=tuple(input_shape)),
        keras.layers.Conv1D(64, 3, activation='relu'),
        keras.layers.MaxPooling1D(2),
        keras.layers.Conv1D(128, 3, activation='relu'),
        keras.layers.MaxPooling1D(2),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation='softmax'),
    ])

In [9]:
# Load the training CSV and hold out 15% of it for validation. The split is
# seeded so that Restart & Run All reproduces the same train/validation rows.
X, y, class_names = load_csv('train_data.csv')
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.15, random_state=42)
# The test set lives in its own CSV; its class-name array is not needed here.
X_test, y_test, _ = load_csv('test_data.csv')

In [10]:
# Convert each split's raw landmark rows into normalized pose embeddings.
processed_X_train, processed_X_val, processed_X_test = (
    preprocess_data(split) for split in (X_train, X_val, X_test)
)

In [11]:
# Append a trailing axis of size 1 to every split so the embeddings have the
# (length, channels) per-sample layout that the Conv1D model is built for.
processed_X_train = processed_X_train[..., tf.newaxis]
processed_X_val = processed_X_val[..., tf.newaxis]
processed_X_test = processed_X_test[..., tf.newaxis]

In [12]:
# Per-sample shape (everything after the first axis) passed on to cnn_model.
input_shape = processed_X_train.shape[1:]

In [13]:
# Build the classifier with one softmax output per class name found in the
# training CSV.
model = cnn_model(input_shape, len(class_names))

  super().__init__(


In [14]:
# load_csv one-hot encodes the labels, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Train on the embedded training split; the validation split is scored after
# every epoch and its metrics are kept in `history`.
history = model.fit(
    x=processed_X_train,
    y=y_train,
    batch_size=16,
    epochs=200,
    validation_data=(processed_X_val, y_val),
)

Epoch 1/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3823 - loss: 1.6853 - val_accuracy: 0.6828 - val_loss: 0.9805
Epoch 2/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6658 - loss: 0.9659 - val_accuracy: 0.7269 - val_loss: 0.6377
Epoch 3/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7593 - loss: 0.6743 - val_accuracy: 0.8590 - val_loss: 0.3696
Epoch 4/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8571 - loss: 0.3994 - val_accuracy: 0.9471 - val_loss: 0.1897
Epoch 5/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9269 - loss: 0.2549 - val_accuracy: 0.9736 - val_loss: 0.1371
Epoch 6/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9368 - loss: 0.1988 - val_accuracy: 0.9692 - val_loss: 0.0882
Epoch 7/200
[1m81/81[0m [32m━━━

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9950 - loss: 0.0096 - val_accuracy: 0.9956 - val_loss: 0.0050
Epoch 52/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9971 - loss: 0.0133 - val_accuracy: 0.9956 - val_loss: 0.0228
Epoch 53/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9995 - loss: 0.0059 - val_accuracy: 0.9956 - val_loss: 0.0055
Epoch 54/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9970 - loss: 0.0075 - val_accuracy: 0.9956 - val_loss: 0.0207
Epoch 55/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9949 - loss: 0.0125 - val_accuracy: 1.0000 - val_loss: 0.0044
Epoch 56/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9973 - loss: 0.0068 - val_accuracy: 0.9956 - val_loss: 0.0066
Epoch 57/200
[1m81/81[0m [32m━━━━━━━━━

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9980 - loss: 0.0051 - val_accuracy: 1.0000 - val_loss: 0.0023
Epoch 102/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9958 - loss: 0.0133 - val_accuracy: 1.0000 - val_loss: 0.0038
Epoch 103/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9964 - loss: 0.0054 - val_accuracy: 1.0000 - val_loss: 0.0012
Epoch 104/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9975 - loss: 0.0119 - val_accuracy: 0.9956 - val_loss: 0.0064
Epoch 105/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9959 - loss: 0.0104 - val_accuracy: 1.0000 - val_loss: 2.5410e-04
Epoch 106/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9979 - loss: 0.0062 - val_accuracy: 1.0000 - val_loss: 0.0030
Epoch 107/200
[1m81/81[0m [32

Epoch 150/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 6.8215e-04 - val_accuracy: 1.0000 - val_loss: 5.5998e-04
Epoch 151/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9965 - loss: 0.0072 - val_accuracy: 1.0000 - val_loss: 1.3823e-04
Epoch 152/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0034 - val_accuracy: 0.9956 - val_loss: 0.0084
Epoch 153/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 6.3365e-04 - val_accuracy: 0.9956 - val_loss: 0.0072
Epoch 154/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 4.1111e-04 - val_accuracy: 0.9956 - val_loss: 0.0077
Epoch 155/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 4.4556e-04 - val_accuracy: 0.9956 - val_loss: 0.009

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9947 - loss: 0.0141 - val_accuracy: 1.0000 - val_loss: 4.7984e-04
Epoch 200/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0024 - val_accuracy: 1.0000 - val_loss: 1.6902e-04


In [17]:
# Score the trained model on the separately loaded test set. `evaluate`
# returns the loss followed by the compiled metric (accuracy).
print('-----------------EVALUATION----------------')
loss, accuracy = model.evaluate(processed_X_test, y_test)
print('LOSS: ', loss)
print("Test ACCURACY: ", accuracy)

-----------------EVALUATION----------------
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9996 - loss: 0.0019   
LOSS:  0.010166078805923462
Test ACCURACY:  0.9977195262908936


In [18]:
# Score the model on the data it was trained on, for comparison with the test
# figures. This rebinds `loss` and `accuracy`.
loss, accuracy = model.evaluate(processed_X_train, y_train)
print('LOSS: ', loss)
print("Train ACCURACY: ", accuracy)

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 2.5230e-06
LOSS:  2.2554600036528427e-06
Train ACCURACY:  1.0


In [19]:
# Score the model on the held-out validation split. This rebinds `loss` and
# `accuracy` again.
loss, accuracy = model.evaluate(processed_X_val, y_val)
print('LOSS: ', loss)
print("Validation ACCURACY: ", accuracy)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 3.3607e-04 
LOSS:  0.00016902180504985154
Train ACCURACY:  1.0


In [20]:
# Write the trained model to CNN_model.keras in the working directory.
model.save('CNN_model.keras')

In [22]:
pip install opencv-python

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting opencv-python
  Downloading opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl (38.6 MB)
     ---------------------------------------- 38.6/38.6 MB 2.5 MB/s eta 0:00:00
Installing collected packages: opencv-python
Successfully installed opencv-python-4.9.0.80


## Model Prediction test

In [23]:
# Preprocessor for a single image

import tensorflow as tf
import numpy as np
from movenet import Movenet
import os

# Load the MoveNet pose detector from the local TFLite file. `detect_landmarks`
# below reads this module-level instance.
movenet = Movenet('movenet_thunder.tflite')

def detect_landmarks(image, detection_threshold=0.1):
    """Detects pose landmarks in an image with the module-level `movenet`.

    Args:
        image: image passed unchanged to `movenet.detect`.
        detection_threshold: minimum score every keypoint must reach.

    Returns:
        A float32 array with one `[x, y, score]` row per keypoint, or None
        when no person is detected, the detection has no keypoints, or any
        keypoint scores below `detection_threshold`.
    """
    person = movenet.detect(image)

    # Nothing was detected at all.
    if person is None:
        return None

    # Reject the detection if its weakest keypoint is below the threshold.
    # `default=None` covers an empty keypoint list, which would otherwise
    # make `min` raise ValueError.
    min_landmark_score = min(
        (keypoint.score for keypoint in person.keypoints), default=None)
    if min_landmark_score is None or min_landmark_score < detection_threshold:
        return None

    # Collect the coordinates and score of every keypoint as reported by the
    # detector; no rescaling is applied here.
    return np.array(
        [[keypoint.coordinate.x, keypoint.coordinate.y, keypoint.score]
         for keypoint in person.keypoints],
        dtype=np.float32,
    )

def preprocess_image(image_path, target_shape):
    """Loads a JPEG file and resizes it for landmark detection.

    Args:
        image_path: path of the JPEG file to read.
        target_shape: (height, width) passed to `tf.image.resize`.

    Returns:
        The resized image as a NumPy array with three colour channels.
    """
    raw_bytes = tf.io.read_file(image_path)

    # Force three channels so a grayscale JPEG is expanded to RGB instead of
    # decoding to a single channel.
    image = tf.io.decode_jpeg(raw_bytes, channels=3)

    # Resize to the requested shape and hand back a plain NumPy array.
    image = tf.image.resize(image, target_shape)
    return image.numpy()

def process_single_image(image_path, target_shape=(256, 256), detection_threshold=0.1):
    """Runs landmark detection on one image and packs the result into a one-row DataFrame.

    Args:
        image_path: path of the image file, forwarded to `preprocess_image`.
        target_shape: resize target forwarded to `preprocess_image`.
        detection_threshold: score threshold forwarded to `detect_landmarks`.

    Returns:
        A single-row DataFrame with an empty-string `filename` column followed
        by `<BODYPART>_x`, `<BODYPART>_y`, `<BODYPART>_score` columns holding
        the landmark values as strings, or None when `detect_landmarks`
        returns None.
    """
    image = preprocess_image(image_path, target_shape)
    landmarks = detect_landmarks(image, detection_threshold)
    if landmarks is None:
        return None

    # Column names: 'filename' first, then x/y/score for each body part in
    # enumeration order.
    header_name = ['filename']
    for bodypart in BodyPart:
        header_name.extend(
            bodypart.name + suffix for suffix in ('_x', '_y', '_score'))

    # Values are stored as strings; the leading empty string fills 'filename'.
    row_values = [''] + landmarks.flatten().astype(str).tolist()

    return pd.DataFrame(
        {column: [row_values[position]]
         for position, column in enumerate(header_name)}
    )

In [50]:
# Run landmark detection on one sample image and report the outcome.
image_path = 'dog.jpg'
landmarks = process_single_image(image_path)
if landmarks is None:
    print("No person detected or landmarks below threshold.")
else:
    print("Landmarks detected successfully:", landmarks)

No person detected or landmarks below threshold.


In [41]:
# Remove the placeholder filename column. Rebinding (instead of inplace=True)
# together with errors='ignore' lets this cell be re-run without a KeyError.
landmarks = landmarks.drop(columns='filename', errors='ignore')

In [42]:
# Preview the single landmark row.
landmarks.head()

Unnamed: 0,NOSE_x,NOSE_y,NOSE_score,LEFT_EYE_x,LEFT_EYE_y,LEFT_EYE_score,RIGHT_EYE_x,RIGHT_EYE_y,RIGHT_EYE_score,LEFT_EAR_x,...,LEFT_KNEE_score,RIGHT_KNEE_x,RIGHT_KNEE_y,RIGHT_KNEE_score,LEFT_ANKLE_x,LEFT_ANKLE_y,LEFT_ANKLE_score,RIGHT_ANKLE_x,RIGHT_ANKLE_y,RIGHT_ANKLE_score
0,124.0,176.0,0.5762694,118.0,181.0,0.5907945,118.0,180.0,0.5118377,104.0,...,0.898908,181.0,145.0,0.81275725,216.0,207.0,0.8582211,216.0,207.0,0.6017157


In [43]:
# process_single_image stores the landmark values as strings; convert them to
# floats before embedding.
landmarks = landmarks.astype(float)

In [44]:
# Shape of the training feature frame (presumably checked against the
# single-image row's column count).
X.shape

(1512, 51)

In [45]:
# Embed the single detected pose and add the trailing channel axis that the
# training, validation and test inputs were given before fitting.
test = tf.expand_dims(preprocess_data(landmarks), axis=-1)

In [46]:
# Class probabilities for the single image; the bare `arr` displays them.
arr = model.predict(test)
arr

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


array([[5.3986796e-13, 6.9385823e-19, 1.0000000e+00, 3.0035122e-20,
        1.6094400e-17, 2.4252008e-14, 4.9101904e-22, 1.6396834e-11]],
      dtype=float32)

In [47]:
# Collapse the prediction into a flat vector of per-class scores.
arr_flat = arr.flatten()

# Position and score of the highest-scoring class.
max_index = arr_flat.argmax()
max_value = arr_flat[max_index]

print("Index of the highest element:", max_index)
print("Value of the highest element:", max_value)

Index of the highest element: 2
Value of the highest element: 1.0


In [48]:
# 0 = chair
# 1 = cobra
# 2 = dog
# 3 = no_pose
# 4 = shoulder_stand
# 5 = triangle
# 6 = tree
# 7 = warrior