In [1]:
######## install dependencies ########

%pip install numpy opencv-python tensorflow
%pip install --user mediapipe

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
######## import and set up opencv, mediapipe ########

import cv2
import numpy as np

import mediapipe as media

mp_drawing_styles = media.solutions.drawing_styles
mp_drawing = media.solutions.drawing_utils
mp_pose = media.solutions.pose

Track figure poses and generate motion history images from webcam feed

In [2]:
######## set motion history image parameters ########

MHI_DURATION = 4
THRESHOLD = 32
MAX_TIME_DELTA = 2.0
MIN_TIME_DELTA = 0.5

In [3]:
######## image resize method ########

def preprocess_MHI_frame(frame):
    H, W = frame.shape[0], frame.shape[1]
    resizeH = 600
    resizeW = int((W / H) * resizeH)
    
    resize_image = cv2.resize(frame, (resizeW, resizeH))
    #input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)    #makes you blue!
    #gray_image = cv2.cvtColor(resize_image, cv2.COLOR_BGR2GRAY)
    return resize_image

In [4]:
model

NameError: name 'model' is not defined

In [24]:
########    PROCESS LIVE WEBCAM DATA    ########
#   MHI of pose data

cap = cv2.VideoCapture(0)

H, W = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
W = int(W / H * 600)

motion_history = np.zeros((600, W), np.float32)

index = 0
delay = 0.0
init_timestamp = cv2.getTickCount() / cv2.getTickFrequency()
datadir = './data/cabbagepatch/'

with mp_pose.Pose(min_detection_confidence=0.25, min_tracking_confidence=0.25) as pose:

    ret, frame = cap.read()
    prev_frame = 0
    if ret:
        prev_initframe = preprocess_MHI_frame(frame)
        prev_results = pose.process(prev_initframe)

        prev_frame = np.zeros(prev_initframe.shape)
        
        mp_drawing.draw_landmarks(
            prev_frame,
            prev_results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )
        
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("error reading video feed")
            break

        curr_initframe = preprocess_MHI_frame(frame)
        pose_results = pose.process(curr_initframe)

        curr_frame = np.zeros(curr_initframe.shape)
        
        mp_drawing.draw_landmarks(
            curr_frame,
            pose_results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )

        silhouette = cv2.absdiff(curr_frame, prev_frame).astype(np.uint8)
        silhouette = cv2.cvtColor(silhouette, cv2.COLOR_RGB2GRAY)

        ret, motion_mask = cv2.threshold(silhouette, THRESHOLD, 1, cv2.THRESH_BINARY)

        timestamp = cv2.getTickCount() / cv2.getTickFrequency()
        motion_history[motion_mask == 1] = timestamp

        mhi = np.uint8(np.clip(1 - (timestamp - motion_history) / MHI_DURATION, 0, 1) * 255)
        
        # if timestamp - init_timestamp > 10.0 and timestamp > delay:
        #     cv2.imwrite(datadir + f'{index}.png', mhi)
        #     index += 1
        #     delay = timestamp + 1.0
        #     if index >= 32:
        #         break
        
        # FIGURE OUT HOW TO CONVERT VIDEO FEED INTO TRAINABLE DATA

        if timestamp > delay:
            resize = cv2.resize(mhi, (256,256,3))
            print(resize.shape)
            print(model.predict(resize))
            delay = timestamp + 1.0

        cv2.imshow('Motion History Feed', mhi)

        prev_frame = curr_frame

        if cv2.waitKey(1) == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

(1, 256, 256)




ValueError: Sequential model 'sequential_1' has already been configured to use input shape (None, 256, 256). You cannot build it with input_shape (1, 256, 256)

Pose coordinate analysis

In [9]:
########    match pose landmarks to coordinates     ########

pose_indices = ['nose', 
                'left_eye_inner', 'left_eye', 'left_eye_outer', 
                'right_eye_inner', 'right_eye', 'right_eye_outer',
                'left_ear', 'right_ear',
                'mouth_left', 'mouth_right',
                'left_shoulder', 'right_shoulder',
                'left_elbow', 'right_elbow',
                'left_wrist', 'right_wrist',
                'left_pinky', 'right_pinky',
                'left_index', 'right_index',
                'left_thumb', 'right_thumb',
                'left_hip', 'right_hip',
                'left_knee', 'right_knee',
                'left_ankle', 'right_ankle',
                'left_heel', 'right_heel',
                'left_foot_index', 'right_foot_index']

def setNewCoords(landmark):
    new_coords = [{
                'x': point.x,
                'y': point.y,
                'z': point.z,
                'vis': point.visibility
                }   
                for point in landmark]

    keypoints = {}
    for i in range(len(pose_indices)):
        keypoints[pose_indices[i]] = new_coords[i]

    return keypoints

keypoints = setNewCoords(pose_landmarks.landmark)
for key in keypoints.keys():
    if keypoints[key]['vis'] > 0.5:
        print(key, keypoints[key])

nose {'x': 0.621558427810669, 'y': 0.08921728283166885, 'z': -0.5160354375839233, 'vis': 0.9999528527259827}
left_eye_inner {'x': 0.6334375739097595, 'y': 0.07345728576183319, 'z': -0.484919011592865, 'vis': 0.9999215602874756}
left_eye {'x': 0.6378164291381836, 'y': 0.07472570985555649, 'z': -0.4850243031978607, 'vis': 0.9999128580093384}
left_eye_outer {'x': 0.6419649720191956, 'y': 0.07605085521936417, 'z': -0.4851114749908447, 'vis': 0.999923586845398}
right_eye_inner {'x': 0.6195680499076843, 'y': 0.0685989037156105, 'z': -0.4895278811454773, 'vis': 0.9999054074287415}
right_eye {'x': 0.6142794489860535, 'y': 0.06626370549201965, 'z': -0.4896296560764313, 'vis': 0.9998775124549866}
right_eye_outer {'x': 0.6092092990875244, 'y': 0.06397171318531036, 'z': -0.48960861563682556, 'vis': 0.9998661875724792}
left_ear {'x': 0.645785391330719, 'y': 0.0829315185546875, 'z': -0.2932945787906647, 'vis': 0.9998888373374939}
right_ear {'x': 0.6009838581085205, 'y': 0.06389220803976059, 'z': -0.

In [13]:
########    calculate parameter values     ########

def calcParamValues(keypoints):
    #hip midpoint
    hip_midpoint = (keypoints['right_hip']['x'] - keypoints['left_hip']['x']) / 2

    #calculate cartesian distance between two points
    def calculateDistance(x, y):
        return np.sqrt(x ** 2 + y ** 2)

    #calculate angle created by two cartesian vectors
    #   negative angles : bend outward, i.e. gotta piss
    #   positive angles : bend inward, i.e. sumo squat
    def calculateAngle(A, B, C = None):
        direction = 1
        if not C:
            C = {'x': 1, 'y': 0}
        elif abs(B['x'] - hip_midpoint) < abs(C['x'] - hip_midpoint):
            direction = -1

        vecBA, vecBC = [A['x'] - B['x'], A['y'] - B['y']], [C['x'] - B['x'], C['y'] - B['y']]
        dot_product = vecBA[0] * vecBC[0] + vecBA[1] * vecBC[1]
        magBA, magBC = calculateDistance(vecBA[0], vecBA[1]), calculateDistance(vecBC[0], vecBC[1])
        return np.arccos(dot_product / (magBA * magBC)) * direction

    #calculate body facing
    # -pi : facing to the right
    #  0  : facing the camera
    #  pi : facing to the left
    shoulder_depthdiff = keypoints['left_shoulder']['z'] - keypoints['right_shoulder']['z']
    shoulder_widthdiff = keypoints['left_shoulder']['x'] - keypoints['right_shoulder']['x']
    body_angle = np.arctan(shoulder_depthdiff / shoulder_widthdiff)

    #calculate arm angles
    leftarm_angle = calculateAngle(keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist']) 
    rightarm_angle = calculateAngle(keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist']) 
    leftleg_angle = calculateAngle(keypoints['left_hip'], keypoints['left_knee'], keypoints['left_ankle'])
    rightleg_angle = calculateAngle(keypoints['right_hip'], keypoints['right_knee'], keypoints['right_ankle'])

    #calculate shoulder dimensions relative to hips
    shoulder_angle = calculateAngle(keypoints['left_shoulder'], keypoints['right_shoulder'])
    hip_angle = calculateAngle(keypoints['left_hip'], keypoints['right_hip'])
    shoulder_relativeangle = shoulder_angle - hip_angle

    shoulder_midpoint = (keypoints['right_shoulder']['x'] - keypoints['left_shoulder']['x']) / 2
    shoulder_relativemidpoint = abs(shoulder_midpoint - hip_midpoint)

    #calculate foot dimensions relative to each other
    foot_depthdiff = keypoints['left_ankle']['z'] - keypoints['left_ankle']['z']
    foot_heightdiff = keypoints['left_ankle']['y'] - keypoints['right_ankle']['y']

    #calculate angle of the foot
    # using knee-heel-toe angle to describe foot direction
    leftfoot_angle = calculateAngle(keypoints['left_knee'], keypoints['left_heel'], keypoints['left_foot_index'])
    rightfoot_angle = calculateAngle(keypoints['right_knee'], keypoints['right_heel'], keypoints['right_foot_index'])

    profile = {'body-ang': body_angle, 
               'LA-ang': leftarm_angle, 'RA-ang': rightarm_angle, 
               'LL-ang': leftleg_angle, 'RL-ang': rightleg_angle,
            'SR-ang': shoulder_relativeangle, 'SR-mp': shoulder_relativemidpoint,
            'F-depth': foot_depthdiff, 'F-height': foot_heightdiff,
            'LF-ang': leftfoot_angle, 'RF-ang': rightfoot_angle}
    
    return profile

parameterValues = calcParamValues(keypoints)
for key in parameterValues.keys():
    print(key, parameterValues[key])

body-ang 0.2968405428715714
LA-ang 1.9901597463662961
RA-ang 2.040441019913823
LL-ang -2.9303926250519243
RL-ang 3.1332781667736183
SR-ang 0.02653235396175402
SR-mp 0.020781755447387695
F-depth 0.0
F-height 0.015932857990264893
LF-ang -2.3055504388264145
RF-ang -2.682542757841427


Build the model with Tensorflow and Keras

In [5]:
######## import tensorflow + keras ########
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense 

In [5]:
######## preprocess image data from directories ########
data = tf.keras.utils.image_dataset_from_directory('data', batch_size=8)

data = data.map(lambda x,y: (x/255, y))
data_iterator = data.as_numpy_iterator()
batch = data_iterator.next()

print(batch[0].shape)

Found 128 files belonging to 4 classes.
(8, 256, 256, 3)


In [11]:
batch[0].max()

1.0

In [6]:
train_size = int(len(data) * .7)
val_size = int(len(data) * .2) + 1
test_size = int(len(data) * .1)

train = data.take(train_size)
val = data.skip(train_size).take(val_size)
test = data.skip(train_size+val_size).take(test_size)

In [7]:
len(test)

1

In [29]:
######## set up model and convolutional layers ########
model = Sequential()

#16 filters
model.add(Conv2D(16, (2, 2), activation='relu', input_shape=(256, 256, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#32 filters
model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

#flatten
model.add(Flatten())

#narrow down to 4 identifiers
model.add(Dense(64, activation='relu'))
model.add(Dense(4, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [30]:
######## train  #######

train_size = int(len(data) * .7)
val_size = int(len(data) * .2) + 1
test_size = int(len(data) * .1)

train = data.take(train_size)
val = data.skip(train_size).take(val_size)
test = data.skip(train_size+val_size).take(test_size)

logdir = 'logs'

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

hist = model.fit(train, epochs=20, validation_data=val, callbacks=[tensorboard_callback])


Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 71ms/step - accuracy: 0.3855 - loss: 3.0031 - val_accuracy: 0.7812 - val_loss: 0.5187
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.7268 - loss: 0.5164 - val_accuracy: 0.8438 - val_loss: 0.2740
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.9287 - loss: 0.2154 - val_accuracy: 1.0000 - val_loss: 0.1336
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 1.0000 - loss: 0.0756 - val_accuracy: 1.0000 - val_loss: 0.0305
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 1.0000 - loss: 0.0171 - val_accuracy: 1.0000 - val_loss: 0.0096
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 1.0000 - loss: 0.0051 - val_accuracy: 1.0000 - val_loss: 0.0037
Epoch 7/20
[1m11/11[0m [32m━━━━

In [8]:
cap = cv2.VideoCapture(0)

H, W = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
W = int(W / H * 600)

motion_history = np.zeros((600, W), np.float32)

index = 0
delay = 15.0
init_timestamp = cv2.getTickCount() / cv2.getTickFrequency()

moves_map = ['bart simpson', 'biz markie', 'cabbage patch', 'reject']

with mp_pose.Pose(min_detection_confidence=0.25, min_tracking_confidence=0.25) as pose:

    ret, frame = cap.read()
    prev_frame = 0
    if ret:
        prev_initframe = preprocess_MHI_frame(frame)
        prev_results = pose.process(prev_initframe)

        prev_frame = np.zeros(prev_initframe.shape)
        
        mp_drawing.draw_landmarks(
            prev_frame,
            prev_results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )
        
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("error reading video feed")
            break

        curr_initframe = preprocess_MHI_frame(frame)
        pose_results = pose.process(curr_initframe)

        curr_frame = np.zeros(curr_initframe.shape)
        
        mp_drawing.draw_landmarks(
            curr_frame,
            pose_results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
        )

        silhouette = cv2.absdiff(curr_frame, prev_frame).astype(np.uint8)
        silhouette = cv2.cvtColor(silhouette, cv2.COLOR_RGB2GRAY)

        ret, motion_mask = cv2.threshold(silhouette, THRESHOLD, 1, cv2.THRESH_BINARY)

        timestamp = cv2.getTickCount() / cv2.getTickFrequency()
        motion_history[motion_mask == 1] = timestamp

        mhi = np.uint8(np.clip(1 - (timestamp - motion_history) / MHI_DURATION, 0, 1) * 255)

        if timestamp > delay:
            resize = cv2.resize(mhi, (256,256))
            resize_rgb = cv2.cvtColor(resize, cv2.COLOR_GRAY2RGB)
            print(resize_rgb.shape)
            print(model.predict(np.expand_dims(resize_rgb, 0)))
            delay = timestamp + 3.0

        cv2.imshow('Motion History Feed', mhi)

        prev_frame = curr_frame

        if cv2.waitKey(1) == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()



(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step
[[0. 0. 1. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[[1.0000000e+00 3.9761057e-37 3.7550594e-09 0.0000000e+00]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[[0. 1. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[[1. 0. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[[1. 0. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[[0. 1. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[[0. 1. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[[0. 1. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[[0. 1. 0. 0.]]
(256, 256, 3)
[1m1/1[0m [32m━━━━━━

In [6]:
from tensorflow.keras.models import load_model

In [None]:
model.save('./models/danceid.h5')

In [7]:
model = load_model('./models/danceid.h5')

