In [2]:
import os
import cv2
import random
import numpy as np
import datetime as dt
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import mediapipe as mp
import time 

%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model


# Histogram Equalization

In [1]:
def histogram_equalization(resized_frame):
    # Send a resized frame to this function, and it will return the histogram equalized frame.
    clahe = cv2.createCLAHE(clipLimit=10, tileGridSize=(8, 8))
    img_gray = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
    alpha = 2.2  # 1.0 - 3.0
    beta = 50  # 0 - 100
    
    new_image = cv2.convertScaleAbs(img_gray, alpha=alpha, beta=beta)
    gamma = 1.2
    look_up_table = np.empty((1, 256), np.uint8)
    for i in range(256):
        look_up_table[0, i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
    
    res = cv2.LUT(new_image, look_up_table)
    final_img = clahe.apply(res)
    
    return final_img

# Pose Estimation

### MediaPipe implemetation for a singole person. 32 points per frame

Works for Single person. Have to extend this to multi people . lmList contains keyponts foe all 32 points. 

In [3]:
VIDEO_FILE = "F:\CS3501-Data_Science_and_Engineering_Project\Project Files\Human-Action-Recognition-in-the-dark\datasets\clips_v1.5\Jump\Jump_1_7.mp4"

In [4]:
mpPose = mp.solutions.pose
pose = mpPose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence = 0.5, model_complexity=2)

In [5]:
mpDraw = mp.solutions.drawing_utils
cap = cv2.VideoCapture(VIDEO_FILE)
ptime = 0
points = []
while True:
    success, img = cap.read()
    if not success:
        break
    
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    imgRGB.flags.writeable = False
    results = pose.process(imgRGB)
    imgRGB.flags.writeable = True
    
    
    if results.pose_landmarks:
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
        lmList = [] #list of points for each frame
        for id, lm in enumerate( results.pose_landmarks.landmark):
            h, w, c = img.shape
            cx, cy = int(lm.x*w), int(lm.y*h)
            lmList.append([id, cx, cy])
        if len(lmList) != 0:
            points.append(lmList)       
            cv2.circle(img, (lmList[14][1], lmList[14][2]), 9, (255, 0, 0), cv2.FILLED)
        
    cTime = time.time()
    fps = 1/(cTime-ptime)
    ptime = cTime
    
    font_size = 3
    color = (255, 0, 0)
    cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, font_size, color, 3)
    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  
cap.release()
cv2.destroyAllWindows()

error: OpenCV(4.8.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


### Multipose Movenet implementation for up to 6 people

We can use the input_keypoints variable as the input to a model we are buidling. It contains 6 arrays which corresponds to maximum of six people this model can predict. Then it has 51 data points x, y and score * 17. 

In [7]:
# optional if you are using a gpu
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [8]:
gpus

[]

In [None]:
hub_model = hub.load("https://tfhub.dev/google/movenet/multipose/lightning/1")
movenet = hub_model.signatures['serving_default']

In [None]:
cap = cv2.VideoCapture("../vid.mp4")
ptime = 0

while True:
    sucess, frame = cap.read()
    if not sucess:
        print("video is over")
        break
    
    # resize the frame to make prediction faster. Longerside should be > 256 and multiple of 32
    img = frame.copy()
    img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 320, 512)
    input_img = tf.cast(img, dtype=tf.int32) 
    
    # make prediction
    
    results = movenet(input_img)
    #each result has 56 points that is x,y,score * 17 + 5 bounding box coordinates
    keypoints = results["output_0"].numpy()[:,:,:51].reshape(6,17,3) # 6 people, 17 key points, 3 for each key point
    
    #make sure it is confident enough to run through the model
    x, y, s = frame.shape
    input_keypoints = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    
    #Render
    loop_through_people(frame, keypoints, EDGES, 0.3)
    
    cv2.imshow("Image", frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

In [None]:
def loop_through_people(frame, keypoints_with_score, edges, confidence_threshold=0.4):
    for person in keypoints_with_score:
        draw_connections(frame, person, edges, confidence_threshold)
        draw_person(frame, person, confidence_threshold)

Draw Keypoints

In [None]:
def draw_person(frame, person, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(person, [y, x, 1]))
    for person in shaped:
        #creating an array for preditions
        ky, kx, kp_conf = person
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)
    return frame

Draw Edges

In [None]:
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

In [None]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y,x,1]))
    
    for edge, color in edges.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        
        if (c1 > confidence_threshold) & (c2 > confidence_threshold):      
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)