# VIDEO CAPTURE FROM WEBCAM

## Import libraries

In [1]:
import cv2 as cv
import numpy as np
import imutils
import tensorflow as tf
import tensorflow_hub as hub
import csv
import os
import math

## Load movenet model

In [2]:
model = hub.load("https://www.kaggle.com/models/google/movenet/frameworks/TensorFlow2/variations/singlepose-thunder/versions/4")
movenet = model.signatures['serving_default']

## Capture video streaming from webcam

In [3]:
image_width = 640
image_height = 640
image_ar = image_height / image_width

movenet_image_height = 256
movenet_image_width = 256
movenet_image_landscape_height_offset = int((movenet_image_height - (image_ar * movenet_image_width)) / 2)

data_filename = 'rotazione-laterale-capo-singlepose-thunder.csv'
export_data = []

KEY_POINTS = {
    'nose' : 0 , 
    'left_eye' : 1, 
    'right_eye' : 2, 
    'left_ear' : 3, 
    'right_ear' : 4, 
    'left_shoulder' : 5, 
    'right_shoulder' : 6, 
    'left_elbow' : 7, 
    'right_elbow' : 8, 
    'left_wrist' : 9, 
    'right_wrist' : 10, 
    'left_hip' : 11, 
    'right_hip' : 12, 
    'left_knee' : 13, 
    'right_knee' : 14, 
    'left_ankle' : 15, 
    'right_ankle' : 16
    }

KEY_POINTS_NAMES = list(KEY_POINTS.keys())

def get_cordinates_from(normalized_coord):
    return [int(normalized_coord[1] * image_width), int(normalized_coord[0] * image_height) - movenet_image_landscape_height_offset, normalized_coord[2]]
    
def get_keypoint_axes_from_prediction(prediction, kp='nose'):
    coords = prediction[KEY_POINTS.get(kp)]
    return get_cordinates_from(coords[:3])

def compute_key_points(image):
    # Ridimensiona il frame e aggiungi un padding se necessario. Il ridimensionamento trasforma i dati in float32
    tImage = tf.image.resize_with_pad(image, movenet_image_height, movenet_image_width)

    # Converti image data type da float32 -> int32 
    tImage = tf.cast(tImage, dtype=tf.int32)

    # Aggiungi una dimensione (in testa) in modo da essere compatibile con la shape dei dati richiesta dal modello
    tImage = tf.expand_dims(tImage, axis=0)

    # Movenet si aspetta in input: A frame of video or an image, 
    # represented as an int32 tensor of shape: 192x192x3. 
    # Channels order: RGB with values in [0, 255].
    kp = movenet(tImage)
    kp_coords = np.squeeze(kp['output_0'].numpy())

    norm = {}

    for idx in range(len(kp_coords)):
        coord = get_cordinates_from(kp_coords[idx])
        norm[KEY_POINTS_NAMES[idx]] = [(coord[:2]), coord[2]]

    raw = {}
    for idx in range(len(kp_coords)):
        coord = kp_coords[idx]
        raw[KEY_POINTS_NAMES[idx]] = [(coord[:2]), coord[2]]

    return (norm, raw)

def draw_keypoints(image, kp):
    cv.circle(image, kp['nose'][0], 4, (0, 0, 255), -1)
    cv.circle(image, kp['left_eye'][0], 4, (255, 0, 0), -1)
    cv.circle(image, kp['right_eye'][0], 4, (255, 0, 0), -1)
    cv.circle(image, kp['left_shoulder'][0], 4, (0, 255, 0), -1)
    cv.circle(image, kp['right_shoulder'][0], 4, (0, 255, 0), -1)
    
    text = "Distance is ok!" if kp['left_shoulder'][1] > 0.5 and kp['right_shoulder'][1] > 0.5 else "Shoulders not visible"
    cv.putText(image, text, (20,40), cv.FONT_HERSHEY_PLAIN, 2, (255,255,255), 1, cv.LINE_AA) 

# TODO Calcola inclinazione laterale testa
def compute_lateral_head_inclination(kp):
    # m = (y2-y1)/(x2-x1)
    eye_slope = (kp['right_eye'][0][1] - kp['left_eye'][0][1]) / (kp['right_eye'][0][0] - kp['left_eye'][0][0])
    eye_center = (((kp['right_eye'][0][0] + kp['left_eye'][0][0])/2), ((kp['right_eye'][0][1] + kp['left_eye'][0][1])/2))
    shoulder_center = (((kp['left_shoulder'][0][0] + kp['left_shoulder'][0][0])/2), ((kp['right_shoulder'][0][1] + kp['right_shoulder'][0][1])/2))
    pt1 = (int(eye_center[1] * eye_slope + eye_center[0]), 0)
    pt2 = (int((eye_center[1]-image_height) * eye_slope + eye_center[0]), image_height)
    myradians = math.atan2(eye_center[1]-shoulder_center[1], eye_center[0]-shoulder_center[0])
    mydegrees = math.degrees(myradians)

    #cv.line(image,  pt1, pt2, (0, 255, 0), 4)
    #cv.line(image,  kp['left_shoulder'][0],  kp['right_shoulder'][0], (0, 255, 0), 4)
    
    return 0

# TODO Calcola rotazione testa: con il modello 2D non posso calcolarlo
def compute_head_rotation(kp):
    
    return 0

# TODO Calcola inclinazione frontale testa: con il modello 2D non posso calcolarlo
def compute_frontal_head_inclination(kp):
    
    return 0

def add_to_keypoints(kp, pose): 
    export_data.append({
        'nose' : kp['nose'][0], 
        'left_eye' : kp['left_eye'][0], 
        'right_eye' : kp['right_eye'][0], 
        'left_ear' : kp['left_ear'][0], 
        'right_ear' : kp['right_ear'][0], 
        'left_shoulder' : kp['left_shoulder'][0], 
        'right_shoulder' : kp['right_shoulder'][0],
        'pose' : pose
    })

def export_keypoints():
    cwd = os.getcwd()
    filePath = os.path.join(cwd, 'data', data_filename)

    with open(filePath, 'r+', encoding='UTF8') as f:
        writer = csv.writer(f)

        if not f.readline(0).strip():
            # write the header
            writer.writerow(['class', 
                            'x-0', 'y-0', 
                            'x-1', 'y-1',
                            'x-2', 'y-2',
                            'x-3', 'y-3',
                            'x-4', 'y-4',
                            'x-5', 'y-5',
                            'x-6', 'y-6',
                            ])
        
        # write the data
        for data in export_data:
            
            writer.writerow([
                data.get('pose'),
                data.get('nose')[0], data.get('nose')[1],
                data.get('left_eye')[0], data.get('left_eye')[1],
                data.get('right_eye')[0], data.get('right_eye')[1],
                data.get('left_ear')[0], data.get('left_ear')[1],
                data.get('right_ear')[0], data.get('right_ear')[1],
                data.get('left_shoulder')[0], data.get('left_shoulder')[1],
                data.get('right_shoulder')[0], data.get('right_shoulder')[1]
            ])

In [4]:
cameraIndex = 0
cap = cv.VideoCapture(cameraIndex)

while cap.isOpened():
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break
    

    frame = cv.flip(frame, 1)

    # Our operations on the frame come here
    kp_norm, kp_raw = compute_key_points(frame)

    # Resize frame
    #flippedImage = cv.flip(frame, 1)
    resizedImage = tf.cast(tf.image.resize_with_pad(frame, image_height, image_width), dtype=tf.uint8).numpy()

    # Draw keypoints
    draw_keypoints(resizedImage, kp_norm)

    # Display the resulting frame
    cv.imshow('Webcam', resizedImage)
    
    key = cv.waitKey(1) & 0xFF
    if key == ord("q"):
        break
    if key == 2:
        add_to_keypoints(kp_raw, "left")
    if key == 3:
        add_to_keypoints(kp_raw, "right")
    if key == 1:
        add_to_keypoints(kp_raw, "center")


cap.release()
cv.destroyAllWindows()
cv.waitKey(100)

print("Export recorded keypoints")
print(export_data)
export_keypoints()


Can't receive frame (stream end?). Exiting ...
Export recorded keypoints
[]


FileNotFoundError: [Errno 2] No such file or directory: '/Users/giogaspa/Projects/Personali/tesi/ML/data-collection/data/rotazione-laterale-capo-singlepose-thunder.csv'