# Detect Body Points

## Import Libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
import numpy as np

## Load Images

In [None]:
# Mount Google Drive at /content/drive so the image, weight and video paths used below resolve
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Read the test image from Drive and display it in the notebook
image_path = '/content/drive/MyDrive/Computer Vision Masterclass/Images/megan.jpg'
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None rather than raising,
# so stop here with a clear message instead of failing later on image.shape
if image is None:
  raise FileNotFoundError('Could not read image: {}'.format(image_path))
cv2_imshow(image)

In [None]:
# Show the image's shape tuple next to the product of its first three entries (total number of values)
image.shape, image.shape[0]*image.shape[1]*image.shape[2]

In [None]:
# Convert the image into the blob that is fed to the network.
# size is given as (width, height) while image.shape starts with (height, width), hence the swapped indices;
# scalefactor=1.0/255 rescales the pixel values (normalisation).
image_blob = cv2.dnn.blobFromImage(image=image, scalefactor=1.0/255, size=(image.shape[1], image.shape[0])) # also normalises
image_blob.shape

# Axes of the blob shape shown above: batch size, channels, then the spatial dimensions

## Load pre-trained network
- Caffe Deep Learning Framework : https://caffe.berkeleyvision.org/

In [None]:
# Load the pre-trained Caffe pose model through OpenCV's dnn module
network = cv2.dnn.readNetFromCaffe('/content/drive/MyDrive/Computer Vision Masterclass/Weights/pose_deploy_linevec_faster_4_stages.prototxt',  # network definition (.prototxt)
                                   '/content/drive/MyDrive/Computer Vision Masterclass/Weights/pose_iter_160000.caffemodel')  # trained weights (.caffemodel)

In [None]:
# List the names of all layers in the loaded network
network.getLayerNames()

In [None]:
# Number of layers in the loaded network
len(network.getLayerNames())

## Predict Body Points

In [None]:
# Feed the image blob to the network and run a forward pass
network.setInput(image_blob)
output = network.forward() # blob goes in at the input layer; `output` is what the final layer produces

In [None]:
output.shape # axes as used below: [image in batch, keypoint confidence map, map height, map width]

In [None]:
# Width and height of each confidence map; used below to scale map coordinates back to image coordinates
position_width = output.shape[3]
position_height = output.shape[2]

In [None]:
# Locate each body keypoint on `image`, draw the confident ones and collect them in `points`.
num_points = 15   # confidence maps 0-14 are read; per the original note the network's extra map (15) is background and is skipped
threshold = 0.1   # keep a keypoint only if its peak confidence is above 10%
points = []       # one entry per keypoint: (x, y) in image pixels, or None when not detected

for i in range(num_points):
  # confidence map of keypoint i for the first (only) image in the batch
  confidence_map = output[0, i]
  # the position of the map's maximum is taken as the keypoint location
  _, confidence, _, point = cv2.minMaxLoc(confidence_map)

  # map coordinates -> coordinates in the original image
  x = int((image.shape[1] * point[0]) / position_width)
  y = int((image.shape[0] * point[1]) / position_height)

  if not confidence > threshold:
    # too uncertain: keep the slot so list indices still match keypoint ids
    points.append(None)
    continue

  location = (x, y)
  cv2.circle(image, location, 2, (0, 0, 255), thickness = 2)
  cv2.putText(image, '{}'.format(i), location, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
  points.append(location)
  print("Point :", i, "\nConfidence :", confidence, "\nLocation :", location, "\n")

## Display Image with Points

In [None]:
# Show the image with the detected keypoints; the image is converted from BGR to RGB for matplotlib
plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

## Draw Lines Connecting Points

In [None]:
# Pairs of keypoint indices to join with a line; each [a, b] entry is one segment of the drawn skeleton
point_connections =[[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13]]
point_connections

In [None]:
# Draw a line for every connection whose two end points were both detected
for pointA, pointB in point_connections:
  start, end = points[pointA], points[pointB]
  if start and end:  # an undetected keypoint is stored as None
    cv2.line(image, start, end, (255,0,0), 1)

In [None]:
# Show the image again, now with the connecting lines drawn (BGR -> RGB for matplotlib)
plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

# Detect Movements

## Arms Above Head (Image)

In [None]:
# Detect the body keypoints in the player image, annotate them on it and display the result.
image2_path = '/content/drive/MyDrive/Computer Vision Masterclass/Images/player.jpg'
image2 = cv2.imread(image2_path)
# cv2.imread reports a missing/unreadable file by returning None rather than raising,
# so stop here with a clear message instead of failing later on image2.shape
if image2 is None:
  raise FileNotFoundError('Could not read image: {}'.format(image2_path))

# size is (width, height); scalefactor=1.0/255 normalises the pixel values
image_blob2 = cv2.dnn.blobFromImage(image=image2, scalefactor=1.0/255, size=(image2.shape[1], image2.shape[0]))

network.setInput(image_blob2)
output2 = network.forward() # blob goes in at the input layer; output2 is what the final layer produces

# size of each confidence map, needed to scale map coordinates back to the image
position_width = output2.shape[3]
position_height = output2.shape[2]

num_points = 15 # confidence maps 0-14 are read; per the original note the extra map (15) is background and is skipped
points =[] # one entry per keypoint: (x, y) in image pixels, or None when not detected
threshold = 0.1 # keep a keypoint only if its peak confidence is above 10%

for i in range(num_points):
  confidence_map = output2[0, i, :, :] # map of keypoint i for the first (only) image in the batch
  # the position of the map's maximum is taken as the keypoint location
  _, confidence, _, point = cv2.minMaxLoc(confidence_map)

  # map coordinates -> coordinates in the original image
  x = int((image2.shape[1] * point[0]) / position_width)
  y = int((image2.shape[0] * point[1]) / position_height)

  if confidence > threshold:
    cv2.circle(image2, (x, y), 2, (0, 0, 255), thickness = 2)
    # keypoint id next to the dot ...
    cv2.putText(image2, '{}'.format(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, .3, (0, 255, 0))
    # ... and its raw map coordinates just below
    cv2.putText(image2, '{}-{}'.format(point[0], point[1]), (x, y+10), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 0, 0))
    points.append((x,y))
  else:
    # keep the slot so list indices still match keypoint ids
    points.append(None)

# OpenCV image is BGR; convert to RGB for matplotlib
plt.figure(figsize=(20, 20))
plt.axis('off')
plt.imshow(cv2.cvtColor(image2, cv2.COLOR_BGR2RGB));

In [None]:
def verify_arms_up(points):
  """Return True when both wrists are above the head.

  Args:
    points: sequence of keypoints indexed by keypoint id. Each entry is an
      (x, y) tuple, or None when that keypoint was not detected. Index 0 is
      used as the head, 4 as the right wrist and 7 as the left wrist.

  Returns:
    True only if the head and both wrists are present and each wrist's y is
    smaller than the head's y (a smaller y is treated as higher up).
    False otherwise, including when any of the three keypoints is missing.
  """
  head_idx, right_wrist_idx, left_wrist_idx = 0, 4, 7
  required = (head_idx, right_wrist_idx, left_wrist_idx)

  # Undetected keypoints are stored as None; without all three there is nothing
  # to compare, so report "not arms up" instead of failing on None[1].
  if len(points) <= max(required):
    return False
  if any(points[idx] is None for idx in required):
    return False

  head = points[head_idx][1]
  right_wrist = points[right_wrist_idx][1]
  left_wrist = points[left_wrist_idx][1]

  return bool(right_wrist < head and left_wrist < head)

# Run the check on the keypoints currently held in `points`
verify_arms_up(points)

## Arms Above Head (Video)

In [None]:
# Open the input video and read one frame; `status` tells whether that read succeeded
video = '/content/drive/MyDrive/Computer Vision Masterclass/Videos/gesture1.mp4'
capture = cv2.VideoCapture(video)
status, frame = capture.read()
status

In [None]:
# Create the writer for the annotated video: XVID fourcc, 10 frames per second,
# frame size (width, height) taken from the frame already read from the input video.
# NOTE(review): 'XVID' is usually paired with an .avi container - confirm it is accepted for this .mp4 path.
# NOTE(review): fps is hard-coded to 10 rather than read from the input - confirm this is intended.
result = '/content/drive/MyDrive/Computer Vision Masterclass/Videos/gesture1_result.mp4'
save_video = cv2.VideoWriter(result, cv2.VideoWriter_fourcc(*'XVID'), 10, (frame.shape[1], frame.shape[0]))

In [None]:
# Process the video frame by frame: detect keypoints, draw the skeleton, flag frames
# where the arms are above the head, display each frame and append it to the output video.
threshold = 0.1 # keep a keypoint only if its peak confidence is above 10%
num_points = 15 # confidence maps 0-14 are read; per the original note the extra map (15) is background and is skipped

try:
  while cv2.waitKey(1) < 0:
    status, frame = capture.read()

    if not status: # read failed, e.g. no frames left
      break

    # fixed 256x256 network input; scalefactor=1.0/255 normalises the pixel values
    image_blob = cv2.dnn.blobFromImage(image=frame, scalefactor=1.0/255, size=(256, 256))
    network.setInput(image_blob)
    output = network.forward() # blob goes in at the input layer; output is what the final layer produces

    # size of each confidence map, needed to scale map coordinates back to the frame
    position_width = output.shape[3]
    position_height = output.shape[2]

    points =[] # one entry per keypoint: (x, y) in frame pixels, or None when not detected
    for i in range(num_points):
      confidence_map = output[0, i, :, :] # map of keypoint i for the first (only) image in the batch
      # the position of the map's maximum is taken as the keypoint location
      _, confidence, _, point = cv2.minMaxLoc(confidence_map)

      # map coordinates -> coordinates in the original frame
      x = int((frame.shape[1] * point[0]) / position_width)
      y = int((frame.shape[0] * point[1]) / position_height)

      if confidence > threshold:
        cv2.circle(frame, (x, y), 2, (0, 0, 255), thickness = 2)
        cv2.putText(frame, '{}'.format(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, .3, (0, 255, 0))
        cv2.putText(frame, '{}-{}'.format(point[0], point[1]), (x, y+10), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 0, 0))
        points.append((x,y))
      else:
        # keep the slot so list indices still match keypoint ids
        points.append(None)

    # join every pair of keypoints that were both detected
    for connection in point_connections:
      pointA = connection[0]
      pointB = connection[1]
      if points[pointA] and points[pointB]:
        cv2.line(frame, points[pointA], points[pointB], (255,0,0), 1)

    if verify_arms_up(points):
      cv2.putText(frame, 'Exercise Done', (50,200), cv2.FONT_HERSHEY_COMPLEX, 3, (0,0,255))

    cv2_imshow(frame)
    save_video.write(frame)
finally:
  # always release the reader and the writer, even if a frame fails to process,
  # so the output file is finalised and the input is not left open
  capture.release()
  save_video.release()