**DATASET CREATION**

---



**Platform**: Colab

**Requirement**: GPU

**Written on**: 14 September 2021

**Tested on**: 22 September 2021

**Author**: A.S. Faraz Ahmed

**Description**: 

> Generates dataset for Pose classification

# Record Video

**1. Choose a Pose**
<figure>
<center>
<img src='https://raw.githubusercontent.com/fxrarz/Research/main/Real_Time_Motion_Control_Of_A_Humanoid_Robot_Using_Deep_Learning/Asset/human-pose1.jpg' height="350" width="420" />
<figcaption>Arms Stretched</figcaption></center>
</figure>

**2. Record multiple videos of that pose**
<figure>
<center>
<img src='https://raw.githubusercontent.com/fxrarz/Research/main/Real_Time_Motion_Control_Of_A_Humanoid_Robot_Using_Deep_Learning/Asset/pose.gif' height="350" width="420" />
</center>
</figure>
(Vary the background, clothing, person, camera angle, and even the video size settings between recordings)

**3. Create a folder of pose name and paste the video**
(Folder name is the class name)

**4. Repeat Steps 1 to 3 until all required poses are recorded**

**5. Archive the folders as a gzip-compressed tar file (`.tar.gz`)**

**6. Upload the archive to Google Drive and remember the path**

# Extract Image from Video

Remove the existing `sample_data` folder

In [None]:
import os
import shutil

# Work from the Colab content root so the relative path below resolves there.
os.chdir('/content')

# Delete the sample_data folder when it is present in the working directory.
entries_here = os.listdir()
if "sample_data" in entries_here:
  shutil.rmtree('sample_data/')

Mount Google drive

In [None]:
from google.colab import drive

# Mount point under which Google Drive is exposed; the copy step that follows
# reads the video archive from below this directory.
mount_point = '/gdrive'
drive.mount(mount_point)

Mounted at /gdrive


Copy files from Google drive to Colab

In [None]:
%%bash
# Stop at the first failing step so a failed copy or extraction is not
# followed by the rm/mv below.
set -e
gdrive_location="/gdrive/MyDrive/dataset/Videos.tar.gz"
# The archive is copied to /content and tar extracts into the current
# directory, so make /content the working directory explicitly.
cd /content
# Quote the variable so a Drive path containing spaces is passed as one argument.
cp "$gdrive_location" /content/
#Extract archive
tar -xvzf "Videos.tar.gz"
#Remove the local copy of the archive
rm Videos.tar.gz
# The extraction cell reads the videos from /content/temp.
mv Videos temp

Extract images from videos

In [None]:
import os
import cv2

# Every sub-folder of `path` is one pose class holding that pose's videos.
# Frames are written to /content/raw/<class>/<n>.jpg, where <n> keeps counting
# across all videos of the class so that file names never collide.
path = "/content/temp"
raw_root = "/content/raw"
print('Available folders are: \n', os.listdir(path))

for folder in os.listdir(path):
  video_folder = os.path.join(path, folder)
  # Only real pose folders become classes; skip stray files and hidden
  # entries such as Jupyter's .ipynb_checkpoints.
  if folder.startswith(".") or not os.path.isdir(video_folder):
    continue

  # Build the destination from its parts instead of str.replace("temp", "raw"),
  # which would also rewrite a class name that happens to contain "temp".
  save_folder = os.path.join(raw_root, folder)
  # exist_ok lets the cell be re-run; frames with the same number are overwritten.
  os.makedirs(save_folder, exist_ok=True)
  print(save_folder)

  i = 0  # number of frames saved so far for this class
  # Sorted so the frame numbering is reproducible between runs.
  for f in sorted(os.listdir(video_folder)):
    f = os.path.join(video_folder, f)
    print("Original Video File", f)
    print("Save Folder location", save_folder)

    cap = cv2.VideoCapture(f)
    saved_before = i
    try:
      while True:
        ok, frame = cap.read()
        if not ok:
          # End of the video, or the file could not be decoded.
          break
        i += 1
        name = os.path.join(save_folder, str(i) + ".jpg")
        # Every frame is turned 90 degrees clockwise before saving.
        # NOTE(review): presumably the clips are portrait phone recordings
        # stored sideways - confirm for new footage.
        frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
        cv2.imwrite(name, frame)
    finally:
      # Free the decoder handle even if writing a frame fails.
      cap.release()
    # One summary line per video instead of two lines per frame.
    print("Frames saved from this video:", i - saved_before)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/content/raw/down/731.jpg
True
/content/raw/down/732.jpg
True
/content/raw/down/733.jpg
True
/content/raw/down/734.jpg
True
/content/raw/down/735.jpg
True
/content/raw/down/736.jpg
True
/content/raw/down/737.jpg
True
/content/raw/down/738.jpg
True
/content/raw/down/739.jpg
True
/content/raw/down/740.jpg
True
/content/raw/down/741.jpg
True
/content/raw/down/742.jpg
True
/content/raw/down/743.jpg
True
/content/raw/down/744.jpg
True
/content/raw/down/745.jpg
True
/content/raw/down/746.jpg
True
/content/raw/down/747.jpg
True
/content/raw/down/748.jpg
True
/content/raw/down/749.jpg
True
/content/raw/down/750.jpg
True
/content/raw/down/751.jpg
True
/content/raw/down/752.jpg
True
/content/raw/down/753.jpg
True
/content/raw/down/754.jpg
True
/content/raw/down/755.jpg
True
/content/raw/down/756.jpg
True
/content/raw/down/757.jpg
True
/content/raw/down/758.jpg
True
/content/raw/down/759.jpg
True
/content/raw/down/760.jpg
True
/cont

In [None]:
%%bash
# Delete the extracted videos; their frames were written to /content/raw by the previous cell.
rm -r /content/temp

# Pose Estimation

COCO & TF Pose Estimation Models are Available

## Using COCO Model

Change OpenCV Version

In [None]:
# Install a fixed opencv-python release (4.3.0.38) for the COCO pipeline below.
# NOTE(review): cv2 was already imported by the frame-extraction cell, so the
# runtime presumably has to be restarted for this version to be the one that
# `import cv2` returns - confirm on Colab.
!pip install opencv-python==4.3.0.38

Download COCO Models

In [None]:
%%bash
# getModels.sh comes from the learnopencv OpenPose example.
# NOTE(review): presumably it downloads the model weights (including
# pose/coco/pose_iter_440000.caffemodel, which the next cell loads) - verify.
wget https://raw.githubusercontent.com/spmallick/learnopencv/master/OpenPose/getModels.sh
sh getModels.sh
# Fetch the COCO prototxt from inside pose/coco/ using a subshell: if the
# directory is missing the download is skipped with an error instead of
# landing in the wrong place, and the working directory of this script never
# changes, so the rm below always targets the file downloaded above.
( cd pose/coco/ && wget https://raw.githubusercontent.com/spmallick/learnopencv/master/OpenPose/pose/coco/pose_deploy_linevec.prototxt )
rm getModels.sh

Load the COCO network and select the compute device (GPU)

In [None]:
import cv2
import time
import numpy as np
import random
import os
from matplotlib import pyplot as plt

# Caffe network definition and weights fetched by the download cell.
protoFile = "pose/coco/pose_deploy_linevec.prototxt"
weightsFile = "pose/coco/pose_iter_440000.caffemodel"
# Number of keypoint confidence maps read from the network output.
nPoints = 18
# Index pairs of keypoints that are joined by a line when the skeleton is drawn.
POSE_PAIRS = [ [1,0],[1,2],[1,5],[2,3],[3,4],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13],[0,14],[0,15],[14,16],[15,17]]
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

# Select where inference runs: "cpu" or "gpu".
device = "gpu"
if device == "cpu":
    # Backend and target are separate settings; the previous code passed the
    # target constant DNN_TARGET_CPU to setPreferableBackend and never set the
    # target at all.
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    print("Using CPU device")
elif device == "gpu":
    # NOTE(review): this needs an OpenCV build with CUDA support; the stock
    # opencv-python wheel presumably lacks it - check cv2.getBuildInformation().
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    print("Using GPU device")
else:
    # A misspelt device name used to be ignored silently.
    raise ValueError("device must be 'cpu' or 'gpu', got %r" % (device,))

Initialize Pose Estimation script

In [None]:
# For every frame /content/raw/<class>/<n>.jpg: run the COCO pose network,
# draw the detected skeleton on a black canvas, crop the canvas to the
# skeleton and save it as /content/coco/<class>/<n>.jpg.
path = "/content/raw"
coco_root = "/content/coco"
threshold = 0.1   # minimum confidence-map peak for a keypoint to be accepted
inWidth = 368     # network input width passed to blobFromImage
inHeight = 368    # network input height passed to blobFromImage


def _detect_keypoints(frame):
  """Return a list of nPoints entries: an (x, y) pixel tuple or None per keypoint."""
  frameHeight, frameWidth = frame.shape[:2]
  inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
  net.setInput(inpBlob)
  output = net.forward()
  H = output.shape[2]
  W = output.shape[3]
  points = []
  for i in range(nPoints):
    # Confidence map of the corresponding body part.
    probMap = output[0, i, :, :]
    # Global maximum of the map and its location.
    _, prob, _, point = cv2.minMaxLoc(probMap)
    if prob > threshold:
      # Scale the map coordinates to the original image size.
      x = (frameWidth * point[0]) / W
      y = (frameHeight * point[1]) / H
      points.append((int(x), int(y)))
    else:
      points.append(None)
  return points


def _draw_skeleton(points, height, width):
  """Return a black height x width canvas with a line for every detected POSE_PAIRS pair."""
  sketch = np.zeros((height, width, 3), np.uint8)
  for partA, partB in POSE_PAIRS:
    if points[partA] is not None and points[partB] is not None:
      # The first two channels are never 0, so a drawn line is never black;
      # the crop below relies on that to find the skeleton.
      color = (random.randint(100, 255), random.randint(150, 255), random.randint(0, 255))
      cv2.line(sketch, points[partA], points[partB], color, 10)
  return sketch


def _crop_to_drawing(image):
  """Return image cropped to its non-black pixels, or None when nothing is left.

  The bounds are the first/last row and column containing a non-black pixel,
  used as an end-exclusive slice exactly as before, so existing output sizes
  do not change.
  """
  mask = image.any(axis=2)
  rows = np.flatnonzero(mask.any(axis=1))
  cols = np.flatnonzero(mask.any(axis=0))
  if rows.size == 0:
    # Nothing was drawn; min()/max() over empty lists used to raise here.
    return None
  cropped = image[rows[0]:rows[-1], cols[0]:cols[-1]]
  return cropped if cropped.size else None


root = os.listdir(path)
print('Available folders are: \n', root)
for folder in root:
  image_folder = os.path.join(path, folder)
  # Skip stray files and hidden folders such as .ipynb_checkpoints so they do
  # not turn into empty classes in the dataset.
  if folder.startswith(".") or not os.path.isdir(image_folder):
    continue
  print(image_folder)
  # Build the destination from its parts; str.replace("raw", "coco") would
  # also rewrite a class name that contains "raw".
  save_folder = os.path.join(coco_root, folder)
  os.makedirs(save_folder, exist_ok=True)

  # Frames are numbered 1.jpg, 2.jpg, ...; stop at the first missing number.
  num = 0
  while True:
    num += 1
    file_name = str(num) + ".jpg"
    image_file = os.path.join(image_folder, file_name)
    if not os.path.isfile(image_file):
      break
    savepath = os.path.join(save_folder, file_name)
    print("read path", image_file)
    print("save path", savepath)

    frame = cv2.imread(image_file)
    if frame is None:
      # cv2.imread signals an unreadable file by returning None.
      print("Skipping unreadable image", image_file)
      continue

    points = _detect_keypoints(frame)
    sketch = _draw_skeleton(points, frame.shape[0], frame.shape[1])
    cropped_image = _crop_to_drawing(sketch)
    if cropped_image is None:
      # Previously this case raised ValueError and aborted the whole run.
      print("No pose drawn, skipping", image_file)
      continue
    # The per-frame plt.imshow previews were dropped: over thousands of frames
    # they only piled up image artists on the current axes.
    if not cv2.imwrite(savepath, cropped_image):
      print("Failed to write", savepath)
    

## Using TensorFlow Model

Install required packages

In [None]:
# imageio and tensorflow-docs provide modules imported by the next cell.
!pip install -q imageio
# NOTE(review): no version is requested here; if the 4.3.0.38 pin from the COCO
# section is still installed, pip presumably keeps it - confirm which OpenCV
# version this section is meant to run on.
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


Import required libraries

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import cv2
import os
import time
import random

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display

Download Pose Estimation Model

In [None]:
# Keypoint names and their integer indices; the edge table below refers to
# keypoints by these indices.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Maps bones (pairs of keypoint indices) to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

# Use a SavedModel stored at /content/pose_model when present, otherwise fetch
# the model from TF Hub. The same absolute path is used for the check and the
# load; previously the check looked in /content/ while the load used a path
# relative to the current working directory.
pose_model_dir = '/content/pose_model'
if os.path.isdir(pose_model_dir):
  pose_estimation = tf.saved_model.load(pose_model_dir)
else:
  import tensorflow_hub as hub
  pose_estimation = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")

# Side length the frames are resized/padded to before inference.
input_size = 192
# Callable inference signature of the loaded model.
model = pose_estimation.signatures['serving_default']


# Write an all-black image to disk; it is read back below and by the next cell.
frameHeight = 1440 
frameWidth = 1440
sketch = np.zeros((frameHeight,frameWidth,3), np.uint8)
cv2.imwrite("/content/bg.jpg",sketch)

bg_image = tf.io.read_file('/content/bg.jpg')
bg_image = tf.image.decode_jpeg(bg_image)


# Black 1280x1280 canvas on which the skeletons are drawn.
# Add the batch dimension expected by resize_with_pad.
display_image_with_black_bg = tf.expand_dims(bg_image, axis=0)
# Resize and pad the image to keep the aspect ratio and fit the expected size.
display_image_with_black_bg = tf.cast(tf.image.resize_with_pad(display_image_with_black_bg, 1280, 1280), dtype=tf.int32)
# Drop the batch dimension again and convert to a NumPy array for matplotlib.
display_image_with_black_bg = np.squeeze(display_image_with_black_bg.numpy(), axis=0)

Initialize Pose Estimation script

In [None]:
# For every frame /content/raw/<class>/<n>.jpg: run the pose model, render the
# keypoints and bones on the black canvas with matplotlib, crop the rendering
# to the drawn pixels and save it as /content/tf/<class>/<n>.jpg.
# (The unused resized copy of bg.jpg that this cell used to build was removed.)
path = "/content/raw"
tf_root = "/content/tf"
keypoint_threshold = 0.11  # minimum score for a keypoint or bone to be drawn


def _predict_keypoints(image_file):
  """Run the pose model on one JPEG file and return its 'output_0' array."""
  image = tf.io.read_file(image_file)
  image = tf.image.decode_jpeg(image)
  input_image = tf.expand_dims(image, axis=0)
  # Resize and pad the image to keep the aspect ratio and fit the expected size.
  input_image = tf.image.resize_with_pad(input_image, input_size, input_size)
  # SavedModel format expects tensor type of int32.
  input_image = tf.cast(input_image, dtype=tf.int32)
  outputs = model(input_image)
  # Output is a [1, 1, 17, 3] tensor.
  return outputs['output_0'].numpy()


def _render_skeleton(keypoints_with_scores):
  """Draw the keypoints and bones on the black canvas; return the rendering as an RGB uint8 array."""
  height, width, _ = display_image_with_black_bg.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  try:
    # Negative padding removes the white border around the axes.
    fig.tight_layout(pad=-2.50)
    ax.margins(0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    plt.axis('off')
    ax.imshow(display_image_with_black_bg)
    line_segments = LineCollection([], linewidths=(10), linestyle='solid')
    ax.add_collection(line_segments)
    scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

    keypoints_all = []
    keypoint_edges_all = []
    edge_colors = []
    num_instances = keypoints_with_scores.shape[0]
    for idx in range(num_instances):
      # The last axis is read as (y, x, score); coordinates are scaled by the
      # canvas size.
      kpts_x = keypoints_with_scores[0, idx, :, 1]
      kpts_y = keypoints_with_scores[0, idx, :, 0]
      kpts_scores = keypoints_with_scores[0, idx, :, 2]
      kpts_absolute_xy = np.stack([width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
      keypoints_all.append(kpts_absolute_xy[kpts_scores > keypoint_threshold, :])
      for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
        start, end = edge_pair
        # A bone is drawn only when both of its end points are confident.
        if kpts_scores[start] > keypoint_threshold and kpts_scores[end] > keypoint_threshold:
          keypoint_edges_all.append(np.array([kpts_absolute_xy[start], kpts_absolute_xy[end]]))
          edge_colors.append(color)

    # The collection starts empty, so it only needs updating when there is
    # something to draw (this used to be done twice).
    if keypoint_edges_all:
      line_segments.set_segments(np.stack(keypoint_edges_all, axis=0))
      line_segments.set_color(edge_colors)
    keypoint_locs = np.concatenate(keypoints_all, axis=0) if keypoints_all else np.zeros((0, 2))
    if keypoint_locs.shape[0]:
      scat.set_offsets(keypoint_locs)

    fig.canvas.draw()
    # buffer_rgba() replaces the deprecated tostring_rgb(); dropping the alpha
    # channel yields the same RGB pixels, and the array already has the
    # canvas shape so no manual reshape is needed.
    rgba = np.asarray(fig.canvas.buffer_rgba())
    return rgba[..., :3].copy()
  finally:
    # Always release the figure, even when rendering fails.
    plt.close(fig)


def _crop_to_drawing(image):
  """Return image cropped to its non-black pixels, or None when nothing is left.

  The bounds are the first/last row and column containing a non-black pixel,
  used as an end-exclusive slice exactly as before, so existing output sizes
  do not change.
  """
  mask = image.any(axis=2)
  rows = np.flatnonzero(mask.any(axis=1))
  cols = np.flatnonzero(mask.any(axis=0))
  if rows.size == 0:
    # Nothing was drawn; min()/max() over empty lists used to raise here.
    return None
  cropped = image[rows[0]:rows[-1], cols[0]:cols[-1]]
  return cropped if cropped.size else None


root = os.listdir(path)
print('Available folders are: \n', root)
for folder in root:
  image_folder = os.path.join(path, folder)
  # Skip stray files and hidden folders such as .ipynb_checkpoints so they do
  # not turn into empty classes in the dataset.
  if folder.startswith(".") or not os.path.isdir(image_folder):
    continue
  print(image_folder)
  # Build the destination from its parts; str.replace("raw", "tf") would also
  # rewrite a class name that contains "raw".
  save_folder = os.path.join(tf_root, folder)
  os.makedirs(save_folder, exist_ok=True)

  # Frames are numbered 1.jpg, 2.jpg, ...; stop at the first missing number.
  num = 0
  while True:
    num += 1
    file_name = str(num) + ".jpg"
    image_file = os.path.join(image_folder, file_name)
    if not os.path.isfile(image_file):
      break
    savepath = os.path.join(save_folder, file_name)
    print("read path", image_file)
    print("save path", savepath)

    keypoints_with_scores = _predict_keypoints(image_file)
    frame = _render_skeleton(keypoints_with_scores)
    cropped_image = _crop_to_drawing(frame)
    if cropped_image is None:
      # Previously this case raised ValueError and aborted the whole run.
      print("No pose drawn, skipping", image_file)
      continue
    # NOTE(review): frame is RGB while cv2.imwrite expects BGR, so red and
    # blue are swapped on disk. Left unchanged so new images match datasets
    # generated earlier - confirm before changing.
    if not cv2.imwrite(savepath, cropped_image):
      print("Failed to write", savepath)
    

Available folders are: 
 ['down', 'full', '.ipynb_checkpoints']
/content/raw/down
/content/raw/down/1.jpg
read path /content/raw/down/1.jpg
save path /content/tf/down/1.jpg
/content/raw/down/2.jpg
read path /content/raw/down/2.jpg
save path /content/tf/down/2.jpg
/content/raw/down/3.jpg
read path /content/raw/down/3.jpg
save path /content/tf/down/3.jpg
/content/raw/down/4.jpg
read path /content/raw/down/4.jpg
save path /content/tf/down/4.jpg
/content/raw/down/5.jpg
read path /content/raw/down/5.jpg
save path /content/tf/down/5.jpg
/content/raw/down/6.jpg
read path /content/raw/down/6.jpg
save path /content/tf/down/6.jpg
/content/raw/down/7.jpg
read path /content/raw/down/7.jpg
save path /content/tf/down/7.jpg
/content/raw/down/8.jpg
read path /content/raw/down/8.jpg
save path /content/tf/down/8.jpg
/content/raw/down/9.jpg
read path /content/raw/down/9.jpg
save path /content/tf/down/9.jpg
/content/raw/down/10.jpg
read path /content/raw/down/10.jpg
save path /content/tf/down/10.jpg
/cont

# Upload Dataset

Upload dataset to Google Drive

In [None]:
# Bundle the generated folders into one archive and move it to Google Drive.
# The coco path was missing its leading slash ("content/coco"), so tar looked
# for it relative to the current directory instead of under /content.
# NOTE(review): /content/pose (the downloaded COCO model files) is archived
# while /content/tf (the TensorFlow-model output) is not - confirm that this
# is the intended content of the dataset archive.
!tar -zcvf /content/classification_dataset.tar.gz /content/raw /content/coco /content/pose
!mv /content/classification_dataset.tar.gz /gdrive/MyDrive/dataset/