### Detected Mask:

This notebook explores and tests code that extracts the segmentation masks from each video clip. That way the ML model does not see the whole clip, only the subject and background masks. Giving the model less information helps keep it from confusing the classes. PyTorch and panopticapi are used.

The results of this script can be found at:

Video: video_example/video1.mp4

Result: video_example/video1_result.mp4

________

Video: video_example/video2.mp4

Result: video_example/video2_result.mp4

In [4]:
! pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f \
    https://download.pytorch.org/whl/torch_stable.html

Defaulting to user installation because normal site-packages is not writeable
Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [5]:
from PIL import Image
import requests
import io
import math
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

import torch
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
import numpy
# The notebook only runs inference, so gradient tracking is switched off globally.
torch.set_grad_enabled(False);

! pip install git+https://github.com/cocodataset/panopticapi.git
! pip install seaborn

import panopticapi
from panopticapi.utils import id2rgb, rgb2id

import itertools
import seaborn as sns

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-7us6nuzm because the default path (/home/arubattino/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Defaulting to user installation because normal site-packages is not writeable
Collecting git+https://github.com/cocodataset/panopticapi.git
  Cloning https://github.com/cocodataset/panopticapi.git to /tmp/pip-req-build-rv1wwa95
  Running command git clone --filter=blob:none -q https://github.com/cocodataset/panopticapi.git /tmp/pip-req-build-rv1wwa95
  Resolved https://github.com/cocodataset/panopticapi.git to commit 7bb4655548f98f3fedc07bf37e9040a992b054b0
  Preparing metadata (setup.py) ... [?25ldone
Defaulting to user installation because normal site-packages is not writeable


In [6]:
# COCO class names indexed by COCO category id; unused ids are marked 'N/A'
CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

# Conversion table for Detectron2's numbering scheme: each COCO id that names a
# real class (anything but "N/A") maps to its rank among the real classes.
coco2d2 = {
    coco_id: d2_id
    for d2_id, coco_id in enumerate(
        idx for idx, label in enumerate(CLASSES) if label != "N/A"
    )
}
# Number of real (non-"N/A") classes
count = len(coco2d2)

# Input preprocessing: resize, convert to a tensor, then apply the standard
# PyTorch per-channel mean/std normalization.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]
_preprocess_steps = [
    T.Resize(800),
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
]
transform = T.Compose(_preprocess_steps)


# Fetch the DETR panoptic model together with its post-processor through
# torch.hub, then switch the network to evaluation mode.
model, postprocessor = torch.hub.load(
    'facebookresearch/detr',
    'detr_resnet101_panoptic',
    pretrained=True,
    return_postprocessor=True,
    num_classes=250,
)
model.eval();

Using cache found in /home/arubattino/.cache/torch/hub/facebookresearch_detr_main


In [7]:
def mask_frame(path):

  '''
      mask_frame() runs panoptic segmentation on an image and overwrites the
      image file with a colour-coded rendering of the detected masks.

      path: str: path of the image to segment (.jpg / .png / etc). The file is
                 replaced in place by the rendered masks.

      Returns None. Relies on the module-level `transform`, `model` and
      `postprocessor` objects. The figure is closed after saving, so nothing is
      displayed inline and memory does not grow when many frames are processed.
  '''

  # Open inside a context manager so the file handle is released before the
  # same path is overwritten below. Force 3 channels because `transform`
  # normalizes with 3-element mean/std (grayscale or RGBA files would fail).
  with Image.open(path) as im:
    # mean-std normalize the input image (batch-size: 1)
    img = transform(im.convert('RGB')).unsqueeze(0)
  out = model(img)

  # the post-processor expects as input the target size of the predictions (which we set here to the image size)
  result = postprocessor(out, torch.as_tensor(img.shape[-2:]).unsqueeze(0))[0]

  palette = itertools.cycle(sns.color_palette())

  # The segmentation is stored in a special-format png
  panoptic_seg = Image.open(io.BytesIO(result['png_string']))
  panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
  # We retrieve the ids corresponding to each mask
  panoptic_seg_id = rgb2id(panoptic_seg)

  # Finally we color each mask individually
  panoptic_seg[:, :, :] = 0
  for segment_id in range(panoptic_seg_id.max() + 1):
    panoptic_seg[panoptic_seg_id == segment_id] = numpy.asarray(next(palette)) * 255

  # Render and save through an explicit figure, and always close it: pyplot
  # keeps every figure alive until closed, which leaks memory when this
  # function is called once per video frame.
  fig, ax = plt.subplots(figsize=(25, 15))
  try:
    ax.imshow(panoptic_seg)
    ax.axis('off')
    fig.savefig(path)
  finally:
    plt.close(fig)

In [None]:
# Script to convert a video into its masked version

import cv2
import os

def video_reverse(pathing, save, name, fps=24):

    '''Convert a video into a video of its colour-coded segmentation masks.

       Every frame of the input is dumped to the ./probe folder, replaced in
       place by mask_frame(), and the masked frames are then written to a new
       video in their original order (despite the function name, the frame
       order is not reversed). Only the frame files written by this call are
       used, and they are removed again before returning.

       pathing: str: path of the video to process
       save:    str: directory prefix of the generated video; it is concatenated
                     with `name` as-is, so it must end with a path separator
       name:    str: file name of the generated video
       fps:     number: frames per second of the generated video (default 24)

       Raises ValueError if no frame can be read from `pathing`, and OSError if
       a frame or the output video cannot be written or read back.
    '''

    probe_dir = './probe'
    # cv2.imwrite returns False instead of raising when the folder is missing,
    # so make sure it exists before dumping frames.
    os.makedirs(probe_dir, exist_ok=True)

    # Paths of the dumped frames, kept in capture order so no filename parsing
    # or sorting is needed later.
    frame_paths = []

    try:
        # ---------- Video to frames ----------
        cap = cv2.VideoCapture(pathing)
        try:
            check, vid = cap.read()
            # Trust the flag returned by read(): it is False when the video
            # cannot be opened or when the last frame has been consumed.
            while check:
                frame_path = os.path.join(probe_dir, "0000%d.jpg" % len(frame_paths))
                if not cv2.imwrite(frame_path, vid):
                    raise OSError("could not write frame to %s" % frame_path)
                frame_paths.append(frame_path)
                check, vid = cap.read()
        finally:
            cap.release()

        if not frame_paths:
            raise ValueError("no frames could be read from %r" % (pathing,))

        # ---------- Frames to mask -----------
        for frame_path in frame_paths:
            mask_frame(frame_path)

        # ---------- Frames to video ----------
        # The output size is taken from a masked frame, not from the source
        # video, because mask_frame() re-renders each frame at its own size.
        first_frame = cv2.imread(frame_paths[0])
        if first_frame is None:
            raise OSError("could not read masked frame %s" % frame_paths[0])
        height, width = first_frame.shape[:2]

        video = cv2.VideoWriter(save + name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        try:
            if not video.isOpened():
                raise OSError("could not open %s for writing" % (save + name))
            # Stream the frames one at a time instead of holding them all in memory
            for frame_path in frame_paths:
                img = cv2.imread(frame_path)
                if img is None:
                    raise OSError("could not read masked frame %s" % frame_path)
                video.write(img)
        finally:
            video.release()
    finally:
        # Delete the temporary frames even when a step above failed, so a
        # later run does not start from a polluted probe folder.
        for frame_path in frame_paths:
            if os.path.exists(frame_path):
                os.remove(frame_path)
        
        
# Run the pipeline on the sample clip; the result is written to ./prueba.mp4
video_reverse('tt4275910_shot_0001.mp4', './', 'prueba.mp4');