In [1]:
# import standard-library modules
import os
import random
from time import time

# import key libraries
from PIL import Image, ImageDraw
import torch, torchvision
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import matplotlib.pyplot as plt
from scipy import misc
import scipy.ndimage

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# import our utilities
import utils.filters as filters

In [2]:
def collect_mp4_frames(link):
    """Read every frame of a video into a list.

    Parameters
    ----------
    link : str
        Path (or any source accepted by ``cv2.VideoCapture``) of the video.

    Returns
    -------
    list
        The decoded frames in playback order, exactly as returned by
        ``VideoCapture.read``. The list is empty when the first read fails,
        e.g. because the source could not be opened.
    """
    # create video capture instance
    vidcap = cv2.VideoCapture(link)
    frames = []
    try:
        # read until the capture reports failure; the failed read's image is
        # never stored, so no trailing placeholder has to be sliced off
        while True:
            success, image = vidcap.read()
            if not success:
                break
            frames.append(image)
    finally:
        # always free the underlying decoder / file handle
        vidcap.release()
    return frames

def create_predictor(config_path="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
                     score_thresh=.4, device='cpu'):
    """Build a Detectron2 ``DefaultPredictor`` for a model-zoo model.

    Called without arguments this reproduces the notebook's original setup:
    the COCO Mask R-CNN R50-FPN 3x instance-segmentation model, a test score
    threshold of 0.4, running on the CPU.

    Parameters
    ----------
    config_path : str
        Model-zoo relative config path. The same path is used to resolve both
        the config file and the checkpoint URL, so the two cannot drift apart.
    score_thresh : float
        Value assigned to ``cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST``.
    device : str
        Value assigned to ``cfg.MODEL.DEVICE``.

    Returns
    -------
    DefaultPredictor
        Predictor built from the assembled config.
    """
    # create Detectron2 config to run image inference
    cfg = get_cfg()
    # add project-specific config here if not running a model in Detectron2's core library
    cfg.merge_from_file(model_zoo.get_config_file(config_path))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.DEVICE = device
    # pretrained weights for the same model-zoo entry
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_path)
    # build predictor
    return DefaultPredictor(cfg)

def blur_people(img, predictor):
    """Add the output of ``filter_image`` onto the pixels of detected people.

    Parameters
    ----------
    img : array-like
        Image that ``np.array`` turns into a 3-dimensional array; it is
        unpacked as (height, width, channels).
    predictor : callable
        Called with the image array; must return a mapping whose
        ``"instances"`` entry exposes ``pred_classes`` and ``pred_masks``.
        Instances with class 0 are treated as people.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the image's shape. Pixels outside every person
        mask are unchanged; inside a mask the filter response is added to the
        original pixel and saturated at 255.
    """
    # image in array form
    im = np.array(img)
    # image dimensions
    height, width, channels = im.shape
    # run inference on the image
    instances = predictor(im)["instances"]
    # get indices of predicted instances that are labelled as people
    person_idx = (instances.pred_classes == 0)
    # get image masks corresponding to each identified person
    person_masks_tensor = instances.pred_masks[person_idx, :, :]
    # union of all person masks; all-False when nobody was detected
    people = (person_masks_tensor.cpu().numpy() == 1).any(axis=0)
    # broadcast the 2-D union over every channel
    sharp_mask = np.zeros((height, width, channels))
    sharp_mask[people] = 1
    # filtered image; the float sum can exceed 255, so clip before the uint8
    # cast instead of letting out-of-range values wrap around
    combined = np.multiply(filter_image(im), sharp_mask) + im
    result = np.clip(combined, 0, 255).astype(dtype=np.uint8)
    return result

def pixelate_people(img, predictor):
    """Replace the pixels of detected people with ``filters.contour`` output.

    Despite the name, the effect applied is whatever ``filters.contour``
    (project helper from ``utils.filters``) produces.

    Parameters
    ----------
    img : numpy.ndarray
        Image array accepted by ``Image.fromarray``; it is unpacked as
        (height, width, channels).
    predictor : callable
        Called with the image array; must return a mapping whose
        ``"instances"`` entry exposes ``pred_classes`` and ``pred_masks``.
        Instances with class 0 are treated as people.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array: filtered pixels inside any person mask, original
        pixels everywhere else.
    """
    img = Image.fromarray(img)
    im = np.array(img)
    height, width, channels = im.shape
    instances = predictor(im)["instances"]

    person_idx = (instances.pred_classes == 0)
    person_masks_tensor = instances.pred_masks[person_idx, :, :]

    # union of all person masks; all-False when nobody was detected
    people = (person_masks_tensor.cpu().numpy() == 1).any(axis=0)

    # boolean mask broadcast over every channel (built-in bool: the np.bool
    # alias no longer exists in current NumPy releases)
    mask = np.zeros((height, width, channels), dtype=bool)
    mask[people] = True

    # NOTE(review): assumes filters.contour returns something that multiplies
    # element-wise against an (H, W, C) array - confirm against utils.filters
    result = (filters.contour(img) * mask + im * ~mask).astype(dtype=np.uint8)

    return result
                          
def filter_image(sharp_image):
    """Apply a Sobel filter followed by a Prewitt filter to an image.

    Both are derivative (edge-response) filters taken along the last axis,
    which is SciPy's default; the result is not a blur.

    Parameters
    ----------
    sharp_image : array-like
        Input image.

    Returns
    -------
    numpy.ndarray
        Filter response with the same shape as the input.
    """
    # public scipy.ndimage functions; the scipy.ndimage.filters namespace is deprecated
    edges = scipy.ndimage.sobel(sharp_image, mode='constant')
    edges = scipy.ndimage.prewitt(edges, mode='reflect')
    return edges

In [3]:
# source video, decoded fully into memory
link = 'video/choreoshort.mp4'
frames = collect_mp4_frames(link)
# collect_mp4_frames returns an empty list when nothing could be read, so
# fail here rather than much later in the pipeline
if not frames:
    raise RuntimeError(f'no frames could be read from {link!r}')

# use default detectron2 predictor
predictor = create_predictor()

In [5]:
# run the person filter over every frame, printing progress every 100 frames
blurred_frames = []
for i in range(len(frames)):
    frame = frames[i]
    if not i % 100:
        print(f'{i/len(frames)*100}% done')
    blurred_frames += [pixelate_people(frame, predictor)]

0.0% done
13.245033112582782% done
26.490066225165563% done
39.735099337748345% done
52.980132450331126% done
66.22516556291392% done
79.47019867549669% done
92.71523178807946% done


In [10]:
# where the processed frames are written, and the name of the video built from them
folder_save = 'video/contour_choreo'
video_name = 'contour_choreo4.avi'

# Image.save does not create missing directories
os.makedirs(folder_save, exist_ok=True)

# NOTE(review): the frames come from cv2 (BGR channel order) and are saved via
# PIL without conversion, so the PNGs presumably look colour-swapped when
# viewed directly; reading them back with cv2 restores the order - confirm
for i, frame in enumerate(blurred_frames):
    Image.fromarray(frame).save(os.path.join(folder_save, f'{i}.png'))
    # progress every 100 frames
    if i % 100 == 0:
        print(i)

0
100
200
300
400
500
600
700


In [11]:
import cv2
import os

# numbered PNG frames in the save folder, ordered by frame index
# (a plain string sort would put '10.png' before '2.png')
images = [img for img in os.listdir(folder_save)
          if img.endswith(".png") and os.path.splitext(img)[0].isdecimal()]
images.sort(key=lambda name: int(os.path.splitext(name)[0]))
if not images:
    raise FileNotFoundError(f'no numbered .png frames found in {folder_save!r}')

# probe the first frame for the output dimensions
frame = cv2.imread(os.path.join(folder_save, images[0]))
if frame is None:
    raise FileNotFoundError(f'could not read {images[0]!r} from {folder_save!r}')
print(frame.shape)

# take the size from the data instead of hard-coding it, so the writer's
# frame size always matches the frames passed to it
height, width, layers = frame.shape

# NOTE(review): 'mp4v' codec is paired with the '.avi' name set in the save
# cell - confirm this container/codec combination is intended
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video = cv2.VideoWriter(video_name, fourcc, 30, (width,height))

try:
    for i in range(len(frames)):
        img_name =  f'{i}.png'
        image = cv2.imread(os.path.join(folder_save, img_name))
        # cv2.imread returns None instead of raising when a file is missing or unreadable
        if image is None:
            raise FileNotFoundError(f'could not read {img_name!r} from {folder_save!r}')
        video.write(image)
        # progress every 100 frames
        if i % 100 == 0:
            print(i)
finally:
    # finalize the file even if a frame fails part-way through
    video.release()

(720, 1280, 3)
0
100
200
300
400
500
600
700
