# Video Processing with Object Segmentation and Style Transfer

Antonin Wattel, Pierre Pages, 12/2021

In [None]:
import os
import importlib.util
import sys
import numpy as np
import matplotlib.pyplot as plt
import cv2
from IPython.display import Video

In [None]:
#current working directory (this should be the base directory of the project)
CWD = os.getcwd()

#to import modules
def module_from_file(module_name, file_path):
    """Load a Python source file as a module object.

    Args:
        module_name: Name given to the loaded module.
        file_path: Path of the Python file to execute.

    Returns:
        The module obtained by executing ``file_path``.

    Raises:
        ImportError: If no import spec or loader can be built for
            ``file_path`` (for example an unrecognised file extension).
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    # spec_from_file_location returns None (it does not raise) when it cannot
    # choose a loader; fail with a clear message instead of on ``None.loader``.
    if spec is None or spec.loader is None:
        raise ImportError(
            'cannot load module {!r} from {!r}'.format(module_name, file_path)
        )
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# 1. Video Choice
The first thing to do is to choose the video to be processed. The longer it lasts and the higher its resolution, the longer the process will take.

## Custom choice 

If you want to use a preprocessed video, skip this part.

### Loading the video

In [None]:
#choose an mp4 video and place it in the unprocessed_videos folder
#set seq_name to the video name (without .mp4 extension)
#avoid spaces in the video name

seq_name = 'skate_sage_elsesser'
video_path = os.path.join(CWD, 'unprocessed_videos/{}.mp4'.format(seq_name))

In [None]:
#visualize video
Video('unprocessed_videos/{}.mp4'.format(seq_name))

### Trimming the video 

The longer the video, the longer the processing takes, so it is best to keep only a few seconds.

In [None]:
# Start and end timecodes of the excerpt to keep.
time1, time2 = '00:00:17.8', '00:00:24'

# The trimmed clip is stored in the same folder as the unprocessed videos.
trimmed_name = '{}_trimmed.mp4'.format(seq_name)
output_path = os.path.join(CWD, 'unprocessed_videos', trimmed_name)

# Disabled variant: the same command with "-c copy" inserted before the output path.
trim_template = '! ffmpeg -i {} -ss {} -to {} {} -y'
command = trim_template.format(video_path, time1, time2, output_path)
print(command)

In [None]:
%%capture 
#paste the above output and run
! <insert command here>

In [None]:
#visualize trimmed video
Video('unprocessed_videos/{}_trimmed.mp4'.format(seq_name))

### Conversion to image sequence

In [None]:
# Convert the trimmed mp4 into a numbered image sequence, stored in the
# folder where it is ready to be processed.
input_video = output_path
output_sequence = os.path.join(
    CWD,
    'OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/{}'.format(seq_name),
)
if os.path.exists(output_sequence) is False:
    os.makedirs(output_sequence)

# Frames are written as 00000.jpg, 00001.jpg, ... inside output_sequence.
extract_template = '! ffmpeg  -i {} -qscale:v 2 -start_number 0 {}/%05d.jpg -y'
command = extract_template.format(input_video, output_sequence)
print(command)

In [None]:
%%capture
#paste the above output and run
! <insert command here>

### Masking

Now that we have the sequence of images, we need to have a mask of the object we want to segment for the first image

- Method 1 : use external software 

In [None]:
# Manually create the mask of the first image of the sequence with an image
# editor (Photoshop, GIMP, ...) and save it as 00000.png in the folder printed below.

annotation_subdir = 'OSVOS-PyTorch/DAVIS-data/DAVIS/Annotations/480p/{}'.format(seq_name)
mask_path = os.path.join(CWD, annotation_subdir)
if os.path.exists(mask_path) is False:
    os.makedirs(mask_path)

print(mask_path)

In [None]:
# Check that the mask looks good next to the first frame.
first_frame_file = os.path.join(output_sequence, '00000.jpg')
first_mask_file = os.path.join(mask_path, '00000.png')

background_image = cv2.imread(first_frame_file)
mask_image = cv2.imread(first_mask_file)
# cv2.imread returns None instead of raising when a file cannot be read;
# stop here with an explicit message rather than failing inside cvtColor/imshow.
if background_image is None:
    raise FileNotFoundError('cannot read first frame: {}'.format(first_frame_file))
if mask_image is None:
    raise FileNotFoundError('cannot read mask: {}'.format(first_mask_file))

# OpenCV loads images as BGR; matplotlib expects RGB.
background_image = cv2.cvtColor(background_image, cv2.COLOR_BGR2RGB)

fig, (frame_ax, mask_ax) = plt.subplots(1, 2, figsize=(20, 10))
frame_ax.imshow(background_image)
frame_ax.set_title('First frame')
mask_ax.imshow(mask_image)
mask_ax.set_title('Mask')

plt.show()

* Method 2 : Foreground extraction using GrabCut Algorithm

https://docs.opencv.org/4.x/d8/d83/tutorial_py_grabcut.html

to be improved by manually marking the mask image

In [None]:
# Play with the rectangle coordinates until the mask looks good.
# (Note: this works well with a well defined / contrasted foreground and background.)

from matplotlib.patches import Rectangle

first_frame_file = os.path.join(output_sequence, '00000.jpg')
img = cv2.imread(first_frame_file)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError('cannot read first frame: {}'.format(first_frame_file))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
mask = np.zeros(img.shape[:2], np.uint8)

# Work arrays required by cv2.grabCut (shape and dtype as in the linked OpenCV tutorial).
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)

# (x, y, width, height) of the box drawn around the object.
rect = (430, 120, 260, 320)

cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

# Labels 0 and 2 are treated as background, everything else as foreground.
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
# Zero out the background pixels. No further scaling: a uint8 image cannot
# hold pixel values multiplied by 1000 (overflow / error depending on NumPy).
img = img * mask2[:, :, np.newaxis]

fig, ax = plt.subplots(figsize=(20, 20))
ax.add_patch(Rectangle((rect[0], rect[1]), rect[2], rect[3], edgecolor='red', facecolor='none', lw=1))
ax.set_xticks([])
ax.set_yticks([])
ax.imshow(img)
plt.show()

## Preprocessed video

The DAVIS dataset already provides short videos put into sequences of images, as well as the associated masks.

In [None]:
#we can use preprocessed videos from the validation set (choose among the following)

# blackswan, bmx-trees, breakdance, camel, car-roundabout,
# car-shadow, cows, dance-twirl, dog, drift-chicane, 
# drift-straight, goat, horsejump-high, kite-surf,
# libby, motocross-jump, paragliding-launch, parkour, 
# scooter-black, soapbox

seq_name = 'blackswan'

In [None]:
# Make a video out of the image sequence (for demonstration).

# Folder holding the frames (no trailing space, so the value is a usable path).
data_path = 'OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/{}'.format(seq_name)
out_path = os.path.join(CWD, 'Results/in_videos/{}.mp4'.format(seq_name))
# Create only the parent folder: calling makedirs on out_path itself creates a
# directory named "<seq_name>.mp4" exactly where ffmpeg has to write the file.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# "&&" runs ffmpeg only once the cd has succeeded, in both cmd.exe and POSIX
# shells (a single "&" would send the cd to the background on POSIX shells).
command = '! cd {} && ffmpeg -r 25 -f image2 -s 720*480 -i %05d.jpg -vcodec libx264 -crf 25  -pix_fmt yuv420p -y {}'.format(data_path, out_path)
print(command)


In [None]:
%%capture 
#paste the above output and run
! cd OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/blackswan  & ffmpeg -r 25 -f image2 -s 720*480 -i %05d.jpg -vcodec libx264 -crf 25  -pix_fmt yuv420p -y c:\Users\A1234\Documents\INF573\project\Results/in_videos/blackswan.mp4

In [None]:
Video('Results/in_videos/{}.mp4'.format(seq_name))

# 2. Object segmentation

The object segmentation is based on the PyTorch implementation of One-Shot Video Object Segmentation (OSVOS)
https://github.com/kmaninis/OSVOS-PyTorch.
<br/>
We use a pretrained parent model, available at https://data.vision.ee.ethz.ch/kmaninis/share/OSVOS/Downloads/models/pth_parent_model.zip.

In [None]:
# Import the OSVOS online-training script from the project folder.
# The folder name uses the same casing ('OSVOS-PyTorch') as the data paths of
# this notebook; the exact casing matters on case-sensitive file systems.
osvos_dir = os.path.join(CWD, 'OSVOS-PyTorch')
# Avoid stacking duplicate entries when the cell is re-run.
if osvos_dir not in sys.path:
    sys.path.append(osvos_dir)
train_path = os.path.join(osvos_dir, 'train_online.py')
train_online = module_from_file("train_online", train_path)

In [None]:
# A few training parameters
# (train_online.py lists the other ones).

nAveGrad = 5
# The more epochs we take, the better the segmentation results.
nEpochs = nAveGrad * 300
# The trained model's results for this sequence are saved under this folder.
save_dir_res = os.path.join(CWD, 'Results', 'masks', seq_name, 'sequences')

train_online.train(seq_name, save_dir_res, nAveGrad, nEpochs)
print('done')

In [None]:
# Build the ffmpeg command that assembles the segmentation masks into a video.
# mask_video.mp4 is written inside res_path itself, so no other output folder
# has to be prepared here.
res_path = 'Results/masks/{}/sequences'.format(seq_name)
# "&&" runs ffmpeg only once the cd has succeeded, in both cmd.exe and POSIX
# shells (a single "&" would send the cd to the background on POSIX shells).
command = '! cd {} && ffmpeg -r 25 -f image2 -s 720*480 -i %05d.png -vcodec libx264 -crf 25  -pix_fmt yuv420p -y mask_video.mp4'.format(res_path)
print(command)

In [None]:
%%capture 
#paste the above output and run
! <insert command here>

In [None]:
Video("Results/masks/{}/sequences/mask_video.mp4".format(seq_name))

# 3. Style transfer

This part is based on the PyTorch implementation https://github.com/rrmina/fast-neural-style-pytorch following the style transfer approach outlined in Perceptual Losses for Real-Time Style Transfer and Super-Resolution paper by Justin Johnson, Alexandre Alahi, and Fei-Fei Li, along with the supplementary paper detailing the exact model architecture of the mentioned paper.

In [None]:
# Make the style-transfer code importable and load its two scripts.
style_repo = os.path.join(CWD, 'fast-neural-style-pytorch')
sys.path.append(style_repo)

train_path = os.path.join(style_repo, 'train.py')
stylize_path = os.path.join(style_repo, 'stylize.py')

# Loaded in the same order as before: train.py first, then stylize.py.
style_train = module_from_file("train", train_path)
stylize = module_from_file("stylize", stylize_path)


## Training style transfer network
This step will take some time to run.
You can alternatively use pretrained models (see next step)

We train the models on a portion (2000 images) of the coco2014 dataset, available at https://cocodataset.org/#download.<br/>
The training dataset must be placed in the folder fast-neural-style-pytorch/data. <br/>
<br/>
This model is trained on the pretrained vgg model available at https://web.eecs.umich.edu/~justincj/models/vgg16-00b39a1b.pth. <br/>
This model must be placed in fast-neural-style-pytorch/models

Here, we use a TransformerNetwork (see transformer.py for the architecture).
Some other experimental transformer architectures can be tested (see experimental.py)

* Choice of style image

In [None]:
#choose an image to train the style on
#in the folder images, we provide a few images for the style
#To choose your own, choose a jpg image and place it in the fast-neural-style-pytorch/images folder
#careful: too large images will lead to long training times

In [None]:
style_image_name = 'picabia'  # (this is an image made with the 'Vision of Chaos' software)
style_image_path = os.path.join(CWD, 'fast-neural-style-pytorch', 'images', '{}.jpg'.format(style_image_name))

style_image = cv2.imread(style_image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if style_image is None:
    raise FileNotFoundError('cannot read style image: {}'.format(style_image_path))
# OpenCV loads images as BGR; matplotlib expects RGB.
style_image = cv2.cvtColor(style_image, cv2.COLOR_BGR2RGB)

fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(style_image)
ax.set_title('Style image: {}'.format(style_image_name))
plt.show()

In [None]:
# Training settings; they are handed to train.py as one positional list (see `args`).
style_repo_dir = os.path.join(CWD, 'fast-neural-style-pytorch')

num_epochs = 100
save_model_every = 10
plt_loss = 1
show_images = 1

# Inputs: training images (to fill in with your dataset location) and the style image.
dataset_path = os.path.join(style_repo_dir, 'data', 'train2014')
style_image_path = os.path.join(style_repo_dir, 'images', '{}.jpg'.format(style_image_name))

# Outputs: checkpoints, final model and sample images.
save_checkpoints_path = os.path.join(style_repo_dir, 'models')
save_model_path_final = os.path.join(style_repo_dir, 'transforms', '{}.pth'.format(style_image_name))
save_image_path = os.path.join(style_repo_dir, 'images/out')

args = [
    num_epochs,
    dataset_path,
    style_image_path,
    save_checkpoints_path,
    save_model_path_final,
    save_image_path,
    save_model_every,
    plt_loss,
    show_images,
]
style_train.train(args)

## Pretrained style transfer models
You can choose among the following pretrained styles.


In [None]:
#see README inside fast-neural-style-pytorch/transforms for more pretrained + experimental models

# As seen on https://github.com/rrmina/fast-neural-style-pytorch
# bayanihan, lazy, mosaic, starry, 
# tokyo_ghoul, udnie, wave, mosaic_TransformerResNetwork

# I also trained the following
# multicolor
# picabia
# pointillism


In [None]:
#run this cell to use a pretrained model of your choice
style_image_name = 'wave'

## Styling videos
We style images one by one

In [None]:
style_model_foreground =  None
style_model_background = None

### Foreground

In [None]:
# Pretrained style used for the foreground, and the file holding its weights.
style_model_foreground = 'wave'
foreground_weights_rel = 'fast-neural-style-pytorch/transforms/{}.pth'.format(style_model_foreground)
style_path_foreground = os.path.join(CWD, foreground_weights_rel)
preserve_color = False

In [None]:
# Frames to stylize, and the folder receiving the stylized frames.
content_folder = os.path.join(CWD, 'OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/{}'.format(seq_name))
save_folder = os.path.join(CWD, 'Results/stylized/{}/{}'.format(seq_name, style_model_foreground))

if style_model_foreground is not None:
    # Create the output folder only when something is written to it; otherwise
    # an empty ".../None" folder would be left behind.
    os.makedirs(save_folder, exist_ok=True)
    stylize.stylize_folder_single(style_path_foreground, content_folder, save_folder, preserve_color)

print('done')

In [None]:
# Build the ffmpeg command that assembles the stylized frames into video.mp4.

if style_model_foreground is not None:
    res_path = save_folder
    # "&&" runs ffmpeg only once the cd has succeeded, in both cmd.exe and POSIX
    # shells (a single "&" would send the cd to the background on POSIX shells).
    command = '! cd {} && ffmpeg -r 25 -f image2 -s 720*480 -i %05d.jpg -vcodec libx264 -crf 25  -pix_fmt yuv420p -y video.mp4'.format(res_path)
    print(command)


In [None]:
%%capture
#paste previous line here
! <insert command here>

### Background

In [None]:
# Pretrained style used for the background, and the file holding its weights.
style_model_background = 'multicolor'
style_path_background = os.path.join(CWD, 'fast-neural-style-pytorch/transforms/{}.pth'.format(style_model_background))
# Legacy alias: the background weights used to be stored under the foreground
# name, and code written against that behaviour still reads it from there.
# NOTE(review): this overwrites the foreground weights path; re-run the
# foreground configuration cell before stylizing the foreground again.
style_path_foreground = style_path_background
preserve_color = False

In [None]:
# Frames to stylize, and the folder receiving the stylized frames.
content_folder = os.path.join(CWD, 'OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/{}'.format(seq_name))
save_folder = os.path.join(CWD, 'Results/stylized/{}/{}'.format(seq_name, style_model_background))

if style_model_background is not None:
    # Derive the weights path from style_model_background right here, so the
    # background can never be styled with a stale foreground weights path.
    style_path_background = os.path.join(CWD, 'fast-neural-style-pytorch/transforms/{}.pth'.format(style_model_background))
    # Create the output folder only when something is written to it; otherwise
    # an empty ".../None" folder would be left behind.
    os.makedirs(save_folder, exist_ok=True)
    stylize.stylize_folder_single(style_path_background, content_folder, save_folder, preserve_color)
print('done')

In [None]:
# Build the ffmpeg command that assembles the stylized frames into video.mp4.

if style_model_background is not None:
    res_path = save_folder
    # "&&" runs ffmpeg only once the cd has succeeded, in both cmd.exe and POSIX
    # shells (a single "&" would send the cd to the background on POSIX shells).
    command = '! cd {} && ffmpeg -r 25 -f image2 -s 720*480 -i %05d.jpg -vcodec libx264 -crf 25  -pix_fmt yuv420p -y video.mp4'.format(res_path)
    print(command)

In [None]:
%%capture
#paste previous line here
! cd c:\Users\A1234\Documents\INF573\project\Results/stylized/skate_sage_elsesser/multicolor & ffmpeg -r 25 -f image2 -s 720*480 -i %05d.jpg -vcodec libx264 -crf 25  -pix_fmt yuv420p -y video.mp4

# 4. Merging videos

In [None]:
import os

def mix_images(background_image_path, mask_path, foreground_path, out, mask_only):
    """Blend a foreground frame over a background frame through a mask and save it.

    The mask is lightly blurred and scaled by 1/255, then the output is
    ``mask * foreground + (1 - mask) * background``. With ``mask_only`` set,
    only ``mask * foreground`` is written.

    Args:
        background_image_path: Image file used where the mask is dark.
        mask_path: Mask image file (assumed 8-bit, since it is divided by 255).
        foreground_path: Image file used where the mask is bright.
        out: Path of the image file to write.
        mask_only: If true, write the masked foreground without the background.

    Raises:
        FileNotFoundError: If one of the three inputs cannot be read.
        IOError: If the result cannot be written to ``out``.
    """
    def _read(path, role):
        # cv2.imread returns None instead of raising on an unreadable file.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError('cannot read {} image: {}'.format(role, path))
        return image

    background_image = _read(background_image_path, 'background')
    mask = _read(mask_path, 'mask')
    foreground = _read(foreground_path, 'foreground')

    # The inputs may differ by a few pixels in size (the foreground used to be
    # cropped by a fixed two columns). Crop all three to their common top-left
    # region so that any combination of inputs can be blended.
    height = min(image.shape[0] for image in (background_image, mask, foreground))
    width = min(image.shape[1] for image in (background_image, mask, foreground))
    background_image = background_image[:height, :width].astype(float)
    foreground = foreground[:height, :width].astype(float)
    mask = np.ascontiguousarray(mask[:height, :width])

    # Soften the mask border, then turn it into per-pixel blending weights.
    mask = cv2.GaussianBlur(mask, (3, 3), 0)
    mask = mask.astype(float) / 255

    out_image = cv2.multiply(mask, foreground)
    if not mask_only:
        out_image = cv2.add(out_image, cv2.multiply(1.0 - mask, background_image))

    # Convert explicitly to 8-bit before saving instead of passing float64 data.
    out_image = np.clip(np.rint(out_image), 0, 255).astype(np.uint8)
    if not cv2.imwrite(out, out_image):
        raise IOError('cannot write image: {}'.format(out))

In [None]:
def mix_images_batch(background_path, mask_path, foreground_path, out_folder, mask_only=False):
    """Run ``mix_images`` on every .jpg/.png file found in ``background_path``.

    For each file, the foreground with the same file name and the mask with the
    same base name (as .png) are combined, and the result is written to
    ``out_folder`` under the same base name with a .png extension.

    Args:
        background_path: Folder listing the frames to process.
        mask_path: Folder containing one .png mask per frame.
        foreground_path: Folder containing the foreground frames.
        out_folder: Folder receiving the blended frames.
        mask_only: Forwarded unchanged to ``mix_images``.
    """
    images = []
    for entry in os.listdir(background_path):
        if entry.endswith((".jpg", ".png")):
            images.append(entry)
    print(images)

    for image_name in images:
        # Both accepted extensions are four characters long.
        stem = image_name[:len(image_name) - 4]

        background = os.path.join(background_path, image_name)
        print(background)
        foreground = os.path.join(foreground_path, image_name)
        print(foreground)
        mask = os.path.join(mask_path, stem + '.png')
        print(mask)
        out = os.path.join(out_folder, stem + '.png')

        mix_images(background, mask, foreground, out, mask_only)
    

In [None]:
# Frames of the original sequence; used for every layer that is not stylized.
original_path = os.path.join(CWD, 'OSVOS-PyTorch/DAVIS-data/DAVIS/JPEGImages/480p/{}'.format(seq_name))

# Set a model name to None to keep that layer unstyled.
# NOTE(review): the background is forced to None here, whatever was chosen
# in the Background section - confirm this is intended.
#style_model_foreground = None
style_model_background = None

if style_model_background is None:
    background_path = original_path
else:
    background_path = os.path.join(CWD, 'Results/stylized/{}/{}'.format(seq_name, style_model_background))

if style_model_foreground is None:
    foreground_path = original_path
else:
    foreground_path = os.path.join(CWD, 'Results/stylized/{}/{}'.format(seq_name, style_model_foreground))

mask_path = os.path.join(CWD, 'Results/masks/{}/sequences'.format(seq_name))
# The output folder name records which background and foreground styles were used.
final_subdir = 'Results/final/{}/b_{}+f_{}/sequences'.format(seq_name, style_model_background, style_model_foreground)
out_path = os.path.join(CWD, final_subdir)

if os.path.exists(out_path) is False:
    os.makedirs(out_path)

mix_images_batch(background_path, mask_path, foreground_path, out_path)
print('done')

In [None]:
# Build the ffmpeg command that assembles the merged frames into final_video.mp4.
# "&&" runs ffmpeg only once the cd has succeeded, in both cmd.exe and POSIX
# shells (a single "&" would send the cd to the background on POSIX shells).
command = '! cd {} && ffmpeg -r 25 -f image2 -s 720*480 -i %05d.png -vcodec libx264 -crf 25  -pix_fmt yuv420p -y final_video.mp4'.format(out_path)
print(command)

In [None]:
%%capture 
#paste the above output and run
! <insert command here>

In [None]:
from IPython.display import Video
Video('Results/final/{}/b_{}+f_{}/sequences/final_video.mp4'.format(seq_name, style_model_background, style_model_foreground))