<a href="https://colab.research.google.com/github/fzantalis/colab_collection/blob/master/alpha_matte_mask_from_video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This notebook takes a video as input and produces an inverted alpha matte (mask) video as output. Due to GPU memory limitations, please use videos of 480p resolution or lower.

In [None]:
#@markdown **2. Upload your video**
from google.colab import files 
uploaded = files.upload() 
for name, data in uploaded.items():
  with open('video_file.mp4', 'wb') as f:
    f.write(data)

%mkdir -p input
%mkdir -p output


In [None]:
#@title **Break the video to frames**
import cv2

# Open the video file and fail loudly if OpenCV cannot read it; otherwise the
# loop below would silently produce zero frames.
video = cv2.VideoCapture("video_file.mp4")
if not video.isOpened():
    raise IOError("Could not open video_file.mp4; run the upload cell first.")

try:
    # Frames per second reported by the container
    fps = video.get(cv2.CAP_PROP_FPS)

    # Total number of frames reported by the container (may be approximate)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    # Index of the frame currently being written
    frame_num = 0

    # Read frames until the capture reports there are none left
    while True:
        ret, frame = video.read()
        if not ret:
            break

        # Save the current frame as input/frame_NNNN.jpg (zero-padded so that
        # alphabetical order equals frame order)
        filename = f"input/frame_{frame_num:04}.jpg"
        if not cv2.imwrite(filename, frame):
            raise IOError(f"Could not write {filename}; does the 'input' folder exist?")

        frame_num += 1
finally:
    # Always release the capture, even if reading or writing failed
    video.release()

print(f"Extracted {frame_num} frames at {fps:.2f} fps (container reported {num_frames} frames).")

In [None]:
#@title **Display a sample frame**
from PIL import Image
import matplotlib.pyplot as plt
import torch
import cv2

# Load the second extracted frame (frames are numbered from 0000).
img = cv2.imread("input/frame_0001.jpg", cv2.IMREAD_COLOR)
if img is None:
    raise FileNotFoundError(
        "input/frame_0001.jpg could not be read; extract the frames first "
        "(the video needs at least two frames)."
    )
# OpenCV loads images in BGR channel order, but matplotlib expects RGB;
# convert so the preview is not shown with red and blue swapped.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img); plt.show()


In [None]:
#@title **Define helper functions and download segmentation model deeplabv3_resnet101**
from google.colab import files
import torchvision.transforms as T
import numpy as np

# Define the helper function
def decode_segmap(image, nc=21):
  """Turn a class-index map into a black/white person mask.

  Pixels whose label is 15 (person) become white (255, 255, 255); every other
  label in ``range(nc)`` becomes black. Labels outside ``range(nc)`` are left
  at the initial value of zero (black).

  Args:
    image: array of integer class labels, expected to be 2-D (H, W).
    nc: number of class labels to paint, starting from 0. The palette has
      21 entries (0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat,
      5=bottle, 6=bus, 7=car, 8=cat, 9=chair, 10=cow, 11=dining table,
      12=dog, 13=horse, 14=motorbike, 15=person, 16=potted plant, 17=sheep,
      18=sofa, 19=train, 20=tv/monitor).

  Returns:
    A uint8 array with the three colour channels stacked on axis 2.
  """
  person_class = 15
  white = (255, 255, 255)
  black = (0, 0, 0)
  # One colour per class: only the person class is white.
  palette = np.array([white if cls == person_class else black
                      for cls in range(21)])

  # One zero-initialised uint8 plane per colour channel (r, g, b).
  channels = [np.zeros_like(image).astype(np.uint8) for _ in range(3)]

  for label in range(nc):
    mask = image == label
    for ch, plane in enumerate(channels):
      plane[mask] = palette[label, ch]

  return np.stack(channels, axis=2)

def segment(net, path, show_orig=False, dev='cuda'):
  """Segment one frame and save its inverted person mask under ``output/``.

  The image at `path` is converted to a normalized tensor, passed through
  `net`, and the per-pixel argmax class map is turned into a mask by
  `decode_segmap` (person = white). The mask is then inverted (person = black,
  everything else = white), displayed, and written to
  ``output/<file name of path>``.

  Args:
    net: segmentation model whose result is indexable with ``'out'``
      (here: torchvision's deeplabv3_resnet101).
    path: path of the input frame image.
    show_orig: if True, also display the input frame.
    dev: torch device to run on; ``'cuda'`` falls back to ``'cpu'`` when no
      GPU is available.

  Returns:
    The inverted mask as an (H, W, 3) uint8 numpy array.

  Raises:
    IOError: if the mask image could not be written.
  """
  import os  # local import: the cell defining this function does not import os

  # Fall back to the CPU instead of crashing on runtimes without a GPU.
  if dev == 'cuda' and not torch.cuda.is_available():
    dev = 'cpu'

  output_file = os.path.basename(path)

  # Force 3-channel RGB so Normalize (3 means/stds) also works for grayscale
  # or RGBA inputs, and close the file handle as soon as the pixels are loaded.
  with Image.open(path) as src:
    img = src.convert('RGB')
  if show_orig: plt.imshow(img); plt.axis('off'); plt.show()

  # Resize/CenterCrop are intentionally left out for better inference results
  trf = T.Compose([#T.Resize(640),
                   #T.CenterCrop(224),
                   T.ToTensor(),
                   T.Normalize(mean = [0.485, 0.456, 0.406],
                               std = [0.229, 0.224, 0.225])])
  inp = trf(img).unsqueeze(0).to(dev)

  # Inference only: without no_grad() every call would build an autograd
  # graph and hold on to GPU memory for nothing.
  with torch.no_grad():
    out = net.to(dev)(inp)['out']

  # Drop only the batch dimension, then pick the most likely class per pixel.
  om = torch.argmax(out.squeeze(0), dim=0).cpu().numpy()
  rgb = decode_segmap(om)
  rgb = 255 - rgb  # invert: person becomes black, background white
  plt.imshow(rgb); plt.axis('off'); plt.show()

  os.makedirs('output', exist_ok=True)
  # cv2.imwrite reports failure through its return value, not an exception.
  if not cv2.imwrite('output/'+output_file , rgb):
    raise IOError('Could not write output/' + output_file)
  return rgb

from torchvision import models

# Build DeepLabV3 with a ResNet-101 backbone using the pretrained weights,
# then switch it to evaluation (inference) mode.
dlab = models.segmentation.deeplabv3_resnet101(pretrained=True)
dlab = dlab.eval()


In [None]:
#@title create alpha matte mask for every frame
import os

folder_path = "input"

# List the extracted frames in alphabetical order (equal to frame order thanks
# to the zero-padded names). The list is deliberately NOT called `files`, so
# it does not shadow the `google.colab.files` module imported earlier.
frame_names = sorted(os.listdir(folder_path))

# Create and save the mask for every frame
for frame_name in frame_names:
    # Get the full path of the entry
    file_path = os.path.join(folder_path, frame_name)
    # Skip anything that is not a regular file (e.g. sub-directories)
    if os.path.isfile(file_path):
        rgb = segment(dlab, file_path)
        print(frame_name)


In [None]:
#@title **Create a video from masked frames**
!ffmpeg -framerate 24 -pattern_type glob -i 'output/*.jpg' -c:v libx264 -pix_fmt yuv420p output.mp4


In [None]:
#@title **Display the masked video**
from IPython.display import HTML
from base64 import b64encode
# Read the encoded video; the context manager closes the file handle again.
with open('output.mp4','rb') as video_fh:
  mp4 = video_fh.read()
# Embed the video as a base64 data URL so it can be played inline.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=720 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
#@title Delete all files to mask a new video (edit and uncomment first)
#!rm -f input/*
#!rm -f output/*
#!rm -f video_file.mp4 output.mp4