In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the local `utils`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import cv2

import torch
from torchvision import models
import torchvision.transforms as T

import numpy as np
import utils
from utils import ImagePair, ImageMatcher
import h5py
from scipy.spatial.transform import Rotation as Rot
from glob import glob

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('Device:', device)

# Pre-trained DeepLabV3 (ResNet-101 backbone) with the default weights,
# switched to eval mode and moved onto the selected device.
model = models.segmentation.deeplabv3_resnet101(
    weights=models.segmentation.DeepLabV3_ResNet101_Weights.DEFAULT
)
model = model.eval().to(device)

In [None]:
def segment_frames_in_batches(frames, batch_size, device):
    """Compute a boolean 'person' mask for every frame using the notebook-level ``model``.

    Each frame is converted to a tensor, normalized with the ImageNet
    mean/std that the pre-trained DeepLabV3 weights expect, and run through
    the network ``batch_size`` frames at a time.

    Args:
        frames: Sliceable sequence of equally sized 3-channel frames accepted
            by ``torchvision.transforms.ToTensor``.
            NOTE(review): the normalization constants are in RGB order —
            confirm ``utils.load_video`` yields RGB rather than OpenCV's BGR.
        batch_size: Number of frames per forward pass; must be >= 1.
        device: Torch device each batch is moved to. It should be the device
            the global ``model`` was placed on.

    Returns:
        np.ndarray: The per-frame boolean masks stacked along the first axis;
        an entry is True where the highest-scoring class is index 15
        ('person' in the Pascal VOC label set used by these weights).
        If ``frames`` is empty the result is an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently return no masks.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    person_class = 15  # 'person' in the VOC-style label set of the weights
    transform = T.Compose([
        T.ToTensor(),
        # The pre-trained weights were trained on ImageNet-normalized inputs.
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    human_segmentation_masks = []
    for start in range(0, len(frames), batch_size):
        # Slice out the next chunk of frames and stack it into one batch tensor.
        batch_frames = frames[start:start + batch_size]
        batch = torch.stack([transform(frame) for frame in batch_frames]).to(device)

        with torch.no_grad():
            outputs = model(batch)['out']

        # Arg-max over the class dimension for the whole batch at once, then a
        # single transfer back to the CPU instead of one per frame.
        batch_masks = (outputs.argmax(dim=1) == person_class).cpu().numpy()
        human_segmentation_masks.extend(batch_masks)

    return np.array(human_segmentation_masks)


In [None]:
# NOTE: machine-specific absolute path — point DATA_DIR at your own dataset folder.
DATA_DIR = '/Users/richard/Desktop/TestDataset/'
# glob() returns matches in arbitrary order; sort so every run processes the
# videos in the same sequence.
vid_paths = sorted(glob(DATA_DIR + '*.mp4'))

for i, vid_path in enumerate(vid_paths, start=1):
    print(f'({i}/{len(vid_paths)}) {vid_path}...')
    frames = utils.load_video(vid_path, grayscale=False)
    masks = segment_frames_in_batches(frames, batch_size=8, device=device)

    # Save the masks as compressed npz next to the video. Only the trailing
    # extension is swapped: str.replace would also rewrite any earlier '.mp4'
    # that happens to appear in a directory or file name.
    stem = vid_path.rpartition('.')[0]
    out_path = stem + '-masks.npz'
    np.savez_compressed(out_path, masks=masks)