In [None]:
import os
import json

import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from skimage.io import imread, imsave
from skimage.util import img_as_float, img_as_ubyte
from tqdm.notebook import tqdm, trange

from mh_utils import ImgBareSet

In [None]:
def _downscale_to_256p(img):
    """
    What we know: 
        All images are RGB
        All images are landscape oriented
        All images are larger than 256p
    """
    h, w = img.shape[:2]
    wp = np.round(256/h*w).astype(int)
    img2 = cv.resize(img, (wp,256), interpolation=cv.INTER_AREA)
    return img2

def _crop_to_128patch(img):
    """
    What we know:
        All images are RGB
        All images are landscape oriented
        All images are 256p
    """
    h, w = img.shape[:2]
    assert h==256
    m = 2
    n = w//128
    x0 = (w-n*128)//2
    img2 = img[:,x0:x0+n*128]
    patch_list = img2.reshape(m, 128, n, 128, 3).transpose(0,2,1,3,4).reshape(m*n, 128, 128, 3)
    return patch_list

def _make_dataset(src_folder, tgt_folder, subfolder_list):
    """
    Make train/valid dataset for PATvidx4
    """
    if not os.path.exists(tgt_folder):
        os.mkdir(tgt_folder)
    
    seq_idx = 0
    source_track = {}
    for subf in tqdm(subfolder_list):
        # load sequence
        subpath = os.path.join(src_folder, subf)
        ibs = ImgBareSet(subpath, "{:05d}.jpg", as_float=True)
        # split patches
        patch_list_seq = []
        for a in trange(len(ibs), desc='process frame', leave=False):
            img = ibs[a].astype(np.float32)
            img_s = _downscale_to_256p(img)
            patch_list = _crop_to_128patch(img_s)
            patch_list_seq.append(patch_list)
        patch_list_seq = np.stack(patch_list_seq, axis=0) #(seq_len, patch_len, 128, 128, 3)
        # save patch sequences
        patch_seq_list = patch_list_seq.transpose(1, 0, 2, 3, 4) #(patch_len, seq_len, 128, 128, 3)
        patch_seq_list = np.clip(patch_seq_list, 0, 1)
        for patch_seq in tqdm(patch_seq_list, desc='save sequence', leave=False):
            # prepare folder
            patch_seq_path = os.path.join(tgt_folder, "{:04d}".format(seq_idx))
            os.mkdir(patch_seq_path)
            # save source track
            source_track["{:04d}".format(seq_idx)] = subf
            # save patches
            for b in trange(len(ibs), desc='save frame', leave=False):
                imsave(os.path.join(patch_seq_path, '{:03d}.png'.format(b)), 
                       img_as_ubyte(patch_seq[b]), 
                       check_contrast=False)
            # update index
            seq_idx += 1
    
    # save source track file
    with open(os.path.join(tgt_folder, 'source_track.json'), 'w') as fp:
        json.dump(source_track, fp, indent=4)

In [None]:
# DAVIS dataset folder
davis_folder = './DAVIS' #change it to your downloaded DAVIS dataset location

# target dataset folder
davis_pat_folder = './DAVIS_PAT_trainval_test' #change it to your target dataset location
# makedirs + exist_ok: also works when the chosen location is nested or already exists
os.makedirs(davis_pat_folder, exist_ok=True)

In [None]:
# peek source status
src_folder = os.path.join(davis_folder, 'JPEGImages', 'Full-Resolution')

def _read_subfolder_list(list_path):
    """
    Read a split file that lists one sequence name per line.

    Surrounding whitespace (including the '\\r' of CRLF line endings) is
    removed and blank lines are skipped, so an empty line can never turn into
    an empty subfolder name.
    """
    with open(list_path, 'r') as fp:
        stripped = (line.strip() for line in fp)
        return [name for name in stripped if name]

train_subfolder_list = _read_subfolder_list(
    os.path.join(davis_folder, 'ImageSets', '2017', 'train.txt'))
valid_subfolder_list = _read_subfolder_list(
    os.path.join(davis_folder, 'ImageSets', '2017', 'val.txt'))

# print the frame count and the size of the first frame of every sequence
for subf in train_subfolder_list + valid_subfolder_list:
    subpath = os.path.join(src_folder, subf)
    ibs = ImgBareSet(subpath, "{:05d}.jpg", as_float=True)
    img = ibs[0]
    print("{:s} contains {:02d} images, shape {}".format(subf.ljust(20), len(ibs), img.shape[:2]))

In [None]:
# change split: the first 15 validation sequences are moved to the training list
# NOTE: this cell is not idempotent - running it again without first re-running
# the cell that loads the lists moves another 15 sequences.
train_subfolder_list, valid_subfolder_list = (
    train_subfolder_list + valid_subfolder_list[:15],
    valid_subfolder_list[15:],
)

In [None]:
# build the validation set under <davis_pat_folder>/valid
_make_dataset(
    src_folder=src_folder,
    tgt_folder=os.path.join(davis_pat_folder, 'valid'),
    subfolder_list=valid_subfolder_list,
)

In [None]:
# build the training set under <davis_pat_folder>/train
_make_dataset(
    src_folder=src_folder,
    tgt_folder=os.path.join(davis_pat_folder, 'train'),
    subfolder_list=train_subfolder_list,
)