### Notebook description:

This notebook should:
1. download the data from a public Google Cloud Storage bucket
1. unzip all videos
1. split each video into frames and save them in a separate directory

# Download dataset

In [1]:
# !pip install -U --no-cache-dir gdown --pre

In [2]:
# !pip install gsutil==5.17 -q

In [3]:
!mkdir data

In [4]:
!gsutil cp -r gs://drone_vehicle_footage_dataset_public data/

Copying gs://drone_vehicle_footage_dataset_public/MIT-LICENSE.txt...
Copying gs://drone_vehicle_footage_dataset_public/video_105.zip...              
Copying gs://drone_vehicle_footage_dataset_public/video_106.zip...              
Copying gs://drone_vehicle_footage_dataset_public/video_121.zip...              
/ [4 files][  3.2 MiB/  3.2 MiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://drone_vehicle_footage_dataset_public/video_123.zip...
Copying gs://drone_vehicle_footage_dataset_public/video_124.zip...              
Copying gs://drone_vehicle_footage_dataset_public/video_127.zip...              
Copying gs://drone_vehicle_footage_dataset_public/video_130.zip...              
Copying gs://drone_vehicle_footage_dataset_pu

# Unzip all videos

In [5]:
from pathlib import Path
# Convenience monkey-patch: `some_path.ls()` returns the directory entries as a list
# (in whatever order `iterdir()` yields them). Later cells rely on this helper.
Path.ls = lambda x: list(x.iterdir())

from tqdm import tqdm
import shutil

In [6]:
# Root folder for everything this notebook downloads or generates.
DATA_DIR = Path('data')

# Folder holding the downloaded bucket contents (zip archives).
DIR_WITH_ZIP_VIDS = DATA_DIR / 'drone_vehicle_footage_dataset_public'
# Destination for the unpacked videos.
DIR_VIDS = DATA_DIR / 'drone_vids'
# Destination for the frames extracted from the videos.
DIR_IMGS = DATA_DIR / 'drone_imgs'

# Create both output folders up front; existing folders are left untouched.
for _out_dir in (DIR_VIDS, DIR_IMGS):
    _out_dir.mkdir(parents=True, exist_ok=True)

In [7]:
# Peek at the first few downloaded entries to confirm the copy worked.
list(DIR_WITH_ZIP_VIDS.iterdir())[:5]

[PosixPath('data/drone_vehicle_footage_dataset_public/video_142.zip'),
 PosixPath('data/drone_vehicle_footage_dataset_public/video_192.zip'),
 PosixPath('data/drone_vehicle_footage_dataset_public/video_41.zip'),
 PosixPath('data/drone_vehicle_footage_dataset_public/video_92.zip'),
 PosixPath('data/drone_vehicle_footage_dataset_public/video_137.zip')]

In [8]:
# Only the .zip entries are archives; anything else in the folder is skipped.
zip_files = [entry for entry in DIR_WITH_ZIP_VIDS.ls() if entry.suffix == '.zip']

# Unpack with the standard library instead of shelling out to `unzip`: this works
# without an external binary and is safe for paths containing spaces.
# Files that already exist in DIR_VIDS are overwritten.
for zip_file in tqdm(zip_files):
    shutil.unpack_archive(str(zip_file), str(DIR_VIDS), format='zip')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:59<00:00,  1.02it/s]


In [9]:
# Drop the metadata files that were unpacked next to the videos (presumably
# per-archive annotation/task descriptions — they are not used in this notebook).
# The existence check keeps the cell re-runnable after the files are already gone.
for metadata_name in ('annotations.json', 'task.json'):
    metadata_file = DIR_VIDS / metadata_name
    if metadata_file.exists():
        metadata_file.unlink()

In [10]:
# Move everything from the unpacked `data/` subfolder one level up into DIR_VIDS,
# then remove the now-empty subfolder. The guard makes the cell safe to re-run
# once the subfolder has already been flattened and deleted.
extracted_dir = DIR_VIDS / 'data'
if extracted_dir.is_dir():
    for file in extracted_dir.ls():
        shutil.move(str(file), str(DIR_VIDS / file.name))

    shutil.rmtree(extracted_dir)

In [11]:
# Number of entries now sitting directly in the videos folder.
sum(1 for _ in DIR_VIDS.iterdir())

58

# Split each video into frames and save them in a separate directory

In [13]:
# !pip install opencv-python==4.6.0.66

Collecting opencv-python==4.6.0.66
  Downloading opencv_python-4.6.0.66-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (60.9 MB)
[K     |████████████████████████████████| 60.9 MB 264 kB/s eta 0:00:01
Installing collected packages: opencv-python
Successfully installed opencv-python-4.6.0.66


In [50]:
import cv2
import numpy as np
import torch
import torchvision.transforms as T
import uuid
from torchvision.utils import save_image

In [54]:
def read_video(file, frame_read_freq=1, n_frames=None, verbose=True):
    """Decode a video file into a list of RGB frame tensors.

    Args:
        file: Path (str or path-like) of the video to open with OpenCV.
        frame_read_freq: Keep every ``frame_read_freq``-th frame (frame index
            ``i`` is kept when ``i % frame_read_freq == 0``).
        n_frames: Maximum number of frames to read. When ``None`` the frame
            count reported by the container is used.
        verbose: Show a tqdm progress bar over the frames when True.

    Returns:
        Tuple ``(frames, fps, n_frames)`` where ``frames`` is a list of
        float32 tensors of shape ``(1, C, H, W)`` with pixel values divided by
        255, ``fps`` is the frame rate reported by OpenCV and ``n_frames`` is
        the (integer) number of frames that was requested for reading.
        If the file cannot be read, ``frames`` is simply empty.
    """
    capture = cv2.VideoCapture(str(file))
    frames = []

    try:
        fps = capture.get(cv2.CAP_PROP_FPS)
        if n_frames is None:
            n_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
        # OpenCV reports properties as floats; the frame count is used as a
        # loop bound, so work with (and return) an int.
        n_frames = int(n_frames)

        frame_indices = range(n_frames)
        if verbose:
            frame_indices = tqdm(frame_indices)

        for i in frame_indices:
            success, image_cv = capture.read()

            # The reported frame count can exceed what is actually decodable,
            # so stop at the first failed read.
            if not success:
                break

            if i % frame_read_freq == 0:
                # OpenCV decodes to BGR; convert to RGB and scale to [0, 1]
                # (assumes 8-bit frames).
                frame = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
                frame = torch.tensor(frame/255, dtype=torch.float32)
                # HWC -> CHW, plus a leading batch dimension.
                frames.append(frame.permute(2,0,1)[None])
    finally:
        # Always free the decoder/file handle, even if decoding raised.
        capture.release()

    return frames, fps, n_frames

def torch_scale_resize(img, scale_factor):
    """Resize an image batch by a multiplicative factor.

    img: tensor whose dims 2 and 3 are read as height and width
         (presumably B, C, H, W, as in ``torch_resize``)
    scale_factor: multiplier applied to both height and width; the scaled
                  sizes are truncated to int
    """
    src_height, src_width = img.shape[2], img.shape[3]
    target_size = (int(src_height * scale_factor), int(src_width * scale_factor))

    return T.Resize(target_size)(img)

def torch_resize(img, size):
    """
    Resize an image batch to a fixed spatial size.

    img: torch.tensor B, C, H, W
    size: tuple H, W
    """
    return T.Resize(size)(img)

In [55]:
# Start from an empty frames folder on every run. shutil.rmtree replaces the shell
# `rm -rf` so the cell needs no external binary and is safe for paths with spaces;
# ignore_errors=True mirrors `-f` (no failure when the folder does not exist yet).
shutil.rmtree(DIR_IMGS, ignore_errors=True)
DIR_IMGS.mkdir(exist_ok=True, parents=True)

In [57]:
# Sample every 10th frame of each video, resize it and store it as a PNG with a
# random (uuid4) file name in DIR_IMGS.
for vid_file in tqdm(DIR_VIDS.ls()):
    # Filter on the loop variable itself so that non-video entries are skipped.
    if vid_file.suffix != '.mp4':
        continue

    frames, fps, total_vid_n_frames = read_video(vid_file, frame_read_freq=10, verbose=False)

    for frame in frames:
        resized_frame = torch_resize(frame, (224,224)) # 224 is a default clip input image size
        save_image(resized_frame, DIR_IMGS / f'{str(uuid.uuid4())}.png')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [04:35<00:00,  4.75s/it]


In [59]:
# Number of frame images written to the output folder.
sum(1 for _ in DIR_IMGS.iterdir())

8201