# 0. Set up environment

In [None]:
!pip install open_clip_torch

Collecting open_clip_torch
  Downloading open_clip_torch-2.20.0-py3-none-any.whl (1.5 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.5 MB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.5/1.5 MB[0m [31m21.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from open_clip_torch)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece (from open_clip_torch)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━

In [None]:
import os
import cv2
import json
import shutil
import copy
import zipfile
import numpy as np
from tqdm import tqdm
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import multiprocessing
from multiprocessing.pool import Pool
import glob
import math
from PIL import Image
import open_clip

In [None]:
# Sets CUDA_LAUNCH_BLOCKING=1 in this process's environment.
# NOTE(review): this is normally a debugging aid (synchronous kernel launches, so
# errors surface at the failing call) and is expected to slow GPU work down —
# confirm it is still wanted for the full encoding run.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [None]:
# Mount Google Drive under /content/drive (Colab only). DRIVE_PATH is a relative
# 'drive/...' path, so it presumably relies on /content being the working
# directory — verify when running outside the default Colab setup.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# DATA_PATH:  local scratch directory that receives the extracted video packs.
# SAVE_PATH:  local scratch directory that receives the per-video .npy results.
# DRIVE_PATH: project folder on the mounted Google Drive (relative path, so it is
#             resolved against the notebook's current working directory).
DATA_PATH = 'data'
SAVE_PATH = 'result'
DRIVE_PATH = 'drive/MyDrive/AI_Challenge'

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(DATA_PATH, exist_ok=True)
os.makedirs(SAVE_PATH, exist_ok=True)

# 1. Load data

In [None]:
# Indices of the video packs to process in this run. Each index `i` is passed to
# download_vid, which maps it to the archive `Videos_L<i, zero-padded to 2 digits>.zip`.
VID_TO_HANDLE = (1, 3, 4)

In [None]:
def extract_data(path, save_path):
    """Unpack the zip archive at `path` into `save_path`, then delete the archive.

    The archive is only removed when extraction completed without raising.
    """
    archive = zipfile.ZipFile(path, 'r')
    try:
        archive.extractall(save_path)
    finally:
        archive.close()
    os.remove(path)

# Copy videos from drive and extract
def download_vid(i):
    """Copy one zipped video pack from Drive to local disk and unpack it.

    The archive ``Videos_L<NN>.zip`` (``NN`` is ``i`` zero-padded to two digits)
    is copied from ``<DRIVE_PATH>/TransNetV2_result`` into the current working
    directory, extracted into ``<DATA_PATH>/Videos_L<NN>`` (any previous
    extraction of the same pack is discarded first), and the local copy of the
    archive is deleted afterwards.

    Args:
        i: Integer index of the video pack to fetch.
    """
    vid_path = f'Videos_L{i:02d}.zip'
    # Drop a stale local copy that an interrupted earlier run may have left behind.
    if os.path.exists(vid_path):
        os.remove(vid_path)
    shutil.copy(os.path.join(DRIVE_PATH, 'TransNetV2_result', vid_path), '.')

    try:
        save_path = os.path.join(DATA_PATH, os.path.splitext(vid_path)[0])
        # Always extract into an empty directory so old frames are never mixed in.
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        os.makedirs(save_path)

        with zipfile.ZipFile(vid_path, 'r') as zip_ref:
            zip_ref.extractall(save_path)
    finally:
        # Free the disk space taken by the local archive even if extraction failed.
        os.remove(vid_path)

# 2. Dataloader

In [None]:
class CLIPDataset(Dataset):
    """Dataset that yields transformed images loaded from a list of file paths.

    Args:
        img_paths: Sequence of image file paths; item ``idx`` is read from
            ``img_paths[idx]``.
        transform: Callable applied to each opened image; whatever it returns is
            what ``__getitem__`` yields.
    """

    def __init__(
        self,
        img_paths,
        transform,
    ):
        self.img_paths = img_paths
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and return it after ``transform``."""
        img_path = self.img_paths[idx]
        # The context manager closes the underlying file deterministically, even
        # when the transform raises, instead of leaving it to garbage collection.
        # The transform is therefore expected to consume the pixel data before
        # returning (e.g. by producing a tensor).
        with Image.open(img_path) as img:
            return self.transform(img)

# 3. CLIP image encoder

In [None]:
def zip2drive(vid_pack):
    """Archive the results of one video pack, move them to Drive and clean up.

    Steps, in order:
      1. delete the extracted input ``<DATA_PATH>/<vid_pack>``;
      2. zip ``<SAVE_PATH>/<vid_pack>`` into ``<SAVE_PATH>/<vid_pack>.zip`` and
         delete the unzipped result directory;
      3. move the zip into ``<DRIVE_PATH>/clip_vitl14_result`` (created on
         demand), replacing an archive of the same name from an earlier run.

    Args:
        vid_pack: Name of the pack directory inside ``DATA_PATH`` / ``SAVE_PATH``.
    """
    shutil.rmtree(os.path.join(DATA_PATH, vid_pack))
    res_path = os.path.join(SAVE_PATH, vid_pack)

    shutil.make_archive(res_path, 'zip', res_path)
    shutil.rmtree(res_path)

    save_dir = os.path.join(DRIVE_PATH, 'clip_vitl14_result')
    # exist_ok=True: several zip2drive processes may run at the same time, so a
    # separate exists()/mkdir() pair could race.
    os.makedirs(save_dir, exist_ok=True)

    # Give shutil.move the full destination file path: moving into a directory
    # raises if a file with that name is already there, which would break re-runs.
    archive_path = res_path + '.zip'
    shutil.move(archive_path, os.path.join(save_dir, os.path.basename(archive_path)))

In [None]:
# Build the open_clip 'ViT-L-14' model with the 'datacomp_xl_s13b_b90k' pretrained
# weights. Three values are returned; the second one is discarded and the third
# (`preprocess`) is used later as the per-image transform of CLIPDataset.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='datacomp_xl_s13b_b90k')
# Move the model to the selected device and switch it to eval mode.
model.to(device).eval()

In [None]:
batch_size = 512


def _encode_video_packs(video_packs):
    """Encode every video of the given extracted packs and ship the results to Drive.

    For each pack directory ``<DATA_PATH>/<pack>``, the PNG frames of every video
    sub-directory are run through ``model.encode_image`` in batches of
    ``batch_size`` and the stacked embeddings are saved to
    ``<SAVE_PATH>/<pack>/<video>.npy``. When a pack is done, ``zip2drive`` is
    started in a background process so the transfer overlaps with the encoding
    of the next pack. The function returns only after all of these background
    processes have finished.

    Args:
        video_packs: Names of the pack directories inside ``DATA_PATH``.
    """
    move2drive = []
    for video_pack in video_packs:
        full_video_pack = os.path.join(DATA_PATH, video_pack)
        pack_save_dir = os.path.join(SAVE_PATH, video_pack)
        # Created up front so zip2drive always finds a directory to archive,
        # even if the pack turns out to contain no usable video.
        os.makedirs(pack_save_dir, exist_ok=True)

        for video in os.listdir(full_video_pack):
            img_paths = sorted(glob.glob(os.path.join(full_video_pack, video, 'frames/*.png')))
            if not img_paths:
                # np.concatenate raises on an empty list; skip instead of
                # aborting the whole run because of one video without frames.
                print(f"[skip] no frames found for {os.path.join(video_pack, video)}")
                continue

            # load dataset
            clip_dataloader = DataLoader(
                CLIPDataset(img_paths=img_paths, transform=preprocess),
                batch_size=batch_size,
                shuffle=False,  # keep embeddings in sorted-frame order
                drop_last=False,
                pin_memory=True,
                num_workers=4,
            )

            results = []
            with torch.inference_mode():
                for img in tqdm(clip_dataloader, desc=f"{os.path.join(video_pack, video)}: "):
                    results.append(model.encode_image(img.to(device)).float().cpu().numpy())

            np.save(os.path.join(pack_save_dir, f'{video}.npy'), np.concatenate(results, axis=0))

        p = multiprocessing.Process(target=zip2drive, args=(video_pack, ))
        p.start()
        move2drive.append(p)

    for p in move2drive:
        p.join()


# Two-stage pipeline: while the GPU encodes the pack(s) that are already on disk,
# the next pack is copied from Drive and extracted in a background process.
downloading = None
ready_packs = []  # packs that are fully extracted in DATA_PATH and not yet encoded
for vid_id in VID_TO_HANDLE:
    downloading = multiprocessing.Process(target=download_vid, args=(vid_id, ))
    downloading.start()

    _encode_video_packs(ready_packs)

    downloading.join()
    if downloading.exitcode != 0:
        print(f"[warn] download of video pack {vid_id} exited with code {downloading.exitcode}")
    # List DATA_PATH only after the download has finished, so a half-extracted
    # pack is never picked up (and deleted by zip2drive) while it is being written.
    ready_packs = sorted(os.listdir(DATA_PATH))

# The pack fetched during the last iteration still has to be encoded.
_encode_video_packs(ready_packs)
