# 0. Set up environment

In [13]:
import os
import numpy as np
import shutil
import zipfile
import multiprocessing
from multiprocessing.pool import Pool
from PIL import Image
from tqdm import tqdm
import math
import glob

In [2]:
# Mount Google Drive at /content/drive so the notebook can read and write
# files stored there (only available inside the Google Colab runtime).
# NOTE(review): DRIVE_PATH is a relative path, so this presumably relies on the
# working directory being /content -- confirm.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# Local working directories (relative to the current working directory) and
# the project folder on the mounted Google Drive.
DATA_PATH = 'data'      # extracted frames of the downloaded video packs
SAVE_PATH = 'result'    # compressed JPEG output, mirrored per pack/video
DRIVE_PATH = 'drive/MyDrive/AI_Challenge'

# exist_ok=True makes re-running the cell a no-op and avoids the
# check-then-create race of `if not exists: mkdir`.
os.makedirs(DATA_PATH, exist_ok=True)
os.makedirs(SAVE_PATH, exist_ok=True)

# 1. Function for loading data

In [36]:
# Numeric IDs of the video packs to process; ID n is stored on Drive as
# Videos_L<nn>.zip (zero-padded to two digits).
VID_TO_HANDLE = (29, 30, 31, 32)

In [5]:
def download_vid(i):
    """Fetch one video pack from Drive and unpack it under DATA_PATH.

    The archive ``Videos_L<i>.zip`` (``i`` zero-padded to two digits) is copied
    from ``DRIVE_PATH/TransNetV2_result`` into the current directory, extracted
    into ``DATA_PATH/Videos_L<i>``, and the local copy of the archive is then
    deleted. A stale local archive or extraction folder from a previous run is
    removed first.

    Args:
        i: Numeric ID of the video pack.
    """
    prefix = 'Videos_L0' if i < 10 else 'Videos_L'
    archive_name = f'{prefix}{i}.zip'

    # Start from a clean slate: drop any leftover local copy of the archive.
    if os.path.exists(archive_name):
        os.remove(archive_name)
    drive_archive = os.path.join(DRIVE_PATH, 'TransNetV2_result', archive_name)
    shutil.copy(drive_archive, '.')

    # The extraction folder is named after the archive, minus its extension.
    extract_dir = os.path.join(DATA_PATH, archive_name.partition('.')[0])
    if os.path.exists(extract_dir):
        shutil.rmtree(extract_dir)
    os.mkdir(extract_dir)

    with zipfile.ZipFile(archive_name, 'r') as archive:
        archive.extractall(extract_dir)

    # The local archive is no longer needed once its contents are extracted.
    os.remove(archive_name)

# 2. Compress images and archive them to Drive

In [6]:
def zip2drive(vid_pack):
    """Zip the compressed frames of one video pack and move the zip to Drive.

    Steps, in order:
      1. delete the extracted source frames in ``DATA_PATH/<vid_pack>``;
      2. zip ``SAVE_PATH/<vid_pack>`` into ``SAVE_PATH/<vid_pack>.zip``;
      3. delete the now-archived ``SAVE_PATH/<vid_pack>`` folder;
      4. move the zip into ``DRIVE_PATH/compressed_images``.

    Args:
        vid_pack: Name of the pack folder (e.g. ``'Videos_L29'``) that exists
            under both DATA_PATH and SAVE_PATH.
    """
    shutil.rmtree(os.path.join(DATA_PATH, vid_pack))
    res_path = os.path.join(SAVE_PATH, vid_pack)

    shutil.make_archive(res_path, 'zip', res_path)
    shutil.rmtree(res_path)

    save_dir = os.path.join(DRIVE_PATH, 'compressed_images')
    # exist_ok avoids a failure when two packs are shipped concurrently and
    # both try to create the target folder.
    os.makedirs(save_dir, exist_ok=True)

    # Move to an explicit file path instead of the directory: moving INTO a
    # directory that already holds a file of the same name raises
    # shutil.Error, which would make a re-run fail at the very last step.
    zip_path = res_path + '.zip'
    shutil.move(zip_path, os.path.join(save_dir, os.path.basename(zip_path)))

In [25]:
def compress_images(img_list, quality=80):
    """Re-encode every image in ``img_list`` as an optimized JPEG under SAVE_PATH.

    For an input ``.../<a>/<b>/<c>/<name>.<ext>`` the output is written to
    ``SAVE_PATH/<a>/<b>/<c>/<name>.jpg``, i.e. the three directory components
    directly above the file are mirrored below SAVE_PATH.

    Args:
        img_list: Iterable of image paths using ``/`` as the separator.
        quality: JPEG quality setting handed to Pillow (default 80).
    """
    for img_path in img_list:
        parts = img_path.split('/')
        save_dir = os.path.join(SAVE_PATH, *parts[-4:-1])
        # Several worker processes handle frames of the same video at once, so
        # the directory may be created by a sibling between a check and a
        # mkdir; exist_ok makes the creation race-free.
        os.makedirs(save_dir, exist_ok=True)

        stem = os.path.splitext(parts[-1])[0]
        # The context manager closes the source file even if saving fails.
        with Image.open(img_path) as src:
            # JPEG cannot store an alpha channel or a palette.
            frame = src.convert("RGB") if src.mode in ("RGBA", "LA", "P") else src
            frame.save(os.path.join(save_dir, f"{stem}.jpg"),
                       "JPEG",
                       optimize=True,
                       quality=quality)

In [37]:
# Pipelined processing: while one pack is being fetched from Drive by a
# background process, the pack(s) fetched on the previous iteration are
# compressed by the worker pool and shipped back to Drive.
# The last pack in VID_TO_HANDLE is only downloaded by this loop; its frames
# still have to be compressed afterwards.
is_first = True
downloading = None
# Three worker processes per CPU core.
num_processes = multiprocessing.cpu_count() * 3
with Pool(num_processes) as pool:
    for vid_id in VID_TO_HANDLE:
        # Snapshot the extracted packs BEFORE the next download starts, so the
        # folder the downloader creates is never picked up half-extracted.
        # Nothing is compressed on the first iteration.
        ready_packs = [] if is_first else os.listdir(DATA_PATH)
        is_first = False

        downloading = multiprocessing.Process(target=download_vid, args=(vid_id, ))
        downloading.start()

        move2drive = []
        for video_pack in ready_packs:
            full_video_pack = os.path.join(DATA_PATH, video_pack)
            compressing = None
            for video in tqdm(os.listdir(full_video_pack), desc=f"{video_pack}: "):
                img_list = glob.glob(os.path.join(full_video_pack, video, 'frames/*.png'))
                if not img_list:
                    # No frames: the chunk size would be 0 and range() would
                    # raise ValueError on a zero step.
                    continue

                # Split the frames into at most num_processes chunks.
                length = len(img_list)
                step = math.ceil(length/num_processes)
                args = [(img_list[i:i+step],) for i in range(0, length, step)]

                # Keep at most one video in flight: the glob above overlaps
                # with the previous video's compression, then we wait for it.
                # NOTE: wait() does not re-raise exceptions raised in workers;
                # use get() instead if failures must be surfaced.
                if compressing is not None:
                    compressing.wait()

                compressing = pool.starmap_async(
                    compress_images,
                    args,
                )

            # All frames of the pack must be written before it is zipped.
            if compressing is not None:
                compressing.wait()

            p = multiprocessing.Process(target=zip2drive, args=(video_pack, ))
            p.start()
            move2drive.append(p)

        # Finish shipping the archives and the download before the next round.
        for p in move2drive:
            p.join()
        downloading.join()

Videos_L29: 100%|██████████| 31/31 [19:04<00:00, 36.93s/it]
Videos_L30: 100%|██████████| 30/30 [20:08<00:00, 40.29s/it]
Videos_L31: 100%|██████████| 28/28 [17:59<00:00, 38.57s/it]


In [38]:
# Compress and ship every pack that is still extracted under DATA_PATH.
with Pool(num_processes) as pool:
    move2drive = []
    for video_pack in os.listdir(DATA_PATH):
        full_video_pack = os.path.join(DATA_PATH, video_pack)
        compressing = None
        for video in tqdm(os.listdir(full_video_pack), desc=f"{video_pack}: "):
            img_list = glob.glob(os.path.join(full_video_pack, video, 'frames/*.png'))
            if not img_list:
                # No frames: the chunk size would be 0 and range() would
                # raise ValueError on a zero step.
                continue

            # Split the frames into at most num_processes chunks.
            length = len(img_list)
            step = math.ceil(length/num_processes)
            args = [(img_list[i:i+step],) for i in range(0, length, step)]

            # Keep at most one video in flight at a time.
            # NOTE: wait() does not re-raise exceptions raised in workers;
            # use get() instead if failures must be surfaced.
            if compressing is not None:
                compressing.wait()

            compressing = pool.starmap_async(
                compress_images,
                args,
            )

        # All frames of the pack must be written before it is zipped.
        if compressing is not None:
            compressing.wait()

        p = multiprocessing.Process(target=zip2drive, args=(video_pack, ))
        p.start()
        move2drive.append(p)

    # Wait until every archive has actually been moved to Drive; without this
    # the cell finishes while the uploads are still running in the background.
    for p in move2drive:
        p.join()

Videos_L32: 100%|██████████| 30/30 [14:50<00:00, 29.69s/it]
