In [1]:
import os
import pickle
import numpy as np
import wget
import shutil
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from zipfile import ZipFile
import os

In [2]:
# Local root directory; the annotation CSVs are stored directly in it and each
# split gets its own sub-directory below it.
IMPRESSIONV2_DIR = Path("data/impressionv2")
# Base URL of the download server. File names are appended to it verbatim, so
# the trailing slash is required.
URL = "http://158.109.8.102/FirstImpressionsV2/"

In [3]:
def unzip_encrypted(file_dir, file_name, pwd):
    """Extract a (password-protected) zip archive into the directory it lives in.

    Args:
        file_dir: ``pathlib.Path`` of the directory containing the archive; the
            archive members are extracted into this same directory.
        file_name: Name of the archive inside ``file_dir``.
        pwd: Archive password as ``bytes``, forwarded to ``ZipFile.extractall``.
    """
    archive_path = file_dir / file_name
    with ZipFile(archive_path) as archive:
        archive.extractall(path=file_dir, pwd=pwd)

In [4]:
def download(file_name, local_dir):
    """Fetch ``file_name`` from the dataset server into ``local_dir``.

    The download is skipped when the target file is already present, so
    re-running the notebook does not transfer the (multi-gigabyte) archives
    again. Delete the local file to force a fresh download.

    Args:
        file_name: Name of the remote file; it is appended to ``URL``.
        local_dir: ``pathlib.Path`` of the destination directory. It is created
            (including parents) if it does not exist yet.

    Returns:
        ``pathlib.Path`` of the local copy, i.e. ``local_dir / file_name``.
    """
    local = local_dir / file_name
    if local.exists():
        # Already fetched by an earlier run. NOTE(review): the `wget` package
        # appears to save under a renamed duplicate instead of overwriting an
        # existing target, which would leave `local` pointing at the old file
        # anyway - confirm against the installed wget version.
        return local
    # The destination may not exist on a fresh checkout; wget does not create it.
    local_dir.mkdir(parents=True, exist_ok=True)
    wget.download(URL + file_name, str(local))
    return local
    
def download_extract(file_name, local_dir):
    """Download ``file_name`` into ``local_dir`` and unpack it if it is a zip.

    Files whose suffix is not ``.zip`` (e.g. CSVs) are downloaded only.

    Args:
        file_name: Name of the remote file to fetch.
        local_dir: ``pathlib.Path`` used both as download destination and as
            extraction directory.
    """
    fetched = download(file_name, local_dir)
    if fetched.suffix != '.zip':
        return
    shutil.unpack_archive(fetched, local_dir)
        
def download_extract_encrypted(file_name, local_dir, pwd):
    """Download a password-protected zip and extract it inside ``local_dir``.

    Args:
        file_name: Name of the remote archive to fetch.
        local_dir: ``pathlib.Path`` used both as download destination and as
            extraction directory.
        pwd: Archive password as ``bytes``.
    """
    download(file_name, local_dir)
    unzip_encrypted(file_dir=local_dir, file_name=file_name, pwd=pwd)

In [5]:
# Fetch the eth_gender annotation CSVs for the dev and test sets. They are not
# zip files, so download_extract only downloads them.
for annotation_csv in ("eth_gender_annotations_dev.csv",
                       "eth_gender_annotations_test.csv"):
    download_extract(annotation_csv, IMPRESSIONV2_DIR)

100% [..............................................................................] 74038 / 74038

In [6]:
# Split to fetch: one of "train", "val" or "test".
split = "val"

# Archive counts per split: train has 6 outer / 75 inner video zips, the other
# splits have 2 outer / 25 inner.
n_video_zips, n_internal_video_zips = (6, 75) if split == "train" else (2, 25)

# Long form of the split name, as used in the inner archive file names.
split_long = dict(train="training", val="validation", test="test")[split]

# Every file of the chosen split is stored in its own sub-directory.
local_dir = IMPRESSIONV2_DIR / split
local_dir.mkdir(parents=True, exist_ok=True)

# download_extract with the destination pre-bound to the split directory.
download_extract_split = partial(download_extract, local_dir=local_dir)

In [11]:
# Transcription and annotation archives of the chosen split. Archives whose
# name ends in "-e.zip" are the password-protected ones; the rest are plain zips.
if split == "train":
    files = (f"{split}-transcription.zip", f"{split}-annotation.zip")
elif split == "val":
    files = (f"{split}-transcription.zip", f"{split}-annotation-e.zip")
elif split == "test":
    files = (f"{split}-transcription-e.zip", f"{split}-annotation-e.zip")
else:
    raise ValueError(f"unknown split: {split!r}")

# NOTE(review): the archive password is hard-coded here and repeated in
# extract_test; consider moving it to a single config constant.
download_extract_test_pkl = partial(download_extract_encrypted, local_dir=local_dir, pwd=b'zeAzLQN7DnSIexQukc9W')


def download_extract_any(file_name):
    """Download ``file_name`` and unpack it with the handler its name calls for.

    Encrypted archives ("-e.zip") go through the password-aware handler, all
    other files through the plain one. This avoids pushing an encrypted archive
    through ``shutil.unpack_archive`` (which cannot supply a password) and
    avoids downloading the same file twice.
    """
    if file_name.endswith("-e.zip"):
        return download_extract_test_pkl(file_name)
    return download_extract_split(file_name)


with ThreadPoolExecutor(6) as executor:
    # executor.map is lazy about errors: a worker's exception is only re-raised
    # when its result is retrieved, so drain the iterator to surface failures.
    results = list(executor.map(download_extract_any, files))

100% [..............................................................................] 79764 / 79764

In [8]:
def download_videos(i):
    """Download the i-th outer video archive of the current split.

    For the train/val splits the archive is a plain zip and is unpacked right
    after the download. The test archives ("test-<i>e.zip") are
    password-protected, so they are only downloaded here; extract_test unpacks
    them with the password (shutil.unpack_archive cannot supply one).

    Args:
        i: 1-based index of the archive.
    """
    if split == "test":
        download(f'{split}-{i}e.zip', local_dir)
    else:
        download_extract_split(f'{split}-{i}.zip')


with ThreadPoolExecutor(6) as executor:
    # Drain the iterator so that an exception raised in a worker thread is
    # re-raised here instead of being silently dropped.
    results = list(executor.map(download_videos, range(1, n_video_zips + 1)))

100% [....................................................................] 3020533440 / 3020533440

In [9]:
def extract_dev(i):
    """Unpack the i-th outer video archive of the current split into ``local_dir``.

    Args:
        i: 1-based index of the archive.
    """
    archive_name = f'{split}-{i}{"e" if split == "test" else ""}.zip'
    shutil.unpack_archive(local_dir / archive_name, local_dir)
def extract_test(i):
    """Decrypt the i-th outer test archive and stage its contents in ``local_dir``.

    The archive is extracted with the outer password, then every entry of the
    resulting ``test-<i>`` directory is moved up into ``local_dir`` with an
    ``_ext`` suffix appended to its name.

    Args:
        i: 1-based index of the archive.
    """
    unzip_encrypted(local_dir, f'test-{i}e.zip', b'zeAzLQN7DnSIexQukc9W')
    extracted_dir = local_dir / f'test-{i}'
    for entry in os.listdir(extracted_dir):
        shutil.move(extracted_dir / entry, local_dir / f"{entry}_ext")
    
# Test archives need the password-aware extractor; train/val ones are plain zips.
f = extract_test if split == "test" else extract_dev
with ThreadPoolExecutor(6) as executor:
    # executor.map only re-raises a worker's exception when its result is
    # retrieved; drain the iterator so extraction failures are not lost.
    results = list(executor.map(f, range(1, n_video_zips + 1)))

In [10]:
def extract_internal(i):
    """Unpack the i-th inner video archive (``<split_long>80_<ii>.zip``) into ``local_dir``.

    Args:
        i: 1-based index of the inner archive; zero-padded to two digits in the
            file name.
    """
    shutil.unpack_archive(local_dir / f'{split_long}80_{i:02d}.zip', extract_dir=local_dir)
    
def extract_internal_test(i):
    """Decrypt the i-th inner test archive, then unpack it like any other split.

    The staged ``test80_<ii>.zip_ext`` file is extracted with the inner
    password (presumably yielding ``test80_<ii>.zip`` - verify against the
    data), after which extract_internal unpacks that zip.

    Args:
        i: 1-based index of the inner archive.
    """
    encrypted_name = f'test80_{i:02d}.zip_ext'
    unzip_encrypted(local_dir, encrypted_name, b'.chalearnLAPFirstImpressionsSECONDRoundICPRWorkshop2016.')
    extract_internal(i)
    
# Inner test archives are encrypted and need the extra decryption step first.
f = extract_internal_test if split == "test" else extract_internal
with ThreadPoolExecutor(6) as executor:
    # executor.map only re-raises a worker's exception when its result is
    # retrieved; drain the iterator so extraction failures are not lost.
    results = list(executor.map(f, range(1, n_internal_video_zips + 1)))