In [1]:
import os
import gdown
import subprocess
import zipfile

In [2]:
base_path = "/home/haggenmueller/asl_detection/machine_learning/datasets/how2sign"

def create_folders():
    paths = [
        f"{base_path}/video_level/train/rgb_front",
        f"{base_path}/video_level/val/rgb_front",
        f"{base_path}/video_level/test/rgb_front",
    ]
    for path in paths:
        try:
            os.makedirs(path, exist_ok=True)
            print(f"✅ Successfully created {base_path}")
        except Exception as e:
            print(f"❌ Error: {e}")


In [None]:
def download_file(file_id, file_name, max_retries=5):
    url = f"https://drive.google.com/uc?id={file_id}"
    for attempt in range(max_retries):
        try:
            print(f"Attempting to download {file_name} (Attempt {attempt + 1}/{max_retries})...")
            gdown.download(url, file_name, quiet=False, fuzzy=True)
            if os.path.exists(file_name):
                print(f"✅ Successfully downloaded {file_name}")
                return True
        except Exception as e:
            print(f"❌ Error downloading {file_name}: {e}")
    print(f"⚠️ Failed to download {file_name} after {max_retries} attempts.")
    return False

In [None]:
def print_file_pathes(file_id, output):
    url = f"https://drive.google.com/uc?id={file_id}"
    print(url)

In [None]:
file_ids = {
        "train_raw_videos.z02": "1krtYdpK_LQFgEUCnHxoYAW7EyhLMLWq0",
        "train_raw_videos.z03": "1fXpWRNFhpuVm3ym7lT9vF_bnDjHkvP_K",
        "train_raw_videos.z04": "1IFetFt4AzsxNCMZ0VVpX7YRgFAm58X48",
        "train_raw_videos.z05": "1ZHuuun6Ae-AOLBns3LmuH7w8C9YCB4gH",
        "train_raw_videos.z06": "1FQQIPblk-oLH_vu7h2tDO0oJaZ3xkp5N",
        "train_raw_videos.z07": "19XNgERcolGAMPPgX-Gx_GebSTx3W4o0r",
        "train_raw_videos.z08": "1YN-SA9uzrogEdKeT6UdQUIcuGEyYJILg",
        "train_raw_videos.z09": "1SZQ2GzPLCkRqvsImAjULAPBiuAKi9DE9",
        "train_raw_videos.zip": "1Xe1T5okJiopMXUiH3sc0mdCWNDYSBopd",
        "test_raw_videos.zip": "1z0i6BBGHQ12ChY63hZH56QnczvQ0JfTb",
    }
    
for file_name, file_id in file_ids.items():
    print_file_pathes(file_id, file_name)

In [None]:
def download_rgb_front_videos():
    print("Downloading Green Screen RGB videos (Frontal View)...")

    # file_ids = {
    #     "train_raw_videos.z02": "1krtYdpK_LQFgEUCnHxoYAW7EyhLMLWq0",
    #     "train_raw_videos.z03": "1fXpWRNFhpuVm3ym7lT9vF_bnDjHkvP_K",
    #     "train_raw_videos.z04": "1IFetFt4AzsxNCMZ0VVpX7YRgFAm58X48",
    #     "train_raw_videos.z05": "1ZHuuun6Ae-AOLBns3LmuH7w8C9YCB4gH",
    #     "train_raw_videos.z06": "1FQQIPblk-oLH_vu7h2tDO0oJaZ3xkp5N",
    #     "train_raw_videos.z07": "19XNgERcolGAMPPgX-Gx_GebSTx3W4o0r",
    #     "train_raw_videos.z08": "1YN-SA9uzrogEdKeT6UdQUIcuGEyYJILg",
    #     "train_raw_videos.z09": "1SZQ2GzPLCkRqvsImAjULAPBiuAKi9DE9",
    #     "train_raw_videos.zip": "1Xe1T5okJiopMXUiH3sc0mdCWNDYSBopd",
    #     "test_raw_videos.zip": "1z0i6BBGHQ12ChY63hZH56QnczvQ0JfTb",
    # }
    
    # remaining_files = list(file_ids.items())
    # while remaining_files:
    #     file_name, file_id = remaining_files.pop(0)
    #     success = download_file(file_id, file_name)
    #     if not success:
    #         remaining_files.append((file_name, file_id))  # Retry later if it failed
    
    print("Download completed. Extracting files...")
    os.system("cat train_raw_videos.z* > train_raw_videos_all.zip")
    os.system("unzip train_raw_videos_all.zip -d ./How2Sign/video_level/train/rgb_front && rm train_raw_videos_all.zip")
    os.system("unzip val_raw_videos.zip -d ./How2Sign/video_level/val/rgb_front && rm val_raw_videos.zip")
    os.system("unzip test_raw_videos.zip -d ./How2Sign/video_level/test/rgb_front && rm test_raw_videos.zip")
    
create_folders()
download_rgb_front_videos()


In [5]:
download_dir = base_path

train_zip_parts = "train_raw_videos.z*"
train_zip_full = "train_raw_videos_all.zip"

print(f"Merging ZIP parts: {train_zip_parts} -> {train_zip_full}")
subprocess.run(f"cat {train_zip_parts} > {train_zip_full}", shell=True)
print("Merging completed.")

def unzip_and_remove(zip_path, extract_to):
    print(f"Checking existence of {zip_path}...")
    if not os.path.exists(zip_path):
        print(f"Error: {zip_path} not found!")
        return
    
    print(f"Extracting: {zip_path} -> {extract_to}")
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        print(f"Successfully extracted: {zip_path}")
    except zipfile.BadZipFile:
        print(f"Error: {zip_path} is not a valid ZIP file!")
        return
    
    os.remove(zip_path)
    print(f"Deleted ZIP file: {zip_path}")

unzip_and_remove("train_raw_videos_all.zip", "./How2Sign/video_level/train/rgb_front")
unzip_and_remove("val_raw_videos.zip", "./How2Sign/video_level/val/rgb_front")
unzip_and_remove("test_raw_videos.zip", "./How2Sign/video_level/test/rgb_front")

print("All ZIP files have been extracted and removed.")


Merging ZIP parts: train_raw_videos.z* -> train_raw_videos_all.zip
Merging completed.
Checking existence of train_raw_videos_all.zip...
Extracting: train_raw_videos_all.zip -> ./How2Sign/video_level/train/rgb_front
Error: train_raw_videos_all.zip is not a valid ZIP file!
Checking existence of val_raw_videos.zip...
Extracting: val_raw_videos.zip -> ./How2Sign/video_level/val/rgb_front
Successfully extracted: val_raw_videos.zip
Deleted ZIP file: val_raw_videos.zip
Checking existence of test_raw_videos.zip...
Extracting: test_raw_videos.zip -> ./How2Sign/video_level/test/rgb_front
Error: test_raw_videos.zip is not a valid ZIP file!
All ZIP files have been extracted and removed.


# Download dataset from private google drive

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1fXpWRNFhpuVm3ym7lT9vF_bnDjHkvP_K?alt=media -o train_raw_videos.z03 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1IFetFt4AzsxNCMZ0VVpX7YRgFAm58X48?alt=media -o train_raw_videos.z04 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1ZHuuun6Ae-AOLBns3LmuH7w8C9YCB4gH?alt=media -o train_raw_videos.z05 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1FQQIPblk-oLH_vu7h2tDO0oJaZ3xkp5N?alt=media -o train_raw_videos.z06 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/19XNgERcolGAMPPgX-Gx_GebSTx3W4o0r?alt=media -o train_raw_videos.z07 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1YN-SA9uzrogEdKeT6UdQUIcuGEyYJILg?alt=media -o train_raw_videos.z08 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1SZQ2GzPLCkRqvsImAjULAPBiuAKi9DE9?alt=media -o train_raw_videos.z09 

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1Xe1T5okJiopMXUiH3sc0mdCWNDYSBopd?alt=media -o train_raw_videos.zip

curl -H "Authorization: Bearer ya29.a0AXeO80TtQJ0SEGDKjzTA1EJuGh-tSfxLKxFde5o-pRC42Lw2q5ynkegEPCcpN88pk-iXA46IxvdgBnAbO47clDE8fptpAgCirHaoxQBDbavFVrgmM0MbuiVvRcIIVMOpntb9nnvDB0ct_oWXO6QOPw7XH1Pn86-rdijvyOyoaCgYKARESARESFQHGX2MihsqwLzSgli2lgb4SdmzXuw0175" https://www.googleapis.com/drive/v3/files/1z0i6BBGHQ12ChY63hZH56QnczvQ0JfTb?alt=media -o test_raw_videos.zip 

# Notebooks am 20.02.25 abends

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1TBX7hLraMiiLucknM1mhblNVomO9-Y0r?alt=media -o train_2D_keypoints.tar.gz

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1g8tzzW5BNPzHXlamuMQOvdwlHRa-29Vp?alt=media -o test_2D_keypoints.tar.gz

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1JmEsU0GYUD5iVdefMOZpeWa_iYnmK_7w?alt=media -o val_2D_keypoints.tar.gz

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1AgwBZW26kFHS4CWNMQTCMPGkBPkH3qCu?alt=media -o how2sign_realigned_test.csv

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1dUHSoefk9OxKJnHrHPX--I4tpm9QD0ok?alt=media -o how2sign_realigned_train.csv

curl -H "Authorization: Bearer ya29.a0AXeO80TQ_CfHXMK56M8lFxwIB-zmgKNdjKv5CUlEF75GyjJTNKlHT4OshcHgizzUdK_5fyeYCrvcQ1SBy6wD2ZH7EHdSofcKZ2pcGHAzV-bj0MMYK11lmLB3NcMXfhVK1ch-jzsKfhUZdko6jl3gIruiUxrKV13L0R2oIDF_aCgYKAcISARESFQHGX2MiIEqyBUvfTCE1SYtzUKI1bQ0175" https://www.googleapis.com/drive/v3/files/1Vpag7VPfdTCCJSao8Pz14rlPfekRMggI?alt=media -o how2sign_realigned_val.csv

In [5]:
import tarfile

# Define file paths
keypoints_tar = "val_2D_keypoints.tar.gz"
extract_path = f"{base_path}/keypoints/val/"

# Ensure extraction directory exists
os.makedirs(extract_path, exist_ok=True)

# Extract tar.gz file
print(f"Extracting {keypoints_tar} to {extract_path}...")
try:
    with tarfile.open(keypoints_tar, "r:gz") as tar:
        tar.extractall(path=extract_path)
    print("Extraction completed successfully.")
except Exception as e:
    print(f"Error during extraction: {e}")


Extracting val_2D_keypoints.tar.gz to /home/haggenmueller/asl_detection/machine_learning/datasets/how2sign/keypoints/val/...


  tar.extractall(path=extract_path)


Extraction completed successfully.
