In [1]:
import os
from pathlib import Path

from dotenv import load_dotenv

In [2]:
load_dotenv(".env", override=True)
if Path(".env.local").exists():
  load_dotenv(".env.local", override=True)

Used environment variables:
- `DATASETS_DIRPATH`: Datasets desination directory path.
- `TMP_DIRPATH`: Temporary directory path.

In [3]:
tmp_dirpath = Path(os.environ["TMP_DIRPATH"]).resolve()
datasets_dirpath = Path(os.environ["DATASETS_DIRPATH"]).resolve()

# Download

Dataset is hosted on the DEEL platform: [here](https://share.deel.ai/s/H4iLKRmLkdBWqSt?path=%2Flard%2F1.0.0).

In [4]:
# html root address of dataset storage
html_root = "https://share.deel.ai/s/H4iLKRmLkdBWqSt/download?path=%2Flard%2F1.0.0&files="
# list of zip files to download (train / test datasets) (zipfile, csv entry point relative file path)
train_archives = [
    ("LARD_train_BIRK_LFST.zip", "LARD_train_BIRK_LFST.csv"),
    ("LARD_train_DAAG_DIAP.zip", "LARD_train_DAAG_DIAP.csv"),
    ("LARD_train_KMSY.zip", "LARD_train_KMSY.csv"),
    ("LARD_train_LFMP_LFPO.zip", "LARD_train_LFMP_LFPO.csv"),
    ("LARD_train_LFQQ.zip", "LARD_train_LFQQ.csv"),
    ("LARD_train_LPPT_SRLI.zip", "LARD_train_LPPT_SRLI.csv"),
    ("LARD_train_VABB.zip", "LARD_train_VABB.csv"),
]
valid_archives = [
    ("LARD_test_real.zip", "LARD_test_real_nominal/Test_Real_Nominal.csv"),
    ("LARD_test_real.zip", "LARD_test_real_edge_cases/Test_Real_Edge_Cases.csv"),
    ("LARD_test_synth.zip", "LARD_test_synth.csv"),
]

Download dataset archive files:

In [5]:
os.makedirs(tmp_dirpath.as_posix(), exist_ok=True)

In [6]:
for source_archive, _ in train_archives:
    print(f"Downloading ... {source_archive}")
    os.system(f'wget -nc "{html_root+source_archive}" -O {(tmp_dirpath / source_archive).as_posix()} > /dev/null 2>&1')
for source_archive, _ in valid_archives:
    print(f"Downloading ... {source_archive}")
    os.system(f'wget -nc "{html_root+source_archive}" -O {(tmp_dirpath / source_archive).as_posix()} > /dev/null 2>&1')

Downloading ... LARD_train_BIRK_LFST.zip
Downloading ... LARD_train_DAAG_DIAP.zip
Downloading ... LARD_train_KMSY.zip
Downloading ... LARD_train_LFMP_LFPO.zip
Downloading ... LARD_train_LFQQ.zip
Downloading ... LARD_train_LPPT_SRLI.zip
Downloading ... LARD_train_VABB.zip
Downloading ... LARD_test_real.zip
Downloading ... LARD_test_real.zip
Downloading ... LARD_test_synth.zip


Unzip dataset archive files:

In [7]:
for source_archive, _ in train_archives:
    print(f"Unzipping ... {source_archive}")
    dest_dirpath = tmp_dirpath / source_archive.rpartition('.')[0]
    if not dest_dirpath.exists():
        os.system(f'unzip -q -o {(tmp_dirpath / source_archive).as_posix()} -d {tmp_dirpath.as_posix()}')
    else:
        print(f"Target directory already exists! Please delete it to avoid overriding existing files.")
for source_archive, _ in valid_archives:
    print(f"Unzipping ... {source_archive}")
    dest_dirpath = tmp_dirpath / source_archive.rpartition('.')[0]
    if not dest_dirpath.exists():
        os.system(f'unzip -q -o {(tmp_dirpath / source_archive).as_posix()} -d {tmp_dirpath.as_posix()}')
    else:
        print(f"Target directory already exists! Please delete it to avoid overriding existing files.")

Unzipping ... LARD_train_BIRK_LFST.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_DAAG_DIAP.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_KMSY.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_LFMP_LFPO.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_LFQQ.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_LPPT_SRLI.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_train_VABB.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_test_real.zip
Target directory already exists! Please delete it to avoid overriding existing files.
Unzipping ... LARD_te