# Filecache downloader

Downloads files to the configured cache directory to accelerate file loading during training.

In [None]:
import pathlib
from datetime import datetime
import logging

from usl_models.atmo_ml import dataset

# Let INFO-level records through the root logger.
logging.getLogger().setLevel(level=logging.INFO)

# Cache directory handed to the download and load calls in the cells below.
filecache_path = pathlib.Path("/home/shared/climateiq/filecache")

# Simulations to cache, grouped by study area. The NYC chunks can be browsed at
# https://console.cloud.google.com/storage/browser/climateiq-study-area-feature-chunks/NYC_Heat_Test
# NOTE(review): the PHX entries presumably live in a sibling PHX_Heat_Test folder
# of the same bucket -- confirm.
sim_names = [
    "NYC_Heat_Test/NYC_summer_2000_01p",
    "NYC_Heat_Test/NYC_summer_2010_99p",
    "NYC_Heat_Test/NYC_summer_2015_50p",
    "NYC_Heat_Test/NYC_summer_2017_25p",
    "NYC_Heat_Test/NYC_summer_2018_75p",
] + [
    "PHX_Heat_Test/PHX_summer_2008_25p",
    "PHX_Heat_Test/PHX_summer_2009_50p",
    "PHX_Heat_Test/PHX_summer_2011_99p",
    "PHX_Heat_Test/PHX_summer_2015_75p",
    "PHX_Heat_Test/PHX_summer_2020_01p",
]

In [None]:
# Pull every simulation named in `sim_names` down into the file cache directory.
dataset.download_dataset(
    sim_names,
    filecache_path,
)

In [None]:
# Smoke test: load one day of inputs for the first simulation via the file cache.
sample_date = datetime.strptime("2000-06-03", dataset.DATE_FORMAT)
dataset.load_day_inputs_cached(
    filecache_dir=filecache_path,
    sim_name=sim_names[0],
    config=dataset.Config(),
    date=sample_date,
)

In [None]:
# Benchmark loading a day over and over.
%%timeit -n 20
dataset.load_day_cached(
    path=filecache_path / sim_names[0],
    date=datetime.strptime("2000-05-25", dataset.DATE_FORMAT),
)