# HDF download

In [11]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

import fire.downloader as fdown
import fire.utils.io as uio
import fire.utils.modis as umod

# Get URLs to download

Load the pre-collected URLs of all MOD14A1 HDF files.

In [2]:
# Load the pre-collected list of MOD14A1 file URLs (one entry per HDF file).
mod14a1_urls = uio.read_lines("../data/_urls/MOD14A1.006.txt")

# Report how many URLs were read, then preview the first few entries.
n_mod14a1_urls = len(mod14a1_urls)
print("Count:  ", n_mod14a1_urls)
mod14a1_urls[:5]

Count:   269856


['https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2000.02.18/MOD14A1.A2000049.h00v08.006.2015041132347.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2000.02.18/MOD14A1.A2000049.h00v09.006.2015041132244.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2000.02.18/MOD14A1.A2000049.h00v10.006.2015041132152.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2000.02.18/MOD14A1.A2000049.h01v07.006.2015041130011.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2000.02.18/MOD14A1.A2000049.h01v08.006.2015041131418.hdf']

In [3]:
# Build a tabular index with one row per URL. The displayed columns are
# url, fname, sat_name, fname_date, h and v (presumably parsed from the
# file name by the helper -- confirm in fire.utils.modis).
hdf_index = umod.make_hdf_index_from_paths(mod14a1_urls)
# Preview the first rows of the index.
hdf_index.head()

Unnamed: 0,url,fname,sat_name,fname_date,h,v
0,https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2...,MOD14A1.A2000049.h00v08.006.2015041132347.hdf,MOD,2000-02-18,0,8
1,https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2...,MOD14A1.A2000049.h00v09.006.2015041132244.hdf,MOD,2000-02-18,0,9
2,https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2...,MOD14A1.A2000049.h00v10.006.2015041132152.hdf,MOD,2000-02-18,0,10
3,https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2...,MOD14A1.A2000049.h01v07.006.2015041130011.hdf,MOD,2000-02-18,1,7
4,https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2...,MOD14A1.A2000049.h01v08.006.2015041131418.hdf,MOD,2000-02-18,1,8


We are interested in Spain, so we select the MODIS tile (h=17, v=4). We also only need data from 2010 onwards, so we filter the index by tile and by date.

In [4]:
# MODIS HDF files typically hold 8 days of data, so step back 8 days from
# 2010-01-01 to also keep the file that overlaps the start of 2010.
earliest_modis_date_2010 = datetime.strptime("2010-01-01", r"%Y-%m-%d") - timedelta(days=8)
print("Earliest hdf file date for 2010:  ", earliest_modis_date_2010)

# Step 1: keep only the rows of the selected tile (h=17, v=4).
hdf_index_spain = hdf_index.query("h==17 & v==4")

# Step 2: keep only files dated on or after the cut-off computed above.
# `@name` inside the query string refers to the Python variable of that name.
hdf_index_spain_since_2010 = hdf_index_spain.query("fname_date >= @earliest_modis_date_2010")

# Step 3: extract the download URLs as a plain Python list.
hdf_urls_spain_since_2010 = hdf_index_spain_since_2010["url"].to_list()

print("Count:  ", len(hdf_urls_spain_since_2010))
hdf_urls_spain_since_2010[:5]

Earliest hdf file date for 2010:   2009-12-24 00:00:00
Count:   466


['https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2009.12.27/MOD14A1.A2009361.h17v04.006.2015198031301.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2010.01.01/MOD14A1.A2010001.h17v04.006.2015198041536.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2010.01.09/MOD14A1.A2010009.h17v04.006.2015198101258.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2010.01.17/MOD14A1.A2010017.h17v04.006.2015198095435.hdf',
 'https://e4ftl01.cr.usgs.gov/MOLT/MOD14A1.006/2010.01.25/MOD14A1.A2010025.h17v04.006.2015198191738.hdf']

# Make target paths

In [5]:
# Derive one local target path per selected URL, rooted at "../data/".
# The list keeps the same order as hdf_urls_spain_since_2010.
target_paths = [
    umod.default_target_path_scheme(hdf_url, "../data/")
    for hdf_url in hdf_urls_spain_since_2010
]
target_paths[:5]

['../data/MOD14A1.006/2009.12.27/MOD14A1.A2009361.h17v04.006.2015198031301.hdf',
 '../data/MOD14A1.006/2010.01.01/MOD14A1.A2010001.h17v04.006.2015198041536.hdf',
 '../data/MOD14A1.006/2010.01.09/MOD14A1.A2010009.h17v04.006.2015198101258.hdf',
 '../data/MOD14A1.006/2010.01.17/MOD14A1.A2010017.h17v04.006.2015198095435.hdf',
 '../data/MOD14A1.006/2010.01.25/MOD14A1.A2010025.h17v04.006.2015198191738.hdf']

# Download

In [6]:
# Get user and password for LPDAAC. The lookup key is the Earthdata Login
# host; judging by the helper's name the credentials come from a netrc file
# (TODO confirm in fire.downloader), which keeps them out of the notebook.
user_pwd = fdown.get_auth_from_netrc('urs.earthdata.nasa.gov')

In [13]:
# Download every selected URL to its target path, authenticating with user_pwd.
# The two lists are index-aligned: target_paths was built from
# hdf_urls_spain_since_2010 in the same order.
# NOTE(review): `successes` presumably holds a per-file success indicator;
# confirm against fire.downloader.fetch_many_files.
successes = fdown.fetch_many_files(hdf_urls_spain_since_2010, target_paths, user_pwd)

Downloading 466 files.

processed 457/466, est. time left: 0.0 m                                           
   total time: 0.0 m
processed 455/466, est. time left: 0.0 m   


466/466 files downloaded successfully (100.0 %)
