In [25]:
import os
from datetime import datetime
from pathlib import Path

import geowombat as gw
from geowombat.core import ndarray_to_xarray
from geowombat.radiometry import QAMasker

import numpy as np
import xarray as xr
from pyhdf.SD import SD, SDC

In [16]:
def hdf_to_geotiff(filename, output, mask_items=None, n_workers=1, n_threads=1, **kwargs):
    """Write the datasets of an HDF file to a QA-masked, LZW-compressed raster.

    Every dataset in ``filename`` is read with ``pyhdf`` and stacked into a
    single array. The dataset named ``'QA'`` is handed to ``QAMasker`` (sensor
    ``'hls'``) to build a mask; all other datasets are written to ``output``
    with pixels replaced by 0 wherever the mask value is not below 2.

    Args:
        filename (str): Path of the HDF file to read. It must contain a
            dataset named ``'QA'``.
        output (str): Path of the raster to write. An existing file is
            overwritten.
        mask_items (list of str, optional): QA categories passed to
            ``QAMasker``. ``None`` or an empty list selects
            ``['cirrus', 'cloud', 'adjacent', 'shadow']``.
        n_workers (int, optional): Forwarded to ``DataArray.gw.to_raster``.
        n_threads (int, optional): Forwarded to ``DataArray.gw.to_raster``.
        **kwargs: Forwarded to ``gw.open`` (for example ``chunks``).

    Returns:
        None. The result is written to ``output``.
    """

    if not mask_items:
        mask_items = ['cirrus', 'cloud', 'adjacent', 'shadow']

    # Read every dataset into memory, then release the HDF handles even if a
    # read fails. The arrays are materialised here, so nothing below needs the
    # pyhdf file to stay open.
    hdf = SD(filename, SDC.READ)
    try:
        band_names = list(hdf.datasets().keys())
        arrays = []
        for name in band_names:
            sds = hdf.select(name)
            try:
                arrays.append(sds.get())
            finally:
                sds.endaccess()
    finally:
        hdf.end()

    # Everything except the QA layer ends up in the output raster.
    # NOTE(review): np.array(arrays) below presumes all datasets share one
    # shape -- confirm for other products.
    spectral_bands = [name for name in band_names if name != 'QA']
    n_bands = len(spectral_bands)

    with gw.open(filename, **kwargs) as src:

        stack = ndarray_to_xarray(src, np.array(arrays), band_names)

        # Keep only the first nodata/scale/offset entry of the opened source
        # and repeat it once per output band.
        stack.attrs['nodatavals'] = [stack.attrs['nodatavals'][0]] * n_bands
        stack.attrs['scales'] = [stack.attrs['scales'][0]] * n_bands
        stack.attrs['offsets'] = [stack.attrs['offsets'][0]] * n_bands
        stack.attrs['bands'] = n_bands

        # Saved now so the metadata can be reattached after masking.
        attrs = stack.attrs.copy()

        mask = QAMasker(stack.sel(band='QA'),
                        'hls',
                        mask_items).to_mask()

        masked = stack.sel(band=spectral_bands)
        masked = xr.where(mask.sel(band='mask') < 2, masked, 0).transpose('band', 'y', 'x')
        masked.attrs = attrs

        masked.gw.to_raster(output,
                            n_workers=n_workers,
                            n_threads=n_threads,
                            overwrite=True,
                            compress='lzw')

In [22]:
# Root directory of the test data. Set the HLS_DATA_DIR environment variable to
# run the notebook against another location; when it is unset or empty the
# original path is used.
mainpath = Path(os.environ.get('HLS_DATA_DIR') or '/media/jcgr/data/projects/global_fields/test')

In [37]:
# Input HDF tile and the GeoTIFF to be written; both paths are built under
# `mainpath` and converted to plain strings.
hdf_file = str(mainpath.joinpath('L30/2018/21JXM/HLS.L30.T21JXM.2018325.v1.4.hdf'))
gtiff_file = str(mainpath.joinpath('L30/2018/21JXM/HLS.L30.T21JXM.2018325.v1.4.tif'))

In [39]:
# Convert the tile. `chunks` is not a named parameter of hdf_to_geotiff, so it
# travels through **kwargs to gw.open.
hdf_to_geotiff(
    hdf_file,
    gtiff_file,
    n_workers=6,
    n_threads=1,
    chunks=512,
)

100%|██████████| 64/64 [00:33<00:00,  1.89it/s]
100%|██████████| 64/64 [00:06<00:00,  9.20it/s]


In [29]:
# Day-of-year for 21 November 2018; presumably a cross-check of the "2018325"
# token in the HLS file name used above.
dt = datetime.strptime('20181121', '%Y%m%d')
tt = datetime.timetuple(dt)
tt.tm_yday

325