In [16]:
import xarray as xr
import fsspec
import s3fs
import os
import matplotlib.pyplot as plt
import dask
from dask.distributed import Client, LocalCluster, progress
import datetime

In [25]:
# GDAL / AWS configuration exported into the process environment so that
# every raster opened later in the notebook picks it up.
env = {
    # Do not list the sibling files of a dataset when opening it.
    'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
    # Send unsigned (anonymous) requests to S3.
    'AWS_NO_SIGN_REQUEST': 'YES',
    # Block-cache / swath sizes, all set to the same 200000000 value.
    'GDAL_MAX_RAW_BLOCK_CACHE_SIZE': '200000000',
    'GDAL_SWATH_SIZE': '200000000',
    # NOTE(review): confirm GDAL recognises this option name; the documented
    # curl cache option appears to be CPL_VSIL_CURL_CACHE_SIZE.
    'VSI_CURL_CACHE_SIZE': '200000000',
}
os.environ.update(env)

In [26]:
# Start a local Dask cluster with processes=False, using /tmp as the
# workers' local directory, then attach a client to it.
cluster = LocalCluster(
    local_directory='/tmp',
    processes=False,
)
client = Client(cluster)

In [33]:
def convert_full_date_to_continous_day(year, month, day):
    """Return the 1-based day of the year for a calendar date.

    Args:
        year: Calendar year.
        month: Month number (1-12).
        day: Day of the month.

    Returns:
        int: Day of the year, e.g. 1 for January 1st.

    Raises:
        ValueError: If the arguments do not form a valid date
            (raised by ``datetime.datetime``).
    """
    moment = datetime.datetime(year, month, day)
    fields = moment.timetuple()
    return fields.tm_yday

def get_geo_uri(year, month, day):
    """List the GOES-17 ABI-L2-SSTF NetCDF objects for one calendar date.

    The objects are looked up under
    ``s3://noaa-goes17/ABI-L2-SSTF/<year>/<day-of-year>/*/*.nc`` using
    anonymous S3 access.

    Args:
        year: Calendar year.
        month: Month number (1-12).
        day: Day of the month.

    Returns:
        list[str]: One ``s3://`` URI per object matched by the glob.

    Raises:
        ValueError: If the arguments do not form a valid date.
        FileNotFoundError: If the glob matches no objects.
    """
    fs = s3fs.S3FileSystem(anon=True)

    # The folder under the year is keyed by day-of-year, not day-of-month,
    # so the calendar date must be converted before building the prefix.
    continous_day = convert_full_date_to_continous_day(year, month, day)
    filepath = "s3://noaa-goes17/ABI-L2-SSTF/%s/%s/*/*.nc" % (
        str(year).zfill(4),
        str(continous_day).zfill(3),
    )
    objects = fs.glob(filepath)

    if len(objects) < 1:
        raise FileNotFoundError("No files found")

    # The glob results are prefixed with the scheme to form full URIs.
    images = ['s3://' + obj for obj in objects]

    return images

def lazy_open(href):
    """Open a raster with ``xr.open_rasterio`` using fixed chunk sizes.

    Args:
        href: Location of the raster, passed straight to
            ``xr.open_rasterio``.

    Returns:
        Whatever ``xr.open_rasterio`` returns for ``href``, chunked as one
        band by 2745 x 2745 pixels.
    """
    # NOTE(review): xr.open_rasterio is deprecated in newer xarray releases
    # in favour of rioxarray; confirm the installed version still ships it.
    chunk_sizes = {'band': 1, 'x': 2745, 'y': 2745}
    return xr.open_rasterio(href, chunks=chunk_sizes)

def get_dask_arrays(images):
    """Open every image through ``lazy_open`` in a single ``dask.compute``.

    Args:
        images: Iterable of raster locations accepted by ``lazy_open``.

    Returns:
        tuple: The results of ``lazy_open``, one per entry of ``images``,
        in the same order.
    """
    open_later = dask.delayed(lazy_open)
    pending = [open_later(uri) for uri in images]
    return dask.compute(*pending)