In [4]:
import xarray as xr
import fsspec
import s3fs
import os
import matplotlib.pyplot as plt
import dask
import rasterio
from dask.distributed import Client, LocalCluster, progress
import datetime
import tempfile
import boto3

In [5]:
env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR', 
           AWS_NO_SIGN_REQUEST='YES',
           GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
           GDAL_SWATH_SIZE='200000000',
           VSI_CURL_CACHE_SIZE='200000000')
os.environ.update(env)

In [52]:
def convert_full_date_to_continous_day(year, month, day):
    """
    Helper function if you wish to use month, day vs julian day
    """
    return datetime.datetime(year, month, day).timetuple().tm_yday

def get_geo_uri(year, day):
    """
    returns list of geo uris
    """
    fs = s3fs.S3FileSystem(anon=True)
    files = []
    
    filepath = "s3://noaa-goes17/ABI-L1b-RadC/%s/%s/*/*.nc" % (str(year).zfill(4), str(day).zfill(3)) 
    files = fs.glob(filepath)
    
    if len(files) < 1:
        raise Exception("No files found")
    
    return files

def download_to_xarray(uri):
    """
    Downloads file and directly loads it into xarray in memory
    """
    s3 = boto3.client("s3")
    
    with tempfile.NamedTemporaryFile() as temp_file:
        s3.download_file(Bucket=uri[:11], Key=uri[12:], Filename=temp_file.name)
        datastore = xr.open_dataset(temp_file.name)
        
    return datastore

def download_to_disk(uri):
    s3 = boto3.client("s3")
    filename = uri[12:].replace("/", "-")
    if not os.path.exists(filename):
        s3.download_file(Bucket=uri[:11], Key=uri[12:], Filename=filename)
        
    return filename

In [44]:
#TUBBS = {"year": 2017, "day1":220, "day2":243}
#CAMP = {"year": 2018, "day1":312, "day2":329}
#WOOLSEY = {"year": 2018, "day1":312, "day2":325}

#below are incorrect to deal with data source
CAMP = {"year": 2018, "day1":317, "day2":329}
WOOLSEY = {"year": 2018, "day1":317, "day2":325}

Downloading a single goes file from s3 takes 1.7s, and as the data we want to look at is larger than our memory, we first download to disk the portions of time we're interested in 

In [34]:
tubbs_uris = []
camp_uris = []
woolsey_uris = []

In [39]:
for i in range(TUBBS["day2"] - TUBBS["day1"]):
    day = i + TUBBS["day1"]
    tubbs_uris += get_geo_uri(TUBBS["year"], day)

Exception: No files found

In [45]:
for i in range(CAMP["day2"] - CAMP["day1"]):
    day = i + CAMP["day1"]
    camp_uris += get_geo_uri(CAMP["year"], day)

s3://noaa-goes17/ABI-L1b-RadC/2018/317/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/318/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/319/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/320/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/321/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/322/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/323/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/324/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/325/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/326/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/327/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/328/*/*.nc


In [46]:
for i in range(WOOLSEY["day2"] - WOOLSEY["day1"]):
    day = i + WOOLSEY["day1"]
    woolsey_uris += get_geo_uri(WOOLSEY["year"], day)

s3://noaa-goes17/ABI-L1b-RadC/2018/317/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/318/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/319/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/320/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/321/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/322/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/323/*/*.nc
s3://noaa-goes17/ABI-L1b-RadC/2018/324/*/*.nc


In [49]:
camp_filepaths = []
woolsey_filepaths = []

In [None]:
for key in camp_uris:
    camp_filepaths += download_to_disk(key)
for key in woolsey_uris:
    woolsey_filepaths += download_to_disk(key)