# Direct S3 Data Access - Rough PODAAC AVHRR OI SST Example

---

### Import Required Packages

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import subprocess
import requests
import boto3
import s3fs
import pandas as pd
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geopandas
import pyproj
from pyproj import Proj
from shapely.ops import transform
import geoviews as gv
from cartopy import crs
import hvplot.xarray
import holoviews as hv
# Load the Bokeh and Matplotlib plotting backends for GeoViews.
gv.extension('bokeh', 'matplotlib')

In [None]:
from pystac_client import Client

In [None]:
# s3credentials endpoint URL for each DAAC, keyed by a short DAAC name.
s3_cred_endpoint = dict(
    podaac='https://archive.podaac.earthdata.nasa.gov/s3credentials',
    lpdaac='https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
)

In [None]:
def get_temp_creds(provider='podaac', timeout=60):
    """Request temporary S3 credentials from a DAAC s3credentials endpoint.

    Parameters
    ----------
    provider : str, optional
        Key into ``s3_cred_endpoint`` selecting the endpoint to call.
        Defaults to ``'podaac'``.
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 60.

    Returns
    -------
    dict
        The decoded JSON response. Other cells in this notebook read the
        ``accessKeyId``, ``secretAccessKey`` and ``sessionToken`` keys.

    Raises
    ------
    KeyError
        If ``provider`` is not a key of ``s3_cred_endpoint``.
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    """
    # NOTE(review): no credentials are passed explicitly; presumably Earthdata
    # Login authentication comes from the user's ~/.netrc - confirm.
    temp_creds_url = s3_cred_endpoint[provider]
    response = requests.get(temp_creds_url, timeout=timeout)
    # Fail here on an HTTP error instead of passing an error payload on as
    # if it were credentials.
    response.raise_for_status()
    return response.json()

In [None]:
# Fetch the temporary S3 credentials; later cells read the accessKeyId,
# secretAccessKey and sessionToken keys of this dict.
temp_creds_req = get_temp_creds()

In [None]:
# boto3 session in us-west-2 built from the temporary credentials in
# temp_creds_req.
session = boto3.Session(
    region_name='us-west-2',
    aws_access_key_id=temp_creds_req['accessKeyId'],
    aws_secret_access_key=temp_creds_req['secretAccessKey'],
    aws_session_token=temp_creds_req['sessionToken'],
)

In [None]:
# Create a rasterio environment that uses the boto3 session for AWS access.
# The GDAL options set GDAL_DISABLE_READDIR_ON_OPEN to 'EMPTY_DIR' and point
# both the HTTP cookie file and cookie jar at ~/cookies.txt.
rio_env = rio.Env(AWSSession(session),
                  GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
                  GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
                  GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
# Entered by hand rather than through a `with` block.
# NOTE(review): presumably so the environment stays active across the
# following cells; no matching __exit__() call is visible here - confirm.
rio_env.__enter__()

In [None]:
# Open the CMR STAC catalog for the POCLOUD provider.
podaac_cat = Client.open('https://cmr.earthdata.nasa.gov/stac/POCLOUD/')

In [None]:
# Search the AVHRR_OI-NCEI-L4-GLOB-v2.1 collection over the '2016/2018'
# datetime range.
search = podaac_cat.search(
    collections=['AVHRR_OI-NCEI-L4-GLOB-v2.1'],
    datetime='2016/2018'
)

In [None]:
# Number of items the search matched.
search.matched()

In [None]:
# Fetch all matching items and display them as a list.
# NOTE(review): newer pystac-client releases appear to deprecate
# get_all_items() in favour of item_collection() - confirm against the
# installed version.
items = search.get_all_items()
list(items)

In [None]:
# HTTPS link of the 'data' asset of the second item returned by the search.
sst_https = items[1].get_assets()['data'].href
# Display the full URL.
sst_https

In [None]:
# Turn the HTTPS link into an S3 URL by replacing the archive host prefix
# with 's3://'.
sst_s3 = sst_https.replace('https://archive.podaac.earthdata.nasa.gov/', 's3://')
sst_s3

## Single-file in-region direct S3 access of a NetCDF file

In [None]:
# Authenticated (non-anonymous) s3fs filesystem using the temporary
# credentials in temp_creds_req.
fs_s3 = s3fs.S3FileSystem(
    anon=False,
    key=temp_creds_req['accessKeyId'],
    secret=temp_creds_req['secretAccessKey'],
    token=temp_creds_req['sessionToken'],
)

In [None]:
# Open the S3 object in binary read mode as a file-like object.
s3_file_obj = fs_s3.open(sst_s3, mode='rb')

In [None]:
# Read the open file object with xarray's h5netcdf engine and display the
# resulting dataset.
# NOTE(review): s3_file_obj is never closed in the visible cells; presumably
# it has to stay open while xarray reads from it lazily - confirm.
sst_xr = xr.open_dataset(s3_file_obj, engine='h5netcdf')
sst_xr

## Multi-file in-region direct S3 access of NetCDF files

In [None]:
# HTTPS link of the 'data' asset for every item; preview the first ten.
sst_https_urls = [item.get_assets()['data'].href for item in items]
sst_https_urls[:10]

In [None]:
# Same links with the archive host prefix replaced by 's3://'; preview the
# first ten.
sst_s3_urls = [
    url.replace('https://archive.podaac.earthdata.nasa.gov/', 's3://')
    for url in sst_https_urls
]
sst_s3_urls[:10]

In [None]:
# Open every URL in sst_s3_urls through the s3fs filesystem to build the
# list of file-like objects passed to xarray below.
fileset = [fs_s3.open(file) for file in sst_s3_urls]

In [None]:
# Combine all files into one dataset with the h5netcdf engine, chunked as
# time=1096, lat=100, lon=100.
# NOTE(review): 1096 equals the number of days in 2016-2018 (366 + 365 + 365),
# so presumably the whole time axis of each 100 x 100 tile sits in a single
# chunk - confirm.
sst_xr_ts = xr.open_mfdataset(fileset, engine='h5netcdf', chunks= {'time':1096, 'lat':100, 'lon':100})

In [None]:
sst_xr_ts

In [None]:
#sst_xr_ts.analysed_sst.hvplot.image()

---

Read in point locations

In [None]:
import pandas as pd

In [None]:
# Load the sample point pairs. The sampling functions below read the
# "lon.ns"/"lat.ns" and "lon.os"/"lat.os" columns of this table.
df = pd.read_csv('../data/sample_point_pairs_trim.csv')

In [None]:
# Preview the first rows.
df.head()

In [None]:
def getdf2_ns(ras, pts):
    """Sample ``analysed_sst`` at the grid cell nearest each ".ns" point.

    Parameters
    ----------
    ras : xarray.Dataset
        Dataset with an ``analysed_sst`` variable selectable by ``lon`` and
        ``lat``.
    pts : pandas.DataFrame
        Table with ``lon.ns`` and ``lat.ns`` columns, one row per point.
        NOTE(review): ".ns" presumably stands for "nearshore" - confirm.

    Returns
    -------
    xarray.DataArray
        ``analysed_sst`` selected with nearest-neighbour lookup, with one
        entry per row of ``pts`` along dimension ``i``.
    """
    # Both indexers share the dimension name 'i', so xarray picks one
    # (lon, lat) pair per row instead of the full lon x lat outer product.
    ind_x = xr.DataArray(pts["lon.ns"], dims=['i'])
    ind_y = xr.DataArray(pts["lat.ns"], dims=['i'])
    return ras.analysed_sst.sel(lon=ind_x, lat=ind_y, method='nearest')

In [None]:
def getdf2_os(ras, pts):
    """Sample ``analysed_sst`` at the grid cell nearest each ".os" point.

    Parameters
    ----------
    ras : xarray.Dataset
        Dataset with an ``analysed_sst`` variable selectable by ``lon`` and
        ``lat``.
    pts : pandas.DataFrame
        Table with ``lon.os`` and ``lat.os`` columns, one row per point.
        NOTE(review): ".os" presumably stands for "offshore" - confirm.

    Returns
    -------
    xarray.DataArray
        ``analysed_sst`` selected with nearest-neighbour lookup, with one
        entry per row of ``pts`` along dimension ``i``.
    """
    # Both indexers share the dimension name 'i', so xarray picks one
    # (lon, lat) pair per row instead of the full lon x lat outer product.
    ind_x = xr.DataArray(pts["lon.os"], dims=['i'])
    ind_y = xr.DataArray(pts["lat.os"], dims=['i'])
    return ras.analysed_sst.sel(lon=ind_x, lat=ind_y, method='nearest')

In [None]:
#sst_sel = getdf2(sst_xr_ts.isel(time=slice(0,10)), df)

In [None]:
# analysed_sst sampled at the ".ns" point of every pair in df.
sst_ns = getdf2_ns(sst_xr_ts, df)

In [None]:
sst_ns

In [None]:
# analysed_sst sampled at the ".os" point of every pair in df.
sst_os = getdf2_os(sst_xr_ts, df)

In [None]:
sst_os

In [None]:
# Difference for each point pair: the ".os" sample minus the ".ns" sample.
sst_dif = sst_os - sst_ns

In [None]:
sst_dif

In [None]:
# Binary index: 1 where the difference is greater than 2, otherwise 0.
# .compute() evaluates the result.
# NOTE(review): a NaN difference compares False and is therefore written as
# 0 rather than kept as missing - confirm that is intended.
upwelling_index = xr.where(sst_dif > 2, 1, 0).compute()

In [None]:
upwelling_index

In [None]:
# Plot the index with hvplot.
upwelling_index.hvplot()