<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-and-setup-functions" data-toc-modified-id="Imports-and-setup-functions-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports and setup functions</a></span></li><li><span><a href="#Read-netCDF-files" data-toc-modified-id="Read-netCDF-files-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Read netCDF files</a></span><ul class="toc-item"><li><span><a href="#Create-image-IDs" data-toc-modified-id="Create-image-IDs-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Create image IDs</a></span></li></ul></li></ul></div>

# Imports and setup functions

In [1]:
import os
from matplotlib import pyplot as plt
import numpy as np
import netCDF4 as nc
import xarray

In [2]:
def dataDir(x):
    """Return the path of `x` inside the `data/` folder of the working directory.

    Parameters
    ----------
    x : str
        File or folder name (may contain sub-folders) relative to `data/`.

    Returns
    -------
    str
        The current working directory, followed by "/data/" and `x`.
    """
    data_root = os.getcwd() + "/data/"
    return data_root + x

# Read netCDF files

## Create image IDs

In [None]:
# Hierarchy:

# |data/
# |--- |SST_ATL_PHY.../
# |----|-------------- |2021/
# |----|---------------|--- |01/
# |----|---------------|----|- 20210101_image.nc  # (day 1)
# |----|---------------|----|- 20210102_image.nc  # (day 2)
# |----|---------------|----|- 20210103_image.nc  # (day 3) ...

# Image ID: "sst_yyyymmdd", built from the first 8 characters of each file name

top_path = dataDir("SST_ATL_PHY_L3S_NRT_010_037/")

# os.listdir returns entries in arbitrary order, so every listing is sorted to
# make the load order (and m_paths[0]) reproducible across runs and machines.
# Entries that are not directories (e.g. .DS_Store) are skipped so the nested
# listing below cannot fail on them.
y_paths = [
    top_path + y + "/"
    for y in sorted(os.listdir(top_path))
    if os.path.isdir(top_path + y)
]
m_paths = [
    y + m
    for y in y_paths
    for m in sorted(os.listdir(y))
    if os.path.isdir(y + m)
]

# Maps image ID -> "sea_surface_temperature" values read from that day's file
data_dict = {}

for m in m_paths:
    # Only netCDF files are opened; anything else in the month folder is ignored
    day_files = sorted(f for f in os.listdir(m) if f.endswith(".nc"))

    for d in day_files:
        fid = "sst_" + d[:8]  # yyyymmdd

        full_path = m + "/" + d
        # The context manager closes the file as soon as the values have been
        # read, instead of leaving one open handle per daily file.
        with nc.Dataset(full_path) as d_nc:
            data = d_nc["sea_surface_temperature"][:]

        data_dict[fid] = data

In [None]:
# TODOs:
# 1. Look at masked_array class and see if just numeric values can be extracted
#    Link: https://numpy.org/doc/stable/reference/maskedarray.html
# 2. Load extracted numeric values into PyTorch Dataset
#    Link: https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset
# 3. Clean up imports

In [None]:
# Sanity check: show the first 8 characters of the first file name listed in
# the first month folder, i.e. the part used as the yyyymmdd image ID.
os.listdir(m_paths[0])[0][:8]

In [None]:
# Summarise the loaded images instead of echoing every full array: one entry
# per image ID with the shape of its array keeps the cell output readable.
{fid: arr.shape for fid, arr in data_dict.items()}

In [None]:
# Open a single daily file for interactive inspection. The dataset is left
# open on purpose: the cells below keep reading from `test_nc`.
test_fp = dataDir(
    "SST_ATL_PHY_L3S_NRT_010_037/2021/01/"
    "20210101000000-IFR-L3S_GHRSST-SSTfnd-ODYSSEA-ATL_002-v02.1-fv01.0.nc"
)
test_nc = nc.Dataset(test_fp)

# Data model, groups and the dataset summary, one per line
print(test_nc.data_model, test_nc.groups, test_nc, sep="\n")

In [None]:
# Display the "sea_surface_temperature" entry of the test file as the cell output
test_nc["sea_surface_temperature"]

In [None]:
# Keep a handle on the SST variable (reused by the cells below) and print all
# of its values. Per the TODO cell, the slice is presumably a numpy masked
# array — TODO confirm.
sst = test_nc["sea_surface_temperature"]
print(sst[:])

In [None]:
# First entry along the leading axis of the SST variable
# (presumably the single time step of this daily file — TODO confirm)
sst[0,]

In [None]:
# Render the first slice of the SST variable as an image
plt.imshow(sst[0,])  # flipped coordinates: ny, nx

https://www.youtube.com/@climateunboxed/videos

https://www.earthinversion.com/utilities/reading-NetCDF4-data-in-python/

https://stackoverflow.com/questions/36360469/read-nc-netcdf-files-using-python

https://unidata.github.io/netcdf4-python/#creatingopeningclosing-a-netcdf-file

https://marine.copernicus.eu/user-corner/user-notification-service/gh-416-technical-issue-impacting-new

https://www.earthdatascience.org/courses/use-data-open-source-python/hierarchical-data-formats-hdf/use-netcdf-in-python-xarray/

https://docs.xarray.dev/en/stable/generated/xarray.open_mfdataset.html

https://stackoverflow.com/questions/35422862/speeding-up-reading-of-very-large-netcdf-file-in-python

https://numpy.org/doc/stable/reference/maskedarray.html