<center>
<table>
  <tr>
    <td><img src="https://portal.nccs.nasa.gov/datashare/astg/training/python/logos/nasa-logo.svg" width="100"/> </td>
     <td><img src="https://portal.nccs.nasa.gov/datashare/astg/training/python/logos/ASTG_logo.png?raw=true" width="80"/> </td>
     <td> <img src="https://www.nccs.nasa.gov/sites/default/files/NCCS_Logo_0.png" width="130"/> </td>
    </tr>
</table>
</center>

        
<center>
<h2><font color= "blue" size="+3">PyCon 2024 Tutorial</font></h2>
</center>

---

<center>
    <h3>Python Workflows to Extract and Plot Satellite Data Products along Tracks</h3>
    <h2><font color="red" size="+3">Tracking the Movement of the Aura Satellite</font></h2>
</center>

_______

# <font color="red"> Objectives</font>


---

## Required Packages


- __Matplotlib__: for basic plots.
- __Pandas__: Manipulation and exploratory data analysis of tabular data.
- __Shapely__: For manipulation and analysis of planar geometric objects
- __GeoPandas__: Combines the capabilities of Pandas and Shapely for geospatial operations.
- __MovingPandas__: Handling the movement of geospatial objects.
- __h5py__: Read HDF5 files.

----

In [None]:
import warnings
# Silence every warning category for the rest of the session so the tutorial
# output stays uncluttered. NOTE(review): this also hides deprecation and
# runtime warnings that could point to real problems; narrow the filter when debugging.
warnings.filterwarnings("ignore")

In [None]:
import ast
import datetime as dt
import os
from pathlib import Path

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

In [None]:
import numpy as np
import h5py
import pandas as pd
import geopandas as gpd

In [None]:
from shapely import geometry as shpgeom
from shapely import wkt as shpwkt

In [None]:
import movingpandas as mpd

In [None]:
import holoviews as hv

In [None]:
import hvplot.pandas 

In [None]:
# Keyword defaults meant for static (matplotlib-based) plots.
plot_defaults = dict(linewidth=5, capstyle='round', figsize=(9, 3), legend=True)

# Register default options for every HoloViews Overlay: wheel zoom active,
# fixed 500 x 400 frame.
overlay_opts = hv.opts.Overlay(
    active_tools=['wheel_zoom'],
    frame_width=500,
    frame_height=400,
)
hv.opts.defaults(overlay_opts)

# Keyword defaults meant for interactive (hvplot-based) plots.
hvplot_defaults = dict(tiles=None, cmap='Viridis', colorbar=True)

In [None]:
# Print the MovingPandas version report, so the environment used to run
# the notebook is recorded alongside the results.
mpd.show_versions()

## <font color="blue">Measurement of `NO2` by the Ozone Monitoring Instrument (OMI)</font>

- [The Ozone Monitoring Instrument (OMI)](https://www.earthdata.nasa.gov/learn/find-data/near-real-time/omi) aboard NASA's Aura satellite (launched in 2004) measures ozone from Earth's surface to top-of-atmosphere. 
  - OMI is a nadir-viewing wide-field-imaging spectrometer, giving daily global coverage.
  - OMI measures the key air quality components such as nitrogen dioxide (NO$_2$), sulfur dioxide (SO$_2$), bromine oxide (BrO), OClO, and aerosol characteristics.
  - OMI provides mapping of pollution products from an urban to super-regional scale.
- Near real-time (NRT) OMI data are available through LANCE generally within three hours after a satellite observation.

Here we focus on the [Nitrogen Dioxide (NO2) Total and Tropospheric Column](https://disc.gsfc.nasa.gov/datasets/OMNO2_003/summary) 1-orbit L2 Swath.

## <font color='red'> What is HDF5?</font>
* HDF5 is a file format and library for storing scientific data. 
* It supports files larger than 2 GB and parallel I/O. 
* An HDF5 file is a container for two kinds of objects: 
   1. **Datasets**: Array-like collections of data.
   2. **Groups**: Folder-like containers that hold datasets and other groups.

In [None]:
# Directory that holds the OMI OMNO2 granules.
# Set the OMI_DATA_DIR environment variable to point at your own copy of the
# data; when it is not set, the original author's local path is used.
data_dir = os.environ.get(
    "OMI_DATA_DIR",
    "/Users/jkouatch/myTasks/PythonTraining/ASTG606/Materials/sat_data/OMI_Data/",
)
# Alternative location kept from the original notebook:
#data_dir = "/tljh-data/sat_data/OMI_Data"

In [None]:
# Names of the fourteen OMNO2 L2 granules (orbits o100959 through o100972,
# all stamped 2023m0709), listed in increasing orbit-number order.
# They are resolved relative to `data_dir`.
list_files =[
    "OMI-Aura_L2-OMNO2_2023m0709t0114-o100959_v003-2023m0710t052026.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t0253-o100960_v003-2023m0710t052055.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t0432-o100961_v003-2023m0710t060000.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t0610-o100962_v003-2023m0710t124018.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t0749-o100963_v003-2023m0710t141856.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t0928-o100964_v003-2023m0710t141539.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1107-o100965_v003-2023m0710t143421.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1246-o100966_v003-2023m0710t171304.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1425-o100967_v003-2023m0710t171303.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1603-o100968_v003-2023m0710t171256.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1742-o100969_v003-2023m0710t171227.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t1921-o100970_v003-2023m0710t224725.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t2100-o100971_v003-2023m0710t224852.he5",
    "OMI-Aura_L2-OMNO2_2023m0709t2239-o100972_v003-2023m0710t224703.he5"
]

In [None]:
# Full path of the first granule; used below to explore the file structure.
fname = Path(data_dir) / list_files[0]

#### List all the datasets and their attributes: use `visititems` (mimicking `h5ls`)

In [None]:
# NOTE(review): `fname2` is not referenced by any other cell visible in this
# notebook, and it is an absolute path on the author's machine. Presumably a
# leftover from exploring another product; confirm before removing.
fname2 = "/Users/jkouatch/Downloads/TROPESS_OMI-Aura_L2_Standard_O3_20240101_MUSES_R1p22_FS_F0p9_J1.nc"

In [None]:
def print_attrs(name, obj):
    """
    Callback for `visititems`: print one HDF5 object and its attributes.

    The output is indented by four spaces per '/' found in `name`, so
    nested groups and datasets appear as a tree.

    Parameters
    ----------
    name : str
         Path of the object relative to the group being visited.
    obj : h5py.Group or h5py.Dataset
         The visited object.

    Returns
    -------
    None
         Returning None lets `visititems` continue with the next object.
    """
    shift = name.count('/') * '    '
    print(shift + name)
    if isinstance(obj, h5py.Dataset):
        # Read the shape from the dataset metadata: `obj[()]` would load
        # the whole array from disk only to discard it.
        print(shift + '    ' + f"Shape: {obj.shape}")
    for key, val in obj.attrs.items():
        print(shift + '    ' + f"{key}: {val}")
        
# Open the file read-only and walk every group/dataset below the root,
# calling `print_attrs` on each one. The file is closed on leaving the block.
with h5py.File(fname, mode='r') as fid:
    fid.visititems(print_attrs)  

#### How do you get specific information from the file?

In [None]:
# Two equivalent ways of addressing a group are shown on purpose:
#   - chained indexing, one level at a time (geo_group);
#   - a single '/'-separated path (data_group).
# `[()]` reads the full dataset, so the values remain usable after the
# file is closed.
with h5py.File(fname, mode='r') as fid:
    geo_group = fid['HDFEOS']['SWATHS']['ColumnAmountNO2']['Geolocation Fields']
    data_group = fid['HDFEOS/SWATHS/ColumnAmountNO2/Data Fields']
    tropo = data_group['TropopausePressure'][()]
    time = geo_group['Time'][()]
    lats = geo_group['SpacecraftLatitude'][()]
    lons = geo_group['SpacecraftLongitude'][()]

In [None]:
# Report the shape of each array read from the file, one per line,
# with the values aligned in a single column.
for label, arr in (
    ("tropo:", tropo),
    ("time:", time),
    ("latitude:", lats),
    ("longitude:", lons),
):
    print(f"Shape of {label:<11}{arr.shape}")

### Create the Pandas DataFrame

In [None]:
def convert_dict_dtype(sample_dict):
    '''
    Converts attribute dictionary from NumPy data types 
    to general Python data types.

    The dictionary is modified in place and also returned.

    Conversion rules:
      - a NumPy array becomes a list; a one-element array becomes
        that single element;
      - a NumPy byte string becomes a Python string; if the string
        starts with '(' or '{' and is a valid Python literal, it is
        parsed into the corresponding tuple/dict/set, otherwise it
        is kept as a string;
      - any other value is left untouched.

    Parameters
    ----------
    sample_dict : dict
         A dictionary of attributes

    Returns
    -------
    sample_dict : dict
         The same dictionary object, with converted values
    '''
    # Only values of existing keys are reassigned, so the dictionary size
    # never changes while it is being iterated.
    for key, item in sample_dict.items():
        if isinstance(item, np.ndarray):
            item = list(item)

            if len(item) == 1:
                item = item[0]
        elif isinstance(item, np.bytes_):
            item = str(item.astype('str'))

            # `startswith` is safe on an empty string, where `item[0]`
            # would raise IndexError.
            if item.startswith(('(', '{')):
                # Attribute text comes from the data file, so it must not be
                # passed to eval(). literal_eval only accepts Python literals;
                # anything else (e.g. "(unitless)") stays a plain string.
                try:
                    item = ast.literal_eval(item)
                except (ValueError, SyntaxError, TypeError):
                    pass

        sample_dict[key] = item   # Updates any changes to the key value

    return sample_dict

In [None]:
def get_ds_attrs(ds):
    """
    Return the attributes of a dataset as a plain dictionary.

    The attributes are copied into a new dict, which is then passed
    through `convert_dict_dtype`.

    Input Parameters:
       - ds: dataset identifier (any object exposing `.attrs`)
    Returned value:
       - a dictionary mapping attribute names to converted values
    """
    return convert_dict_dtype(dict(ds.attrs))

In [None]:
def get_ds_attribute_value(ds_attrs, attr_name):
    '''
    Obtain the value of a specified attribute in a dataset.

    Parameters
    ----------
    ds_attrs : dict
         A dictionary of dataset attributes
    attr_name : str
         Attribute name

    Returns
    -------
    value : float, int, str, list
         Value of the attribute. If attribute not available, None.
    '''
    # A direct dictionary lookup replaces the manual scan over all items.
    return ds_attrs.get(attr_name)

In [None]:
def restore_data(ds):
    '''
    Read a dataset and undo its fill-value/offset/scale encoding.

    Steps:
      1. read the full dataset;
      2. replace entries equal to the `_FillValue` attribute with NaN;
      3. subtract `add_offset`, when that attribute is present and non-zero;
      4. multiply by `scale_factor`, when that attribute is present and non-zero.

    Parameters
    ----------
    ds : h5py dataset identifier

    Returns
    -------
    data : numpy array
         The restored values.
    '''
    attrs = get_ds_attrs(ds)
    fill_value, scale, offset = (
        get_ds_attribute_value(attrs, attr_name)
        for attr_name in ('_FillValue', 'scale_factor', 'add_offset')
    )

    raw = ds[()]

    # Keep every value that differs from the fill value; mask the rest.
    restored = np.where(raw != fill_value, raw, np.nan)

    # NOTE(review): the order used here is (value - add_offset) * scale_factor.
    # Some conventions define value * scale_factor + add_offset instead;
    # confirm which one this product follows.
    if offset:
        restored -= offset
    if scale:
        restored *= scale

    return restored

In [None]:
def get_arrays(fname):
    """
    Read the along-track arrays needed for the trajectory from one file.

    Parameters
    ----------
    fname : str or Path
         Path of the HDF5 file to read.

    Returns
    -------
    tuple of four numpy arrays, in this order:
         - column 0 (second axis) of the restored 'TropopausePressure' field;
         - 'Time';
         - 'SpacecraftLatitude';
         - 'SpacecraftLongitude'.
    """
    with h5py.File(fname, 'r') as h5_file:
        swath = h5_file['HDFEOS']['SWATHS']['ColumnAmountNO2']
        geolocation = swath['Geolocation Fields']
        data_fields = swath['Data Fields']

        # The field read here is 'TropopausePressure'; the alternative kept
        # from the original notebook is 'ColumnAmountNO2Trop'.
        field_column = restore_data(data_fields['TropopausePressure'])[:, 0]
        scan_time = geolocation['Time'][()]
        sc_lats = geolocation['SpacecraftLatitude'][()]
        sc_lons = geolocation['SpacecraftLongitude'][()]

    return field_column, scan_time, sc_lats, sc_lons

In [None]:
num_files = len(list_files)

# Number of granules to read, starting from the first one.
# 1 keeps the tutorial fast; set it to `num_files` to process every file.
num_files_to_read = 1

# Collect the four arrays of each file first and concatenate once at the
# end, instead of re-copying the growing arrays on every iteration.
per_file_arrays = []
for file_name in list_files[:num_files_to_read]:
    fname = Path(data_dir) / file_name
    print(f"Reading: {fname}")
    per_file_arrays.append(get_arrays(fname))

# zip(*...) regroups the per-file tuples into one sequence per variable.
NO2, time, lats, lons = (
    np.concatenate(parts, axis=0) for parts in zip(*per_file_arrays)
)

In [None]:
# Display the shape of the combined data array as a quick sanity check.
NO2.shape

Convert the `Time` values (seconds elapsed since a reference epoch) to timestamp strings:

In [None]:
# The OMI L2 `Time` field is documented as TAI93: SI seconds elapsed since
# 1993-01-01T00:00:00 UTC, leap seconds included (see the OMNO2 README).
# It is not GPS time, so the epoch is 1993-01-01 rather than 1980-01-06.
tai93_epoch = dt.datetime(1993, 1, 1)

# Leap seconds inserted since the TAI93 epoch: TAI-UTC was 27 s on
# 1993-01-01 and has been 37 s since 2017-01-01. This constant is only
# valid for observations made after 2017-01-01.
leap_seconds_since_1993 = 37 - 27

# One formatted UTC timestamp string per scan line.
Times = np.zeros_like(time, dtype=object)
for j, tai93_seconds in enumerate(time):
    utc_time = tai93_epoch + dt.timedelta(
        seconds=float(tai93_seconds) - leap_seconds_since_1993
    )
    Times[j] = utc_time.strftime("%Y-%m-%d %H:%M:%S.%f")

In [None]:
# Assemble the along-track arrays into one table, one row per scan line.
# NOTE(review): the 'NO2TropSurf' column is filled from `NO2`, which
# `get_arrays` reads from the 'TropopausePressure' field; confirm the column name.
omi_columns = {
    "latitude": lats,
    "longitude": lons,
    "NO2TropSurf": NO2,
    "t": Times,
}
df_omi = pd.DataFrame(omi_columns)
df_omi

In [None]:
# Summarize column dtypes and non-null counts.
df_omi.info()

In [None]:
#df_omi = df_omi.set_index('t')
#df_omi

In [None]:
# Wrap longitudes into the [0, 360) range. Applying the modulo again leaves
# the values unchanged, so the cell can be re-run safely.
df_omi['longitude'] = df_omi['longitude'].mod(360)

### Visualization

Timeseries plot:

In [None]:
# Plot the 'NO2TropSurf' column against the timestamp column 't'.
# Tick labels are rotated because the timestamps are long strings;
# the trailing ';' suppresses the textual repr of the return value.
df_omi.plot(x='t', y='NO2TropSurf')
plt.xticks(rotation=90);

Histogram:

In [None]:
# Distribution of the 'NO2TropSurf' values on a 12 x 8 inch figure.
df_omi['NO2TropSurf'].plot(kind='hist', figsize=(12,8));

Trajectory:

In [None]:
# Build a single MovingPandas trajectory (id 1) from the table, taking
# positions from the 'longitude'/'latitude' columns and time from column 't'.
traj_omi = mpd.Trajectory(df_omi,
                          traj_id=1,
                          x = "longitude", y="latitude",
                          t="t")

In [None]:
# Static plot of the trajectory with default settings.
traj_omi.plot();

In [None]:
# Static plot of the trajectory on an explicit 12 x 10 inch axes, colored by
# the 'NO2TropSurf' column with the "jet" colormap and a legend.
fig, ax = plt.subplots(figsize=(12,10))

traj_omi.plot(legend=True, 
           column="NO2TropSurf", 
           capstyle='round', 
              cmap="jet", ax=ax);

In [None]:
# Interactive plot of the trajectory over the "ESRI" background tiles.
traj_omi.hvplot(tiles="ESRI")

In [None]:
# Interactive plot in a 300 x 300 frame, passing the latitude and longitude
# columns as hover columns.
hv_kwargs = dict(hover_cols=["latitude", "longitude"], frame_height=300, frame_width=300)

traj_omi.hvplot(**hv_kwargs)