# Pull in PACE data (Kd, AVW, BBP) for May 22, 2024 at the following lat/lon points:

## Coastal points:
## 37.586667, -76.300556
## 37.241667, -76.371389
## 36.987222, -76.279722

## Coordinates
### General Points
| Label   | Latitude  | Longitude |
|---------|-----------|-----------|
| Point 1 | 37.864444 | -76.762222|
| Point 2 | 37.504444 | -76.787500|
| Point 3 | 37.221389 | -76.893611|
### Coastal Points
| Label         | Latitude  | Longitude |
|---------------|-----------|-----------|
| Coastal Point 1 | 37.586667 | -76.300556|
| Coastal Point 2 | 37.241667 | -76.371389|
| Coastal Point 3 | 36.987222 | -76.279722|

### We need L2 AOP for AVW and L2 IOP for Kd and BBP

In [1]:
# Setup
import earthaccess
import xarray as xr
from xarray.backends.api import open_datatree
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd

In [2]:
# earthdata authentication
# NOTE(review): persist=True presumably stores the credentials for later
# sessions (e.g. in ~/.netrc) — confirm against the earthaccess docs
auth = earthaccess.login(persist=True)

In [3]:
# get OCI data
# search for every dataset (collection) whose instrument is "oci"; the short
# names printed in the next cell are used to pick the L2 products
results = earthaccess.search_datasets(instrument="oci")

In [4]:
# print the short name of every collection returned by the dataset search
for collection in results:
    print(collection.summary()["short-name"])

PACE_OCI_L0_SCI
PACE_OCI_L1A_SCI
PACE_OCI_L1B_SCI
PACE_OCI_L1C_SCI
PACE_OCI_L2_AOP_NRT
PACE_OCI_L2_BGC_NRT
PACE_OCI_L2_IOP_NRT
PACE_OCI_L2_PAR_NRT
PACE_OCI_L3B_CHL_NRT
PACE_OCI_L3B_IOP_NRT
PACE_OCI_L3B_KD_NRT
PACE_OCI_L3B_PAR_NRT
PACE_OCI_L3B_POC_NRT
PACE_OCI_L3B_RRS_NRT
PACE_OCI_L3M_CHL_NRT
PACE_OCI_L3M_IOP_NRT
PACE_OCI_L3M_KD_NRT
PACE_OCI_L3M_PAR_NRT
PACE_OCI_L3M_POC_NRT
PACE_OCI_L3M_RRS_NRT


In [5]:
# search for the day of interest
# (start, end) of the search window; both ends are the same calendar day
tspan = ("2024-05-22", "2024-05-22")
# search box; the values read as (west lon, south lat, east lon, north lat)
# — TODO confirm the order against the earthaccess bounding_box docs.
# NOTE(review): if that order is right, the "General Points" listed at the top
# (longitudes -76.76 to -76.89) lie west of this box's western edge (-76.75).
bbox = (-76.75, 36.97, -75.74, 39.01)
# passed as cloud_cover to the granule search; presumably a (min, max)
# percentage range — verify
clouds = (0, 50)

## Start with IOP to get Kd and BBP

In [6]:
# pull granules for these parameters
# the L2 IOP product is the one Kd and bbp_442 are read from further down
results = earthaccess.search_data(
    short_name="PACE_OCI_L2_IOP_NRT",
    temporal=tspan,
    bounding_box=bbox,
    cloud_cover=clouds
)

In [7]:
results

[Collection: {'Version': '2.0', 'ShortName': 'PACE_OCI_L2_IOP_NRT'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 47.47746, 'Longitude': -61.89199}, {'Latitude': 41.52162, 'Longitude': -95.06815}, {'Latitude': 24.36406, 'Longitude': -86.92741}, {'Latitude': 29.65719, 'Longitude': -60.46736}, {'Latitude': 47.47746, 'Longitude': -61.89199}]}}]}}}
 Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2024-05-22T17:37:48Z', 'EndingDateTime': '2024-05-22T17:42:47Z'}}
 Size(MB): 53.02047824859619
 Data: ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_IOP.V2_0.NRT.nc']]

In [8]:
results[0]

In [9]:
# open the granule(s) found by the search with earthaccess; the result is a
# list of file-like objects (see the S3FileSystem entry echoed below) that
# xarray reads from in the following cells
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
paths

[<File-like object S3FileSystem, ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_IOP.V2_0.NRT.nc>]

In [11]:
# quick look at the first granule as a plain Dataset; this variable is
# overwritten by the merged datatree a few cells below
dataset = xr.open_dataset(paths[0])

In [12]:
dataset

In [13]:
# re-open the same granule as a DataTree; its groups are merged into a single
# Dataset in the next step
datatree = open_datatree(paths[0])

In [14]:
datatree

In [15]:
# flatten the tree: merge the Dataset of every group into one Dataset
dataset = xr.merge(datatree.to_dict().values())

In [16]:
dataset

In [17]:
# mark longitude/latitude as coordinates so they accompany any selected
# variable (they show up as columns next to the parameter after to_dataframe())
dataset = dataset.set_coords(("longitude", "latitude"))

## Point 1

In [18]:
# # get closest match PACE coordinate to independently selected coordinate
# latlon = np.stack([lat,lon],-1)
# dists = np.linalg.norm(latlon - np.array(list(coordinates['point1'])),axis=-1)
# min_dist_idx = np.argmin(dists)
# min_dist_y = min_dist_idx // dists.shape[1]
# min_dist_x = min_dist_idx % dists.shape[1]

In [19]:
# # get the lat lon coordinates that have the minimum distance
# latlon[min_dist_y,min_dist_x]

In [20]:
# # filter the Kd data for the points
# df_kd = df[(df['longitude']==-76.30104) & (df['latitude']==37.583176)]
# df_kd

In [21]:
# df_kd.index = df_kd.index.droplevel(0)
# df_kd = df_kd.reset_index()

In [22]:
# df_kd = df_kd[df_kd['wavelength_3d']==490.0]

In [23]:
# df_kd[['wavelength_3d','Kd','longitude','latitude']]

# General Setup

In [24]:
# independently selected coordinates
# format: (lat,lon)
# NOTE: these are the three "Coastal Points" from the table at the top of the
# notebook; the "General Points" are not included here.
coordinates = {
    'point1': (37.586667, -76.300556),
    'point2': (37.241667, -76.371389),
    'point3': (36.987222, -76.279722)
}

In [25]:
# assign lat, lon variables the latitude and longitude values
# (taken from the IOP dataset loaded above; the extraction loops below search
# these arrays for the pixel nearest to each selected point)
lat, lon = dataset['latitude'], dataset['longitude']

## Kd - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [26]:
# set the parameter of interest
# IOP product: Kd or bbp_442
# AOP product: avw
param = 'Kd'

In [27]:
# set wavelength of interest
# it is compared with == against the wavelength_3d values, so it must match
# one of them exactly
wave = 490.0

## Kd - Create the dataframe using specified parameters

In [28]:
# flatten the parameter into a table: one row per
# (number_of_lines, pixels_per_line, wavelength_3d) with the pixel's
# longitude/latitude repeated on every row
df = dataset[[param]].to_dataframe()

In [29]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Kd,longitude,latitude
number_of_lines,pixels_per_line,wavelength_3d,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,351.0,,-86.927414,24.364056
0,0,361.0,,-86.927414,24.364056
0,0,385.0,,-86.927414,24.364056
0,0,413.0,,-86.927414,24.364056
0,0,425.0,,-86.927414,24.364056
...,...,...,...,...,...
1708,1271,640.0,,-61.891991,47.477455
1708,1271,655.0,,-61.891991,47.477455
1708,1271,665.0,,-61.891991,47.477455
1708,1271,678.0,,-61.891991,47.477455


## Kd - Generalized solution to pull any independently selected list of points

In [30]:
# loop it for every independently selected set of coordinates

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
wavelength_val = []
param_name = []

# (lat, lon) pair of every PACE pixel stacked on a trailing axis; built once
# because it does not depend on the point being looked up
latlon = np.stack([lat, lon], -1)

for point_name, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): the distance is computed in degrees, treating a degree of
    # longitude like a degree of latitude, so it is not a true ground
    # distance and may select a neighbour of the truly nearest pixel.
    dists = np.linalg.norm(latlon - np.array(target), axis=-1)
    # nanargmin skips pixels whose lat/lon is NaN; plain argmin would return
    # the first NaN pixel instead of the nearest valid one
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for the points
    df_param = df[(df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)]

    # drop the number of lines index column
    df_param.index = df_param.index.droplevel(0)
    # reset the index
    df_param = df_param.reset_index()

    # select the wavelength of interest
    df_param = df_param[df_param['wavelength_3d'] == wave]
    if df_param.empty:
        # fail with a clear message instead of an IndexError on .values[0]
        raise ValueError(
            f"No {param} row at wavelength_3d == {wave} for {point_name}"
        )

    # append the latitude
    lat_val.append(df_param['latitude'].values[0])

    # append the longitude
    lon_val.append(df_param['longitude'].values[0])

    # append the wavelength
    wavelength_val.append(df_param['wavelength_3d'].values[0])

    # append the parameter name
    param_name.append(param)

    # append the parameter value
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[['wavelength_3d', param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for " + point_name)

   wavelength_3d      Kd  longitude   latitude
8          490.0  0.4244 -76.301041  37.583176
Generated parameter values for point1
   wavelength_3d     Kd  longitude   latitude
8          490.0  0.393  -76.37368  37.245449
Generated parameter values for point2
   wavelength_3d     Kd  longitude   latitude
8          490.0  0.473 -76.284828  36.983372
Generated parameter values for point3


In [31]:
# collect the per-point lists filled by the loop above into one table
df_final = pd.DataFrame(
    dict(
        latitude=lat_val,
        longitude=lon_val,
        wavelength_3d=wavelength_val,
        parameter=param_name,
        parameter_value=param_val,
    )
)
df_final

Unnamed: 0,latitude,longitude,wavelength_3d,parameter,parameter_value
0,37.583176,-76.301041,490.0,Kd,0.4244
1,37.245449,-76.37368,490.0,Kd,0.393
2,36.983372,-76.284828,490.0,Kd,0.473


In [32]:
# add normalized parameter value
# method: subtract minimum and divide by range

# find minimum and range
# Series.min()/max() skip NaN. The builtin min()/max() compare element by
# element, so a NaN in the column is either returned as the "minimum" or
# silently ignored depending on where it sits.
min_param = df_final['parameter_value'].min()
max_param = df_final['parameter_value'].max()
range_param = max_param - min_param

# normalize each value
# NOTE: if every value is identical, range_param is 0 and the result is NaN
df_final['parameter_value_normalized'] = (df_final['parameter_value'] - min_param) / range_param

In [33]:
df_final

Unnamed: 0,latitude,longitude,wavelength_3d,parameter,parameter_value,parameter_value_normalized
0,37.583176,-76.301041,490.0,Kd,0.4244,0.3925
1,37.245449,-76.37368,490.0,Kd,0.393,0.0
2,36.983372,-76.284828,490.0,Kd,0.473,1.0


In [34]:
# write the table to pace_<param>.csv (with header row, without the index)
df_final.to_csv(f'pace_{param}.csv', index=False, header=True)

## BBP - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [35]:
# set the parameter of interest
# IOP product: Kd or bbp_442
# AOP product: avw
param = 'bbp_442'

## BBP - Create the dataframe using specified parameters

In [36]:
# flatten the parameter into a table: one row per
# (number_of_lines, pixels_per_line) with the pixel's longitude/latitude
# alongside (this replaces the Kd table held in df)
df = dataset[[param]].to_dataframe()

In [37]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,bbp_442,longitude,latitude
number_of_lines,pixels_per_line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,,-86.927414,24.364056
0,1,,-86.859642,24.378946
0,2,,-86.792641,24.393661
0,3,,-86.726387,24.408211
0,4,,-86.660858,24.422594
...,...,...,...,...
1708,1267,,-62.263226,47.446732
1708,1268,,-62.172028,47.454357
1708,1269,,-62.079781,47.462009
1708,1270,,-61.986439,47.469711


## BBP - Generalized solution to pull any independently selected list of points

In [38]:
# loop it for every independently selected set of coordinates

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
param_name = []

# (lat, lon) pair of every PACE pixel stacked on a trailing axis; built once
# because it does not depend on the point being looked up
latlon = np.stack([lat, lon], -1)

for point_name, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): the distance is computed in degrees, treating a degree of
    # longitude like a degree of latitude, so it is not a true ground
    # distance and may select a neighbour of the truly nearest pixel.
    dists = np.linalg.norm(latlon - np.array(target), axis=-1)
    # nanargmin skips pixels whose lat/lon is NaN; plain argmin would return
    # the first NaN pixel instead of the nearest valid one
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for the points
    df_param = df[(df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)]

    # drop the number of lines index column
    df_param.index = df_param.index.droplevel(0)
    # reset the index
    df_param = df_param.reset_index()

    # append the latitude
    lat_val.append(df_param['latitude'].values[0])

    # append the longitude
    lon_val.append(df_param['longitude'].values[0])

    # append the parameter name
    param_name.append(param)

    # append the parameter value
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[[param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for " + point_name)

    bbp_442  longitude   latitude
0  0.019705 -76.301041  37.583176
Generated parameter values for point1
    bbp_442  longitude   latitude
0  0.017855  -76.37368  37.245449
Generated parameter values for point2
   bbp_442  longitude   latitude
0   0.0265 -76.284828  36.983372
Generated parameter values for point3


In [39]:
# collect the per-point lists filled by the loop above into one table
df_final = pd.DataFrame(
    dict(
        latitude=lat_val,
        longitude=lon_val,
        parameter=param_name,
        parameter_value=param_val,
    )
)
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value
0,37.583176,-76.301041,bbp_442,0.019705
1,37.245449,-76.37368,bbp_442,0.017855
2,36.983372,-76.284828,bbp_442,0.0265


In [40]:
# add normalized parameter value
# method: subtract minimum and divide by range

# find minimum and range
# Series.min()/max() skip NaN. The builtin min()/max() compare element by
# element, so a NaN in the column is either returned as the "minimum" or
# silently ignored depending on where it sits.
min_param = df_final['parameter_value'].min()
max_param = df_final['parameter_value'].max()
range_param = max_param - min_param

# normalize each value
# NOTE: if every value is identical, range_param is 0 and the result is NaN
df_final['parameter_value_normalized'] = (df_final['parameter_value'] - min_param) / range_param

In [41]:
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value,parameter_value_normalized
0,37.583176,-76.301041,bbp_442,0.019705,0.213997
1,37.245449,-76.37368,bbp_442,0.017855,0.0
2,36.983372,-76.284828,bbp_442,0.0265,1.0


In [42]:
# write the table to pace_<param>.csv (with header row, without the index)
df_final.to_csv(f'pace_{param}.csv', index=False, header=True)

# AVW - using AOP

In [43]:
# pull granules for these parameters
# same day, box and cloud filter as the IOP search; the L2 AOP product is the
# one avw is read from further down
results = earthaccess.search_data(
    short_name="PACE_OCI_L2_AOP_NRT",
    temporal=tspan,
    bounding_box=bbox,
    cloud_cover=clouds
)

In [44]:
results

[Collection: {'ShortName': 'PACE_OCI_L2_AOP_NRT', 'Version': '2.0'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 47.47746, 'Longitude': -61.89199}, {'Latitude': 41.52162, 'Longitude': -95.06815}, {'Latitude': 24.36406, 'Longitude': -86.92741}, {'Latitude': 29.65719, 'Longitude': -60.46736}, {'Latitude': 47.47746, 'Longitude': -61.89199}]}}]}}}
 Temporal coverage: {'RangeDateTime': {'EndingDateTime': '2024-05-22T17:42:47Z', 'BeginningDateTime': '2024-05-22T17:37:48Z'}}
 Size(MB): 285.33649730682373
 Data: ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_AOP.V2_0.NRT.nc']]

In [45]:
results[0]

In [46]:
# open the AOP granule(s) found by the search with earthaccess; the result is
# a list of file-like objects that xarray reads from in the following cells
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

In [47]:
paths

[<File-like object S3FileSystem, ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_AOP.V2_0.NRT.nc>]

In [48]:
# quick look at the AOP granule as a plain Dataset; from here on `dataset` no
# longer holds the IOP data, and it is overwritten again by the merged
# datatree below
dataset = xr.open_dataset(paths[0])

In [49]:
dataset

In [50]:
# re-open the same AOP granule as a DataTree; its groups are merged into a
# single Dataset in the next step
datatree = open_datatree(paths[0])

In [51]:
datatree

In [52]:
# flatten the tree: merge the Dataset of every group into one Dataset
dataset = xr.merge(datatree.to_dict().values())

In [53]:
# mark longitude/latitude as coordinates so they accompany any selected
# variable (they show up as columns next to the parameter after to_dataframe())
dataset = dataset.set_coords(("longitude", "latitude"))

In [54]:
dataset

## AVW - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [55]:
# set the parameter of interest
# IOP product: Kd or bbp_442
# AOP product: avw
param = 'avw'

## AVW - Create the dataframe using specified parameters

In [56]:
# flatten the parameter into a table: one row per
# (number_of_lines, pixels_per_line) with the pixel's longitude/latitude
# alongside (built from the AOP dataset; replaces the previous df)
df = dataset[[param]].to_dataframe()

In [57]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,avw,longitude,latitude
number_of_lines,pixels_per_line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,,-86.927414,24.364056
0,1,,-86.859642,24.378946
0,2,,-86.792641,24.393661
0,3,,-86.726387,24.408211
0,4,,-86.660858,24.422594
...,...,...,...,...
1708,1267,,-62.263226,47.446732
1708,1268,,-62.172028,47.454357
1708,1269,,-62.079781,47.462009
1708,1270,,-61.986439,47.469711


## AVW - Generalized solution to pull any independently selected list of points

In [58]:
# loop it for every independently selected set of coordinates

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
param_name = []

# (lat, lon) pair of every PACE pixel stacked on a trailing axis; built once
# because it does not depend on the point being looked up.
# The navigation is read from the dataset that df was built from (the AOP
# granule) rather than from the lat/lon variables captured earlier from the
# IOP granule, so the matched coordinates always exist in df.
latlon = np.stack([dataset['latitude'], dataset['longitude']], -1)

for point_name, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): the distance is computed in degrees, treating a degree of
    # longitude like a degree of latitude, so it is not a true ground
    # distance and may select a neighbour of the truly nearest pixel.
    dists = np.linalg.norm(latlon - np.array(target), axis=-1)
    # nanargmin skips pixels whose lat/lon is NaN; plain argmin would return
    # the first NaN pixel instead of the nearest valid one
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for the points
    df_param = df[(df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)]

    # drop the number of lines index column
    df_param.index = df_param.index.droplevel(0)
    # reset the index
    df_param = df_param.reset_index()

    # append the latitude
    lat_val.append(df_param['latitude'].values[0])

    # append the longitude
    lon_val.append(df_param['longitude'].values[0])

    # append the parameter name
    param_name.append(param)

    # append the parameter value
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[[param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for " + point_name)

          avw  longitude   latitude
0  538.645508 -76.301041  37.583176
Generated parameter values for point1
          avw  longitude   latitude
0  552.625549  -76.37368  37.245449
Generated parameter values for point2
          avw  longitude   latitude
0  562.832275 -76.284828  36.983372
Generated parameter values for point3


In [59]:
# collect the per-point lists filled by the loop above into one table
df_final = pd.DataFrame(
    dict(
        latitude=lat_val,
        longitude=lon_val,
        parameter=param_name,
        parameter_value=param_val,
    )
)
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value
0,37.583176,-76.301041,avw,538.645508
1,37.245449,-76.37368,avw,552.625549
2,36.983372,-76.284828,avw,562.832275


In [60]:
# add normalized parameter value
# method: subtract minimum and divide by range

# find minimum and range
# Series.min()/max() skip NaN. The builtin min()/max() compare element by
# element, so a NaN in the column is either returned as the "minimum" or
# silently ignored depending on where it sits.
min_param = df_final['parameter_value'].min()
max_param = df_final['parameter_value'].max()
range_param = max_param - min_param

# normalize each value
# NOTE: if every value is identical, range_param is 0 and the result is NaN
df_final['parameter_value_normalized'] = (df_final['parameter_value'] - min_param) / range_param

In [61]:
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value,parameter_value_normalized
0,37.583176,-76.301041,avw,538.645508,0.0
1,37.245449,-76.37368,avw,552.625549,0.578004
2,36.983372,-76.284828,avw,562.832275,1.0


In [62]:
# write the table to pace_<param>.csv (with header row, without the index)
df_final.to_csv(f'pace_{param}.csv', index=False, header=True)