# Pull in PACE data (Kd, AVW, BBP) for May 22 2024 at the following lat/lon points:

## Coastal points:
## 37.586667, -76.300556
## 37.241667, -76.371389
## 36.987222, -76.279722

### We need L2 AOP for AVW and L2 IOP for Kd and BBP

In [146]:
# Setup
import earthaccess
import xarray as xr
from xarray.backends.api import open_datatree
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import datetime

In [3]:
# earthdata authentication
auth = earthaccess.login(persist=True)

In [4]:
# get OCI data
results = earthaccess.search_datasets(instrument="oci")

In [5]:
# Print the short name of every collection returned by the dataset search
short_names = (entry.summary()["short-name"] for entry in results)
for short_name in short_names:
    print(short_name)

PACE_OCI_L0_SCI
PACE_OCI_L1A_SCI
PACE_OCI_L1B_SCI
PACE_OCI_L1C_SCI
PACE_OCI_L2_AOP_NRT
PACE_OCI_L2_BGC_NRT
PACE_OCI_L2_IOP_NRT
PACE_OCI_L2_PAR_NRT
PACE_OCI_L3B_CHL_NRT
PACE_OCI_L3B_IOP_NRT
PACE_OCI_L3B_KD_NRT
PACE_OCI_L3B_PAR_NRT
PACE_OCI_L3B_POC_NRT
PACE_OCI_L3B_RRS_NRT
PACE_OCI_L3M_CHL_NRT
PACE_OCI_L3M_IOP_NRT
PACE_OCI_L3M_KD_NRT
PACE_OCI_L3M_PAR_NRT
PACE_OCI_L3M_POC_NRT
PACE_OCI_L3M_RRS_NRT


In [6]:
# search for the day of interest
# Start and end are the same date; presumably this restricts the search to
# granules from 2024-05-22 — TODO confirm how earthaccess treats date-only bounds.
tspan = ("2024-05-22", "2024-05-22")
# Bounding box handed to earthaccess.search_data as `bounding_box`.
# presumably ordered (west, south, east, north) in degrees — TODO confirm.
# NOTE(review): the box spans roughly 36.97-39.01 N, while the points read from
# the CSV further down range from about 30 N to 43 N. They are only matched
# because the single granule returned covers a much larger area than the box.
bbox = (-76.75, 36.97, -75.74, 39.01)
# (min, max) range handed to earthaccess.search_data as `cloud_cover`
# (presumably percent — TODO confirm).
clouds = (0, 50)

## Start with IOP to get Kd and BBP

In [7]:
# Search for Level-2 IOP granules using the date, bounding box and
# cloud-cover filters defined above
iop_query = {
    "short_name": "PACE_OCI_L2_IOP_NRT",
    "temporal": tspan,
    "bounding_box": bbox,
    "cloud_cover": clouds,
}
results = earthaccess.search_data(**iop_query)

In [8]:
results

[Collection: {'Version': '2.0', 'ShortName': 'PACE_OCI_L2_IOP_NRT'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 47.47746, 'Longitude': -61.89199}, {'Latitude': 41.52162, 'Longitude': -95.06815}, {'Latitude': 24.36406, 'Longitude': -86.92741}, {'Latitude': 29.65719, 'Longitude': -60.46736}, {'Latitude': 47.47746, 'Longitude': -61.89199}]}}]}}}
 Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2024-05-22T17:37:48Z', 'EndingDateTime': '2024-05-22T17:42:47Z'}}
 Size(MB): 53.02047824859619
 Data: ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_IOP.V2_0.NRT.nc']]

In [9]:
results[0]

In [10]:
# open the granule using xarray
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

In [11]:
paths

[<File-like object S3FileSystem, ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_IOP.V2_0.NRT.nc>]

In [12]:
dataset = xr.open_dataset(paths[0])

In [13]:
dataset

In [14]:
datatree = open_datatree(paths[0])

In [15]:
datatree

In [16]:
# Merge the datasets of every node in the datatree into one flat Dataset.
# This rebinds `dataset`, replacing the one returned by xr.open_dataset above.
dataset = xr.merge(datatree.to_dict().values())

In [17]:
dataset

In [18]:
# Mark longitude and latitude as coordinates instead of data variables, so they
# travel with any variable selected from the dataset (they show up as columns
# in the dataframes built with to_dataframe() further down).
dataset = dataset.set_coords(("longitude", "latitude"))

# General Setup

In [77]:
# independently selected coordinates 
# format: (lat,lon)
# coordinates = {
#     'point1': (37.586667, -76.300556),
#     'point2': (37.241667, -76.371389),
#     'point3': (36.987222, -76.279722)
# }

In [78]:
# Load the point list from CSV and keep only the id and position columns
points_csv = 'coastal_points_final_Aug_Aft1.csv'
wanted_columns = ['Id', 'Lat', 'Lon']
coordinates = pd.read_csv(points_csv, index_col=False).loc[:, wanted_columns]
coordinates

Unnamed: 0,Id,Lat,Lon
0,0,32.472004,-80.360234
1,1,30.129284,-89.076222
2,2,30.416277,-81.361048
3,3,30.970782,-81.388097
4,4,31.308895,-81.212278
5,5,31.849876,-81.036459
6,6,32.066268,-80.847116
7,7,32.735512,-79.846998
8,8,33.104791,-79.279251
9,9,33.188803,-79.156552


In [79]:
# Turn the table into a mapping of point id -> [lat, lon]
points_by_id = coordinates.set_index('Id')
coordinates = points_by_id.transpose().to_dict(orient='list')
coordinates

{0: [32.47200356, -80.36023354],
 1: [30.12928417, -89.07622153],
 2: [30.41627698, -81.3610478],
 3: [30.97078217, -81.38809683],
 4: [31.3088951, -81.21227811],
 5: [31.84987578, -81.03645939],
 6: [32.06626805, -80.84711616],
 7: [32.73551159, -79.84699806],
 8: [33.10479091, -79.27925064],
 9: [33.18880296, -79.15655153],
 10: [33.87855332, -78.03401663],
 11: [35.06353864, -76.51849064],
 12: [35.33314529, -76.43655137],
 13: [36.03359392, -75.86297644],
 14: [36.99205206, -76.27869006],
 15: [37.24608718, -76.3729289],
 16: [37.59128814, -76.30634711],
 17: [37.97461742, -76.29773091],
 18: [38.3250827, -76.3847751],
 19: [39.30908049, -75.38590428],
 20: [40.55226696, -74.03780298],
 21: [41.15474345, -73.08404341],
 22: [41.28531139, -72.0740348],
 23: [42.81825904, -70.78487553],
 24: [43.04817583, -70.69020391]}

In [80]:
# assign lat, lon variables the latitude and longitude values
# NOTE(review): lat/lon are bound to whichever `dataset` exists when this cell
# runs; they do not follow later reassignments of `dataset`.
lat, lon = dataset['latitude'], dataset['longitude']

## Kd - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [81]:
# set the parameter of interest 
# IOP: Kd or bbp_442, 
# AOP: avw
param = 'Kd'

In [82]:
# set wavelength of interest
wave = 490.0

## Kd - Create the dataframe using specified parameters

In [83]:
df = dataset[[param]].to_dataframe()

In [84]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Kd,longitude,latitude
number_of_lines,pixels_per_line,wavelength_3d,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,351.0,,-86.927414,24.364056
0,0,361.0,,-86.927414,24.364056
0,0,385.0,,-86.927414,24.364056
0,0,413.0,,-86.927414,24.364056
0,0,425.0,,-86.927414,24.364056
...,...,...,...,...,...
1708,1271,640.0,,-61.891991,47.477455
1708,1271,655.0,,-61.891991,47.477455
1708,1271,665.0,,-61.891991,47.477455
1708,1271,678.0,,-61.891991,47.477455


## Kd - Generalized solution to pull any independently selected list of points

In [85]:
# loop it for every independently selected set of coordinates
#
# For each entry of `coordinates` (point id -> [lat, lon]) find the PACE pixel
# whose (lat, lon) is closest, pull `param` at wavelength `wave` for that pixel
# out of `df`, and append the result to the lists below. The next cell turns
# these lists into a dataframe.

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
wavelength_val = []
param_name = []

# the PACE pixel grid is the same for every point, so stack (lat, lon) once
# instead of rebuilding it on each iteration
latlon = np.stack([lat, lon], -1)

for point_id, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): this is a straight-line distance in degree space (longitude
    # is not scaled by cos(latitude)), so "closest" is approximate.
    dists = np.linalg.norm(latlon - np.asarray(target), axis=-1)

    # nanargmin ignores pixels whose distance is NaN. Plain argmin would pick
    # such a pixel if one were present, and the lookup below would then find
    # no rows. Raises ValueError if every distance is NaN.
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for that pixel, drop the number-of-lines index
    # level and turn the remaining index levels into ordinary columns
    at_pixel = (df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)
    df_param = df[at_pixel].droplevel(0).reset_index()

    # select the wavelength of interest
    df_param = df_param[df_param['wavelength_3d'] == wave]
    if df_param.empty:
        # fail with a readable message instead of an IndexError on .values[0]
        raise ValueError(
            f"wavelength {wave} not found in '{param}' for point id {point_id}"
        )

    # record position, wavelength, parameter name and value of the matched pixel
    lat_val.append(df_param['latitude'].values[0])
    lon_val.append(df_param['longitude'].values[0])
    wavelength_val.append(df_param['wavelength_3d'].values[0])
    param_name.append(param)
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[['wavelength_3d', param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for point id " + str(point_id))

   wavelength_3d      Kd  longitude   latitude
8          490.0  0.4636 -80.354721  32.468266
Generated parameter values for point id 0
   wavelength_3d  Kd  longitude   latitude
8          490.0 NaN -89.106194  30.114605
Generated parameter values for point id 1
   wavelength_3d     Kd  longitude   latitude
8          490.0  0.126 -81.358238  30.421574
Generated parameter values for point id 2
   wavelength_3d      Kd  longitude   latitude
8          490.0  0.8438 -81.382744  30.977882
Generated parameter values for point id 3
   wavelength_3d     Kd  longitude   latitude
8          490.0  1.917 -81.202698  31.308884
Generated parameter values for point id 4
   wavelength_3d      Kd  longitude   latitude
8          490.0  0.7554 -81.039757  31.849695
Generated parameter values for point id 5
   wavelength_3d     Kd  longitude   latitude
8          490.0  1.235 -80.845467  32.071373
Generated parameter values for point id 6
   wavelength_3d      Kd  longitude   latitude
8          490.

In [86]:
# Assemble the per-point results collected by the loop into one table
final_columns = {
    'latitude': lat_val,
    'longitude': lon_val,
    'wavelength_3d': wavelength_val,
    'parameter': param_name,
    'parameter_value': param_val,
}
df_final = pd.DataFrame(final_columns)
df_final

Unnamed: 0,latitude,longitude,wavelength_3d,parameter,parameter_value
0,32.468266,-80.354721,490.0,Kd,0.4636
1,30.114605,-89.106194,490.0,Kd,
2,30.421574,-81.358238,490.0,Kd,0.126
3,30.977882,-81.382744,490.0,Kd,0.8438
4,31.308884,-81.202698,490.0,Kd,1.917
5,31.849695,-81.039757,490.0,Kd,0.7554
6,32.071373,-80.845467,490.0,Kd,1.235
7,32.732891,-79.848038,490.0,Kd,1.9678
8,33.098446,-79.283485,490.0,Kd,0.7104
9,33.192657,-79.154793,490.0,Kd,0.802


In [87]:
# add normalized parameter value
# method: subtract minimum and divide by range (min-max scaling)

# find minimum and range
# Series.min()/max() skip NaN. The built-in min()/max() return NaN whenever the
# first value is NaN (every comparison with NaN is False), which would turn the
# whole normalized column into NaN.
param_values = df_final['parameter_value']
min_param = param_values.min()
max_param = param_values.max()
range_param = max_param - min_param

# normalize each value; points without a value stay NaN
df_final['parameter_value_normalized'] = (param_values - min_param) / range_param

In [88]:
df_final

Unnamed: 0,latitude,longitude,wavelength_3d,parameter,parameter_value,parameter_value_normalized
0,32.468266,-80.354721,490.0,Kd,0.4636,0.08806
1,30.114605,-89.106194,490.0,Kd,,
2,30.421574,-81.358238,490.0,Kd,0.126,0.013138
3,30.977882,-81.382744,490.0,Kd,0.8438,0.172437
4,31.308884,-81.202698,490.0,Kd,1.917,0.410608
5,31.849695,-81.039757,490.0,Kd,0.7554,0.152818
6,32.071373,-80.845467,490.0,Kd,1.235,0.259254
7,32.732891,-79.848038,490.0,Kd,1.9678,0.421882
8,33.098446,-79.283485,490.0,Kd,0.7104,0.142832
9,33.192657,-79.154793,490.0,Kd,0.802,0.16316


In [89]:
# Write the table to a CSV named after the current parameter
output_name = f'pace_{param}.csv'
df_final.to_csv(output_name, index=False, header=True)

## BBP - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [90]:
# set the parameter of interest 
# IOP: Kd or bbp_442, 
# AOP: avw
param = 'bbp_442'

## BBP - Create the dataframe using specified parameters

In [91]:
df = dataset[[param]].to_dataframe()

In [92]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,bbp_442,longitude,latitude
number_of_lines,pixels_per_line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,,-86.927414,24.364056
0,1,,-86.859642,24.378946
0,2,,-86.792641,24.393661
0,3,,-86.726387,24.408211
0,4,,-86.660858,24.422594
...,...,...,...,...
1708,1267,,-62.263226,47.446732
1708,1268,,-62.172028,47.454357
1708,1269,,-62.079781,47.462009
1708,1270,,-61.986439,47.469711


## BBP - Generalized solution to pull any independently selected list of points

In [94]:
# loop it for every independently selected set of coordinates
#
# For each entry of `coordinates` (point id -> [lat, lon]) find the PACE pixel
# whose (lat, lon) is closest, pull `param` for that pixel out of `df`, and
# append the result to the lists below. The next cell turns these lists into a
# dataframe.

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
param_name = []

# the PACE pixel grid is the same for every point, so stack (lat, lon) once
# instead of rebuilding it on each iteration
latlon = np.stack([lat, lon], -1)

for point_id, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): this is a straight-line distance in degree space (longitude
    # is not scaled by cos(latitude)), so "closest" is approximate.
    dists = np.linalg.norm(latlon - np.asarray(target), axis=-1)

    # nanargmin ignores pixels whose distance is NaN. Plain argmin would pick
    # such a pixel if one were present, and the lookup below would then find
    # no rows. Raises ValueError if every distance is NaN.
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for that pixel, drop the number-of-lines index
    # level and turn the remaining index level into an ordinary column
    at_pixel = (df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)
    df_param = df[at_pixel].droplevel(0).reset_index()

    # record position, parameter name and value of the matched pixel
    lat_val.append(df_param['latitude'].values[0])
    lon_val.append(df_param['longitude'].values[0])
    param_name.append(param)
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[[param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for " + str(point_id))

   bbp_442  longitude   latitude
0   0.0226 -80.354721  32.468266
Generated parameter values for 0
   bbp_442  longitude   latitude
0      NaN -89.106194  30.114605
Generated parameter values for 1
    bbp_442  longitude   latitude
0  0.007415 -81.358238  30.421574
Generated parameter values for 2
   bbp_442  longitude   latitude
0  0.03684 -81.382744  30.977882
Generated parameter values for 3
   bbp_442  longitude   latitude
0  0.13655 -81.202698  31.308884
Generated parameter values for 4
    bbp_442  longitude   latitude
0  0.034095 -81.039757  31.849695
Generated parameter values for 5
   bbp_442  longitude   latitude
0  0.08426 -80.845467  32.071373
Generated parameter values for 6
    bbp_442  longitude   latitude
0  0.148925 -79.848038  32.732891
Generated parameter values for 7
    bbp_442  longitude   latitude
0  0.033605 -79.283485  33.098446
Generated parameter values for 8
    bbp_442  longitude   latitude
0  0.033955 -79.154793  33.192657
Generated parameter values for 9


In [96]:
# Assemble the per-point results collected by the loop into one table
final_columns = {
    'latitude': lat_val,
    'longitude': lon_val,
    'parameter': param_name,
    'parameter_value': param_val,
}
df_final = pd.DataFrame(final_columns)
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value
0,32.468266,-80.354721,bbp_442,0.0226
1,30.114605,-89.106194,bbp_442,
2,30.421574,-81.358238,bbp_442,0.007415
3,30.977882,-81.382744,bbp_442,0.03684
4,31.308884,-81.202698,bbp_442,0.13655
5,31.849695,-81.039757,bbp_442,0.034095
6,32.071373,-80.845467,bbp_442,0.08426
7,32.732891,-79.848038,bbp_442,0.148925
8,33.098446,-79.283485,bbp_442,0.033605
9,33.192657,-79.154793,bbp_442,0.033955


In [97]:
# add normalized parameter value
# method: subtract minimum and divide by range (min-max scaling)

# find minimum and range
# Series.min()/max() skip NaN. The built-in min()/max() return NaN whenever the
# first value is NaN (every comparison with NaN is False), which would turn the
# whole normalized column into NaN.
param_values = df_final['parameter_value']
min_param = param_values.min()
max_param = param_values.max()
range_param = max_param - min_param

# normalize each value; points without a value stay NaN
df_final['parameter_value_normalized'] = (param_values - min_param) / range_param

In [98]:
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value,parameter_value_normalized
0,32.468266,-80.354721,bbp_442,0.0226,0.152665
1,30.114605,-89.106194,bbp_442,,
2,30.421574,-81.358238,bbp_442,0.007415,0.05081
3,30.977882,-81.382744,bbp_442,0.03684,0.248181
4,31.308884,-81.202698,bbp_442,0.13655,0.916994
5,31.849695,-81.039757,bbp_442,0.034095,0.229768
6,32.071373,-80.845467,bbp_442,0.08426,0.566254
7,32.732891,-79.848038,bbp_442,0.148925,1.0
8,33.098446,-79.283485,bbp_442,0.033605,0.226482
9,33.192657,-79.154793,bbp_442,0.033955,0.228829


In [99]:
# Write the table to a CSV named after the current parameter
output_name = f'pace_{param}.csv'
df_final.to_csv(output_name, index=False, header=True)

# AVW - using AOP

In [100]:
# Search for Level-2 AOP granules using the same date, bounding box and
# cloud-cover filters as the IOP search
aop_query = {
    "short_name": "PACE_OCI_L2_AOP_NRT",
    "temporal": tspan,
    "bounding_box": bbox,
    "cloud_cover": clouds,
}
results = earthaccess.search_data(**aop_query)

In [101]:
results

[Collection: {'ShortName': 'PACE_OCI_L2_AOP_NRT', 'Version': '2.0'}
 Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'GPolygons': [{'Boundary': {'Points': [{'Latitude': 47.47746, 'Longitude': -61.89199}, {'Latitude': 41.52162, 'Longitude': -95.06815}, {'Latitude': 24.36406, 'Longitude': -86.92741}, {'Latitude': 29.65719, 'Longitude': -60.46736}, {'Latitude': 47.47746, 'Longitude': -61.89199}]}}]}}}
 Temporal coverage: {'RangeDateTime': {'EndingDateTime': '2024-05-22T17:42:47Z', 'BeginningDateTime': '2024-05-22T17:37:48Z'}}
 Size(MB): 285.33649730682373
 Data: ['https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_AOP.V2_0.NRT.nc']]

In [45]:
results[0]

In [102]:
# open the granule using xarray
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/1 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/1 [00:00<?, ?it/s]

In [103]:
paths

[<File-like object S3FileSystem, ob-cumulus-prod-public/PACE_OCI.20240522T173748.L2.OC_AOP.V2_0.NRT.nc>]

In [104]:
dataset = xr.open_dataset(paths[0])

In [105]:
dataset

In [106]:
datatree = open_datatree(paths[0])

In [107]:
datatree

In [108]:
# Merge the datasets of every node in the datatree into one flat Dataset.
# This rebinds `dataset`, replacing the one returned by xr.open_dataset above.
dataset = xr.merge(datatree.to_dict().values())

In [109]:
# Mark longitude and latitude as coordinates instead of data variables, so they
# travel with any variable selected from the dataset (they show up as columns
# in the dataframe built with to_dataframe() further down).
dataset = dataset.set_coords(("longitude", "latitude"))

In [110]:
dataset

## AVW - Find closest match lat,lon coordinates in PACE using independently selected lat,lon points

In [111]:
# set the parameter of interest 
# IOP: Kd or bbp_442, 
# AOP: avw
param = 'avw'

## AVW - Create the dataframe using specified parameters

In [112]:
df = dataset[[param]].to_dataframe()

In [113]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,avw,longitude,latitude
number_of_lines,pixels_per_line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,,-86.927414,24.364056
0,1,,-86.859642,24.378946
0,2,,-86.792641,24.393661
0,3,,-86.726387,24.408211
0,4,,-86.660858,24.422594
...,...,...,...,...
1708,1267,,-62.263226,47.446732
1708,1268,,-62.172028,47.454357
1708,1269,,-62.079781,47.462009
1708,1270,,-61.986439,47.469711


## AVW - Generalized solution to pull any independently selected list of points

In [114]:
# loop it for every independently selected set of coordinates
#
# For each entry of `coordinates` (point id -> [lat, lon]) find the PACE pixel
# whose (lat, lon) is closest, pull `param` for that pixel out of `df`, and
# append the result to the lists below. The next cell turns these lists into a
# dataframe.

# store the parameter values in a list
param_val = []
lat_val = []
lon_val = []
param_name = []

# Take the pixel grid from the current `dataset` (the one `df` was built from)
# rather than reusing lat/lon bound earlier from the IOP dataset, so the
# coordinates searched here are the same ones stored in `df`.
lat, lon = dataset['latitude'], dataset['longitude']

# the PACE pixel grid is the same for every point, so stack (lat, lon) once
# instead of rebuilding it on each iteration
latlon = np.stack([lat, lon], -1)

for point_id, target in coordinates.items():

    # get closest match PACE coordinate to independently selected coordinate
    # NOTE(review): this is a straight-line distance in degree space (longitude
    # is not scaled by cos(latitude)), so "closest" is approximate.
    dists = np.linalg.norm(latlon - np.asarray(target), axis=-1)

    # nanargmin ignores pixels whose distance is NaN. Plain argmin would pick
    # such a pixel if one were present, and the lookup below would then find
    # no rows. Raises ValueError if every distance is NaN.
    min_dist_y, min_dist_x = np.unravel_index(np.nanargmin(dists), dists.shape)

    # get the lat lon coordinates that have the minimum distance
    coord_lat, coord_lon = latlon[min_dist_y, min_dist_x]

    # filter the parameter data for that pixel, drop the number-of-lines index
    # level and turn the remaining index level into an ordinary column
    at_pixel = (df['longitude'] == coord_lon) & (df['latitude'] == coord_lat)
    df_param = df[at_pixel].droplevel(0).reset_index()

    # record position, parameter name and value of the matched pixel
    lat_val.append(df_param['latitude'].values[0])
    lon_val.append(df_param['longitude'].values[0])
    param_name.append(param)
    param_val.append(df_param[param].values[0])

    # show the final dataframe
    print(df_param[[param, 'longitude', 'latitude']])

    # user messaging
    print("Generated parameter values for " + str(point_id))

          avw  longitude   latitude
0  575.054443 -80.354721  32.468266
Generated parameter values for 0
   avw  longitude   latitude
0  NaN -89.106194  30.114605
Generated parameter values for 1
          avw  longitude   latitude
0  534.403564 -81.358238  30.421574
Generated parameter values for 2
          avw  longitude   latitude
0  588.998352 -81.382744  30.977882
Generated parameter values for 3
         avw  longitude   latitude
0  563.54541 -81.202698  31.308884
Generated parameter values for 4
          avw  longitude   latitude
0  594.961609 -81.039757  31.849695
Generated parameter values for 5
          avw  longitude   latitude
0  571.725525 -80.845467  32.071373
Generated parameter values for 6
          avw  longitude   latitude
0  563.748413 -79.848038  32.732891
Generated parameter values for 7
         avw  longitude   latitude
0  600.63385 -79.283485  33.098446
Generated parameter values for 8
          avw  longitude   latitude
0  596.599121 -79.154793  33.192657
G

In [115]:
# Assemble the per-point results collected by the loop into one table
final_columns = {
    'latitude': lat_val,
    'longitude': lon_val,
    'parameter': param_name,
    'parameter_value': param_val,
}
df_final = pd.DataFrame(final_columns)
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value
0,32.468266,-80.354721,avw,575.054443
1,30.114605,-89.106194,avw,
2,30.421574,-81.358238,avw,534.403564
3,30.977882,-81.382744,avw,588.998352
4,31.308884,-81.202698,avw,563.54541
5,31.849695,-81.039757,avw,594.961609
6,32.071373,-80.845467,avw,571.725525
7,32.732891,-79.848038,avw,563.748413
8,33.098446,-79.283485,avw,600.63385
9,33.192657,-79.154793,avw,596.599121


In [116]:
# add normalized parameter value
# method: subtract minimum and divide by range (min-max scaling)

# find minimum and range
# Series.min()/max() skip NaN. The built-in min()/max() return NaN whenever the
# first value is NaN (every comparison with NaN is False), which would turn the
# whole normalized column into NaN.
param_values = df_final['parameter_value']
min_param = param_values.min()
max_param = param_values.max()
range_param = max_param - min_param

# normalize each value; points without a value stay NaN
df_final['parameter_value_normalized'] = (param_values - min_param) / range_param

In [117]:
df_final

Unnamed: 0,latitude,longitude,parameter,parameter_value,parameter_value_normalized
0,32.468266,-80.354721,avw,575.054443,0.573951
1,30.114605,-89.106194,avw,,
2,30.421574,-81.358238,avw,534.403564,0.065994
3,30.977882,-81.382744,avw,588.998352,0.748188
4,31.308884,-81.202698,avw,563.54541,0.430139
5,31.849695,-81.039757,avw,594.961609,0.822703
6,32.071373,-80.845467,avw,571.725525,0.532354
7,32.732891,-79.848038,avw,563.748413,0.432675
8,33.098446,-79.283485,avw,600.63385,0.893581
9,33.192657,-79.154793,avw,596.599121,0.843164


In [118]:
# Write the table to a CSV named after the current parameter
output_name = f'pace_{param}.csv'
df_final.to_csv(output_name, index=False, header=True)

# Combine the Kd, BBP, and AVW values into one file

In [142]:
# Load the three per-parameter CSVs written above (Kd, BBP, AVW), in that order
parameter_files = ('pace_Kd.csv', 'pace_bbp_442.csv', 'pace_avw.csv')
kd, bbp, avw = map(pd.read_csv, parameter_files)

In [143]:
# Give each table parameter-specific column names and keep only the position
# and value columns, so the three tables can sit side by side after merging
def _label_columns(frame, prefix):
    """Rename the generic value columns to `<prefix>_raw` / `<prefix>_normalized`."""
    raw_name = f'{prefix}_raw'
    normalized_name = f'{prefix}_normalized'
    renamed = frame.rename(columns={
        'parameter_value': raw_name,
        'parameter_value_normalized': normalized_name,
    })
    return renamed[['latitude', 'longitude', raw_name, normalized_name]]


kd = _label_columns(kd, 'Kd')
bbp = _label_columns(bbp, 'bbp')
avw = _label_columns(avw, 'avw')

In [145]:
# combine all 3 dataframes on lat, lon
# Kd is the left table; BBP and AVW columns are attached by left joins, so the
# result keeps one row per Kd row.
# NOTE(review): the join keys are floats read back from CSV, so rows only match
# when all three files were written with identical pixel coordinates.
merge_keys = ['latitude', 'longitude']

results = kd
for extra in (bbp, avw):
    # If two input points resolved to the same PACE pixel, every file contains
    # that pixel twice; joining duplicates against duplicates would multiply
    # those rows. Keep one row per pixel on the right-hand side (rows for the
    # same pixel are expected to carry the same values).
    results = pd.merge(
        left=results,
        right=extra.drop_duplicates(subset=merge_keys),
        how='left',
        on=merge_keys,
    )
results


Unnamed: 0,latitude,longitude,Kd_raw,Kd_normalized,bbp_raw,bbp_normalized,avw_raw,avw_normalized
0,32.468266,-80.35472,0.4636,0.08806,0.0226,0.152665,575.05444,0.573951
1,30.114605,-89.10619,,,,,,
2,30.421574,-81.35824,0.126,0.013138,0.007415,0.05081,534.40356,0.065994
3,30.977882,-81.382744,0.8438,0.172437,0.03684,0.248181,588.99835,0.748188
4,31.308884,-81.2027,1.917,0.410608,0.13655,0.916994,563.5454,0.430139
5,31.849695,-81.03976,0.7554,0.152818,0.034095,0.229768,594.9616,0.822703
6,32.071373,-80.84547,1.235,0.259254,0.08426,0.566254,571.7255,0.532354
7,32.73289,-79.84804,1.9678,0.421882,0.148925,1.0,563.7484,0.432675
8,33.098446,-79.283485,0.7104,0.142832,0.033605,0.226482,600.63385,0.893581
9,33.192657,-79.15479,0.802,0.16316,0.033955,0.228829,596.5991,0.843164


In [148]:
# Write the combined Kd / BBP / AVW table to disk
combined_path = 'pace_kd_bbp_avw.csv'
results.to_csv(combined_path, index=False, header=True)