# 1 - Data Access and Download

In [1]:
import os, sys, re, ast
import pandas as pd
import numpy as np
import xarray as xr

from ooi_data_explorations.common import load_kdata

from ooinet import M2M

### Available Gliders and Datasets

##### ADCP Datasets

In [7]:
# Search the CP15MOAS array inventory for ADCP instruments and display the matches
adcp_datasets = M2M.search_datasets(
    array='CP15MOAS',
    instrument='ADCP',
    English_names=True,
)
adcp_datasets

Searching https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP15MOAS


Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL388,Coastal Glider 388,01-ADCPAM000,Velocity Profiler (600kHz),CP15MOAS-GL388-01-ADCPAM000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
1,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL387,Coastal Glider 387,01-ADCPAM000,Velocity Profiler (600kHz),CP15MOAS-GL387-01-ADCPAM000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"
2,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL379,Coastal Glider 379,01-ADCPAM000,Velocity Profiler (600kHz),CP15MOAS-GL379-01-ADCPAM000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
3,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL335,Coastal Glider 335,01-ADCPAM000,Velocity Profiler (600kHz),CP15MOAS-GL335-01-ADCPAM000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
4,CP15MOAS,Coastal Pioneer MAB Mobile Assets,G0913,Coastal Glider 913,01-ADCPAM000,Velocity Profiler (600kHz),CP15MOAS-G0913-01-ADCPAM000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2]"


##### Engineering Datasets

In [6]:
# Search the CP15MOAS array inventory for engineering (ENG) instruments and display the matches
eng_datasets = M2M.search_datasets(
    array='CP15MOAS',
    instrument='ENG',
    English_names=True,
)
eng_datasets

Searching https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/CP15MOAS


Unnamed: 0,array,array_name,node,node_name,instrument,instrument_name,refdes,url,deployments
0,CP15MOAS,Coastal Pioneer MAB Mobile Assets,PG564,Profiling Glider 564,00-ENG000000,Mobile Asset Controller,CP15MOAS-PG564-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"
1,CP15MOAS,Coastal Pioneer MAB Mobile Assets,PG514,Profiling Glider 514,00-ENG000000,Mobile Asset Controller,CP15MOAS-PG514-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
2,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL559,Coastal Glider 559,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL559-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
3,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL388,Coastal Glider 388,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL388-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
4,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL387,Coastal Glider 387,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL387-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3]"
5,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL380,Coastal Glider 380,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL380-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
6,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL379,Coastal Glider 379,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL379-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2, 3, 4]"
7,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL376,Coastal Glider 376,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL376-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]
8,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL339,Coastal Glider 339,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL339-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,"[1, 2]"
9,CP15MOAS,Coastal Pioneer MAB Mobile Assets,GL335,Coastal Glider 335,00-ENG000000,Mobile Asset Controller,CP15MOAS-GL335-00-ENG000000,https://ooinet.oceanobservatories.org/api/m2m/...,[1]


##### Select an ADCP and associated Engineering dataset

In [14]:
# Reference designators for glider GL388: the ADCP first, then the engineering instrument
refdes, eng_refdes = (
    'CP15MOAS-GL388-01-ADCPAM000',
    'CP15MOAS-GL388-00-ENG000000',
)

### Download ADCP Data

##### Check available datastreams

In [15]:
# List the method/stream combinations available for the selected ADCP reference designator
M2M.get_datastreams(refdes)

Unnamed: 0,refdes,method,stream
0,CP15MOAS-GL388-01-ADCPAM000,recovered_host,adcp_bottom_track_config
1,CP15MOAS-GL388-01-ADCPAM000,recovered_host,adcp_bottom_track_earth
2,CP15MOAS-GL388-01-ADCPAM000,recovered_host,adcp_config
3,CP15MOAS-GL388-01-ADCPAM000,recovered_host,adcp_engineering
4,CP15MOAS-GL388-01-ADCPAM000,recovered_host,adcp_velocity_earth


In [16]:
# Show the deployment records for the selected ADCP reference designator
M2M.get_deployments(refdes)

Unnamed: 0,deploymentNumber,uid,assetId,latitude,longitude,depth,deployStart,deployEnd,deployCruise,recoverCruise
0,1,CGINS-ADCPAM-55979,1793,35.95845,-74.76768,350.0,2024-11-20 16:04:00,2025-02-15 12:36:00,PH20241119,KH20250215


In [18]:
# Break the reference designator into the site / node / sensor pieces passed to load_kdata
site, node, sensor = refdes.split("-", 2)
method = 'recovered_host'

# All four streams are selected with the same file-name filter
kdata_tag = f'deployment0001*{refdes}*.nc'

# Streams in load order: velocity, engineering, config, bottom track
adcp_streams = (
    'adcp_velocity_earth',
    'adcp_engineering',
    'adcp_config',
    'adcp_bottom_track_earth',
)

loaded_streams = []
for stream in adcp_streams:
    loaded_streams.append(load_kdata(site, node, sensor, method, stream, tag=kdata_tag))

# Unpack in the same order as adcp_streams
adcp_data, adcp_engineering, adcp_config, adcp_bottom_track = loaded_streams

Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:04<00:00,  4.05s/it]


Merging the data files into a single dataset
Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:00<00:00,  2.53it/s]


Merging the data files into a single dataset
Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:00<00:00, 10.86it/s]


Merging the data files into a single dataset
Downloading 1 data file(s) from the local kdata directory


Loading and Processing Data Files: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]

Merging the data files into a single dataset





###### Save the ADCP Data & associated datasets

In [21]:
# Pair each ADCP dataset with its destination file in the raw data directory:
# velocity data, engineering, config, then bottom track
raw_outputs = [
    (adcp_data, f"../data/raw/{refdes}.deployment0001.adcp_data.nc"),
    (adcp_engineering, f"../data/raw/{refdes}.deployment0001.adcp_engineering.nc"),
    (adcp_config, f"../data/raw/{refdes}.deployment0001.adcp_config.nc"),
    (adcp_bottom_track, f"../data/raw/{refdes}.deployment0001.adcp_bottom_track.nc"),
]

# Write every dataset with identical netCDF settings
for dataset, out_path in raw_outputs:
    dataset.to_netcdf(out_path, format="netcdf4", engine="h5netcdf")

##### Glider Data
Next, we need the glider engineering data, which contains the key variables needed to calculate differences between waypoints and surfacings. These data are not available as netCDF files or on kdata, so we need to make a THREDDS request and download the individual CSV files.

In [22]:
# List the method/stream combinations available for the glider engineering reference designator
M2M.get_datastreams(eng_refdes)

Unnamed: 0,refdes,method,stream
0,CP15MOAS-GL388-00-ENG000000,recovered_host,glider_eng_metadata_recovered
1,CP15MOAS-GL388-00-ENG000000,recovered_host,glider_eng_recovered
2,CP15MOAS-GL388-00-ENG000000,recovered_host,glider_eng_sci_recovered
3,CP15MOAS-GL388-00-ENG000000,recovered_host,glider_gps_position
4,CP15MOAS-GL388-00-ENG000000,telemetered,glider_eng_metadata
5,CP15MOAS-GL388-00-ENG000000,telemetered,glider_eng_sci_telemetered
6,CP15MOAS-GL388-00-ENG000000,telemetered,glider_eng_telemetered
7,CP15MOAS-GL388-00-ENG000000,telemetered,glider_gps_position


In [23]:
# Show the record for deployment 1 (includes the deployStart / deployEnd times).
# NOTE(review): this queries `refdes` (the ADCP) rather than `eng_refdes`, although this
# section deals with the glider engineering data — confirm which one is intended.
M2M.get_deployments(refdes, deploy_num="1")

Unnamed: 0,deploymentNumber,uid,assetId,latitude,longitude,depth,deployStart,deployEnd,deployCruise,recoverCruise
0,1,CGINS-ADCPAM-55979,1793,35.95845,-74.76768,350.0,2024-11-20 16:04:00,2025-02-15 12:36:00,PH20241119,KH20250215


In [41]:
from ooinet.M2M import SESSION
from bs4 import BeautifulSoup

# Directory holding the results of the async request for the glider engineering data.
# These result directories are only kept for ~6 months; once this one expires,
# submit a new request with the call below and use the URL it returns:
# thredds_url = M2M.get_thredds_url(eng_refdes, 'recovered_host', 'glider_eng_recovered', goldCopy=False, beginDT="2024-11-20 16:04:00", endDT="2025-02-15 12:36:00")
thredds_url = 'https://downloads.oceanobservatories.org/async_results/areed@whoi.edu/20250516T193400826Z-CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered/'

# Fetch the directory listing. Fail loudly on an HTTP error (e.g. an expired request)
# instead of parsing an error page into an empty catalog.
response = SESSION.get(thredds_url)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, "html.parser")

# Keep only the links whose text ends in ".csv".
# `string=` is the current name of the deprecated `text=` keyword of find_all.
pattern = re.compile(r'.*\.csv$')
catalog = sorted(link.get('href') for link in soup.find_all('a', string=pattern))
catalog

  catalog = sorted([node.get('href') for node in soup.find_all('a', text=pattern)])


['deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241120T160800-20241121T115954.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241121T120019-20241122T115956.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241122T120001-20241123T115958.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241123T120003-20241124T115953.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241124T120018-20241125T115946.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241125T120016-20241126T115957.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241126T120002-20241127T115932.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered_20241127T120001-20241128T115944.csv',
 'deployment0001_CP15MOAS-GL388-00-ENG000000-rec

In [42]:
# Load the glider engineering data.
# Read every CSV first and concatenate once: calling pd.concat inside the loop
# re-copies all previously loaded rows on every iteration.
frames = [pd.read_csv(thredds_url + file) for file in catalog]

# pd.concat raises on an empty list, so keep an empty frame when the catalog has no CSV files
glider_engineering = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

glider_engineering.head()

Unnamed: 0,obs,c_air_pump,c_ballast_pumped,c_battpos,c_battroll,c_bsipar_on,c_de_oil_vol,c_dvl_on,c_flbbcd_on,c_heading,...,m_water_vy,m_why_started,m_x_lmc,m_y_lmc,port_timestamp,preferred_timestamp,time,x_last_wpt_lat,x_last_wpt_lon,x_system_clock_adjusted
0,"('0',)",1,260.0,0.7,0.0,-1.0,260.0,-1.0,-1.0,0.0,...,0.0,64,0.0,0.0,0.0,internal_timestamp,3941108000.0,39.833332,-70.666664,0.0
1,"('1',)",1,260.0,0.7,0.0,-1.0,260.0,-1.0,-1.0,0.0,...,0.0,64,0.0,0.0,0.0,internal_timestamp,3941108000.0,39.833332,-70.666664,0.0
2,"('2',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,-99,,,0.0,internal_timestamp,3941108000.0,,,
3,"('3',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,-99,,,0.0,internal_timestamp,3941108000.0,,,
4,"('4',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,-99,,,0.0,internal_timestamp,3941108000.0,,,


In [43]:
from ooinet.utils import ntp_seconds_to_datetime

# Convert the 'time' column with ntp_seconds_to_datetime, then make it the index
timestamps = glider_engineering['time'].apply(ntp_seconds_to_datetime)
glider_engineering = glider_engineering.assign(time=timestamps).set_index('time')
glider_engineering.head()

Unnamed: 0_level_0,obs,c_air_pump,c_ballast_pumped,c_battpos,c_battroll,c_bsipar_on,c_de_oil_vol,c_dvl_on,c_flbbcd_on,c_heading,...,m_water_vx,m_water_vy,m_why_started,m_x_lmc,m_y_lmc,port_timestamp,preferred_timestamp,x_last_wpt_lat,x_last_wpt_lon,x_system_clock_adjusted
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-11-20 16:08:00.272670,"('0',)",1,260.0,0.7,0.0,-1.0,260.0,-1.0,-1.0,0.0,...,0.0,0.0,64,0.0,0.0,0.0,internal_timestamp,39.833332,-70.666664,0.0
2024-11-20 16:08:53.539060,"('1',)",1,260.0,0.7,0.0,-1.0,260.0,-1.0,-1.0,0.0,...,0.0,0.0,64,0.0,0.0,0.0,internal_timestamp,39.833332,-70.666664,0.0
2024-11-20 16:09:25.671230,"('2',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,,-99,,,0.0,internal_timestamp,,,
2024-11-20 16:09:30.097960,"('3',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,,-99,,,0.0,internal_timestamp,,,
2024-11-20 16:09:34.552520,"('4',)",1,,0.7,0.0,-1.0,260.0,-1.0,-1.0,,...,,,-99,,,0.0,internal_timestamp,,,


###### Save the glider engineering dataset

In [44]:
# Write the glider engineering table to a single CSV in the raw data directory
raw_csv_path = "../data/raw/deployment0001_CP15MOAS-GL388-00-ENG000000-recovered_host-glider_eng_recovered.csv"
glider_engineering.to_csv(raw_csv_path)

---
## Science Data
Next, download the associated glider data from the IOOS GDAC

In [45]:
from erddapy import ERDDAP

In [46]:
# Open a client for the IOOS GDAC ERDDAP server
e = ERDDAP(server='https://gliders.ioos.us/erddap')

# Build the CSV search URL for "ooi" and read the matching datasets into a table
search_url = e.get_search_url(search_for="ooi", response="csv")
ooi_datasets = pd.read_csv(search_url)

# Show only the dataset IDs that contain the glider of interest
glider_id = 'cp_388'
list(filter(lambda dataset_id: glider_id in dataset_id, ooi_datasets['Dataset ID']))

['cp_388-20241120T1604-delayed',
 'cp_388-20241120T1604',
 'cp_388-20141006T2014-delayed',
 'cp_388-20151022T1232-delayed',
 'cp_388-20160809T1406-delayed',
 'cp_388-20170116T1324-delayed',
 'cp_388-20171101T0120-delayed',
 'cp_388-20181030T2211-delayed',
 'cp_388-20190618T2258-delayed',
 'cp_388-20191212T2247-delayed',
 'cp_388-20200613T2149-delayed',
 'cp_388-20210517T2025-delayed',
 'cp_388-20220104T2057-delayed',
 'cp_388-20220410T1600-delayed',
 'cp_388-20220411T1624-delayed',
 'cp_388-20200613T2149',
 'cp_388-20210517T2025',
 'cp_388-20220104T2057',
 'cp_388-20220411T1624']

In [47]:
# Configure the ERDDAP client for the delayed-mode dataset of the 2024-11-20 deployment
e.dataset_id = 'cp_388-20241120T1604-delayed'
# Use the tabledap protocol
e.protocol = 'tabledap'
# No constraints are set (presumably this requests the full dataset — TODO confirm)
e.constraints = None
# Build the OPeNDAP download URL for the configured dataset
# NOTE(review): data_url is not referenced by any other cell visible in this notebook
data_url = e.get_download_url(response='opendap')

In [48]:
# Pull the dataset into xarray, then re-index it on the precise timestamps:
# 'precise_time' is copied into 'time', the original variable is dropped,
# and 'time' replaces 'obs' as the dimension
gdac = e.to_xarray()
gdac = (
    gdac.assign(time=gdac['precise_time'])
    .drop_vars('precise_time')
    .swap_dims({'obs': 'time'})
)
# Load the values; the returned dataset is shown as the cell output
gdac.load()

In [49]:
# Write the GDAC science dataset to the raw data directory as netCDF4 (h5netcdf engine)
science_path = "../data/raw/CP15MOAS-GL388.deployment0001.science.nc"
gdac.to_netcdf(science_path, engine='h5netcdf', format='netcdf4')

---
# Other Metadata

This includes getting the mooring locations and other metadata.

##### Mooring location data

In [4]:
github_url = "https://raw.githubusercontent.com/oceanobservatories/asset-management/refs/heads/master/deployment"

# One record per mooring: its name plus the mean lat, lon and water depth
# taken over the rows of its deployment sheet
mooring_records = []
for mooring in M2M.get_api(M2M.URLS['vocab']):
    # Skip anything outside the listed array prefixes, and skip mobile assets (MOAS)
    if not mooring.startswith(('CP','GA','GI','GS','GP')) or 'MOAS' in mooring:
        continue
    deploy_sheet = pd.read_csv(f'{github_url}/{mooring}_Deploy.csv')
    mooring_records.append({
        'mooring': mooring,
        'lat': deploy_sheet['lat'].mean(),
        'lon': deploy_sheet['lon'].mean(),
        'depth': deploy_sheet['water_depth'].mean(),
    })

# Fix the column order explicitly so it is the same even when no mooring matched
mooring_locations = pd.DataFrame(mooring_records, columns=['mooring', 'lat', 'lon', 'depth'])
mooring_locations

Unnamed: 0,mooring,lat,lon,depth
0,CP01CNPM,40.136172,-70.774114,133.0
1,CP01CNSM,40.136497,-70.774589,134.03125
2,CP01CNSP,40.137813,-70.777408,133.25
3,CP02PMCI,40.226715,-70.884176,126.875
4,CP02PMCO,40.098392,-70.883301,147.411765
5,CP02PMUI,40.36495,-70.775333,94.411765
6,CP02PMUO,39.941455,-70.775943,448.842105
7,CP03ISPM,40.363231,-70.882505,92.0
8,CP03ISSM,40.364041,-70.883005,93.530648
9,CP03ISSP,40.365247,-70.88673,94.5


In [5]:
# Write the mooring location table to the external data directory, without the row index
mooring_csv_path = '../data/external/mooring_locations.csv'
mooring_locations.to_csv(mooring_csv_path, index=False)