<a href="https://colab.research.google.com/github/MathewBiddle/ioos_metrics/blob/yearly_ra_gts/notebooks/IOOS_obs_2_NDBC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install erddapy

Collecting erddapy
  Downloading erddapy-2.2.4-py3-none-any.whl.metadata (6.7 kB)
Downloading erddapy-2.2.4-py3-none-any.whl (24 kB)
Installing collected packages: erddapy
Successfully installed erddapy-2.2.4


In [2]:
import pandas as pd
from erddapy import ERDDAP

# Function to grab the data.

In [3]:
def get_ndbc_full_stats():
    e = ERDDAP(
        server="https://erddap.ioos.us/erddap",
        protocol="tabledap",
    )
    e.response = "csv"
    dsets = {"IOOS": "gts_regional_statistics",
             "NDBC": "gts_ndbc_statistics",
             "non-NDBC": "gts_non_ndbc_statistics"}
    df_out = pd.DataFrame()
    for key, value in dsets.items():
        e.dataset_id = value
        df = e.to_pandas(
            index_col="time (UTC)",
            parse_dates=True
        )
        df["source"] = key
        df_out = pd.concat([df_out,df])
    return df_out

# Go get the data and return the dataframe

In [4]:
df = get_ndbc_full_stats()

df

Unnamed: 0_level_0,Year,Month,locationID,region,sponsor,met,wave,source
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01 00:00:00+00:00,2018,1,46108,AOOS,ALASKA OCEAN OBSERVING SYSTEM,0,2592,IOOS
2018-01-01 00:00:00+00:00,2018,1,AJXA2,AOOS,MARINE EXCHANGE OF ALASKA,8796,0,IOOS
2018-01-01 00:00:00+00:00,2018,1,CDXA2,AOOS,MARINE EXCHANGE OF ALASKA,4782,0,IOOS
2018-01-01 00:00:00+00:00,2018,1,ERXA2,AOOS,MARINE EXCHANGE OF ALASKA,5634,0,IOOS
2018-01-01 00:00:00+00:00,2018,1,GIXA2,AOOS,MARINE EXCHANGE OF ALASKA,8798,0,IOOS
...,...,...,...,...,...,...,...,...
2025-03-01 00:00:00+00:00,2025,3,OCSM2,,U.S. ARMY CORPS OF ENGINEERS,0,0,non-NDBC
2025-03-01 00:00:00+00:00,2025,3,44097,,U.S. ARMY CORPS OF ENGINEERS,0,2976,non-NDBC
2025-03-01 00:00:00+00:00,2025,3,FRFN7,,U.S. ARMY CORPS OF ENGINEERS,0,0,non-NDBC
2025-03-01 00:00:00+00:00,2025,3,44100,,U.S. ARMY CORPS OF ENGINEERS,0,2976,non-NDBC


# Compute yearly totals of number of IOOS messages sent to the GTS.

In [5]:
yearly_totals = df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum()

yearly_totals['total'] = yearly_totals['met'] + yearly_totals['wave']

print(yearly_totals.loc['IOOS'].to_markdown(floatfmt=''))

| time (UTC)                |        met |      wave |      total |
|:--------------------------|-----------:|----------:|-----------:|
| 2018-12-31 00:00:00+00:00 | 10250188.0 | 1586648.0 | 11836836.0 |
| 2019-12-31 00:00:00+00:00 | 11344260.0 | 1682754.0 | 13027014.0 |
| 2020-12-31 00:00:00+00:00 | 10501536.0 | 1410472.0 | 11912008.0 |
| 2021-12-31 00:00:00+00:00 | 10189996.0 | 1610856.0 | 11800852.0 |
| 2022-12-31 00:00:00+00:00 | 11295426.0 | 1616542.0 | 12911968.0 |
| 2023-12-31 00:00:00+00:00 | 12719038.0 | 1796258.0 | 14515296.0 |
| 2024-12-31 00:00:00+00:00 | 12794686.0 | 1726010.0 | 14520696.0 |
| 2025-12-31 00:00:00+00:00 |  2910610.0 |  338230.0 |  3248840.0 |


# Compute yearly totals of IOOS messages sent to the GTS by region.

In [6]:
yearly_region_totals = df.groupby(by=["source", "region", pd.Grouper(freq="YE")])[['met','wave']].sum()

yearly_region_totals['total'] = yearly_region_totals['met'] + yearly_region_totals['wave']

yearly_region_totals.reset_index('region',inplace=True)

print(yearly_region_totals.loc['IOOS'].to_markdown(floatfmt=''))

| time (UTC)                | region   |     met |   wave |   total |
|:--------------------------|:---------|--------:|-------:|--------:|
| 2018-12-31 00:00:00+00:00 | AOOS     | 2571070 |  27968 | 2599038 |
| 2019-12-31 00:00:00+00:00 | AOOS     | 2810422 |  31584 | 2842006 |
| 2020-12-31 00:00:00+00:00 | AOOS     | 2697606 |   8258 | 2705864 |
| 2021-12-31 00:00:00+00:00 | AOOS     | 2574888 |  24336 | 2599224 |
| 2022-12-31 00:00:00+00:00 | AOOS     | 3675118 |  37414 | 3712532 |
| 2023-12-31 00:00:00+00:00 | AOOS     | 5620044 |  34026 | 5654070 |
| 2024-12-31 00:00:00+00:00 | AOOS     | 5939434 |  31992 | 5971426 |
| 2025-12-31 00:00:00+00:00 | AOOS     | 1516796 |   8100 | 1524896 |
| 2018-12-31 00:00:00+00:00 | CARICOOS |  619422 |  68520 |  687942 |
| 2019-12-31 00:00:00+00:00 | CARICOOS |  719876 |  83582 |  803458 |
| 2020-12-31 00:00:00+00:00 | CARICOOS |  629990 |  93232 |  723222 |
| 2021-12-31 00:00:00+00:00 | CARICOOS |  676958 |  99074 |  776032 |
| 2022-12-31 00:00:0

# Calculate the percentage of IOOS OBS per year

In [7]:
yearly_totals = df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum()

pcnt_obs_year = yearly_totals.loc['IOOS'] / (yearly_totals.loc['IOOS'] + yearly_totals.loc['NDBC'] + yearly_totals.loc['non-NDBC'])

# pcnt_obs_year['total'] =

print(pcnt_obs_year.to_markdown())

| time (UTC)                |      met |     wave |
|:--------------------------|---------:|---------:|
| 2018-12-31 00:00:00+00:00 | 0.155619 | 0.296562 |
| 2019-12-31 00:00:00+00:00 | 0.167812 | 0.312697 |
| 2020-12-31 00:00:00+00:00 | 0.154613 | 0.277295 |
| 2021-12-31 00:00:00+00:00 | 0.149813 | 0.3107   |
| 2022-12-31 00:00:00+00:00 | 0.153994 | 0.278716 |
| 2023-12-31 00:00:00+00:00 | 0.167084 | 0.222074 |
| 2024-12-31 00:00:00+00:00 | 0.167279 | 0.217845 |
| 2025-12-31 00:00:00+00:00 | 0.173252 | 0.228609 |


In [8]:
yearly_totals.loc['IOOS'] + yearly_totals.loc['NDBC'] + yearly_totals.loc['non-NDBC']

# totals['total'] = totals['met']+totals['wave']

# totals['total'].sum()

#totals

Unnamed: 0_level_0,met,wave
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31 00:00:00+00:00,65867386,5350148
2019-12-31 00:00:00+00:00,67601032,5381422
2020-12-31 00:00:00+00:00,67921642,5086546
2021-12-31 00:00:00+00:00,68018280,5184606
2022-12-31 00:00:00+00:00,73349742,5799952
2023-12-31 00:00:00+00:00,76123506,8088546
2024-12-31 00:00:00+00:00,76487008,7923096
2025-12-31 00:00:00+00:00,16799904,1479514


In [9]:
yearly_totals.loc['IOOS'].sum().sum()

np.int64(93773510)

# Total number of unique platforms submitting data to the GTS from the IOOS regions

IOOS location IDs

In [11]:
df.loc[df['source']=='IOOS'].locationID

Unnamed: 0_level_0,locationID
time (UTC),Unnamed: 1_level_1
2018-01-01 00:00:00+00:00,46108
2018-01-01 00:00:00+00:00,AJXA2
2018-01-01 00:00:00+00:00,CDXA2
2018-01-01 00:00:00+00:00,ERXA2
2018-01-01 00:00:00+00:00,GIXA2
...,...
2025-03-01 00:00:00+00:00,SIPF1
2025-03-01 00:00:00+00:00,42098
2025-03-01 00:00:00+00:00,44095
2025-03-01 00:00:00+00:00,44086


In [12]:
n_ioos_stn = len(df.loc[df['source']=='IOOS'].locationID.unique())

n_ioos_stn

260

All location ID's

In [13]:
n_all_stn = len(df.locationID.unique())

n_all_stn

1039

IOOS stations represent this of the total # stations whose data are delivered by NDBC to the GTS

In [14]:
station_pcnt = n_ioos_stn / n_all_stn

print(f'IOOS stations represent {station_pcnt*100:.2f}% of the total {n_all_stn} stations whose data are delivered by NDBC to the GTS')

IOOS stations represent 25.02% of the total 1039 stations whose data are delivered by NDBC to the GTS


Focusing on station platform counts (260, I believe) - Can you compute what % that is of the total delivered by NDBC to the GTS? (I didn't notice if you identify the total # stations anywhere).

So, we can say "IOOS stations represent x% of the total # stations whose data are delivered by NDBC to the GTS"

In [15]:
ioos_ndbc_non_NDBC = yearly_totals.loc['IOOS'] + yearly_totals.loc['NDBC'] + yearly_totals.loc['non-NDBC']

yearly_totals.loc['IOOS','total'].sum() / ioos_ndbc_non_NDBC['total'].sum()

  yearly_totals.loc['IOOS','total'].sum() / ioos_ndbc_non_NDBC['total'].sum()


np.float64(nan)

# Total number of sponsors submitting data to the GTS via IOOS Regions

In [16]:
df.loc[df['source']=='IOOS'].sponsor

Unnamed: 0_level_0,sponsor
time (UTC),Unnamed: 1_level_1
2018-01-01 00:00:00+00:00,ALASKA OCEAN OBSERVING SYSTEM
2018-01-01 00:00:00+00:00,MARINE EXCHANGE OF ALASKA
2018-01-01 00:00:00+00:00,MARINE EXCHANGE OF ALASKA
2018-01-01 00:00:00+00:00,MARINE EXCHANGE OF ALASKA
2018-01-01 00:00:00+00:00,MARINE EXCHANGE OF ALASKA
...,...
2025-03-01 00:00:00+00:00,FLORIDA INSTITUTE OF TECHNOLOGY
2025-03-01 00:00:00+00:00,GREATER TAMPA BAY MARINE ADVISORY COUNCIL PORTS
2025-03-01 00:00:00+00:00,UNIVERSITY OF NORTH CAROLINA COASTAL STUDIES
2025-03-01 00:00:00+00:00,UNIVERSITY OF NORTH CAROLINA COASTAL STUDIES


In [17]:
len(df.loc[df['source']=='IOOS'].sponsor.unique())

47

# Average number of messages (met+wave) per year

For all the years, including 2025

In [18]:
print(df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum().loc['IOOS'].mean().to_markdown(floatfmt=''))
print(df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum().loc['IOOS'].mean().sum())

|      |           0 |
|:-----|------------:|
| met  | 10250717.5  |
| wave |  1470971.25 |
11721688.75


for 2018-2024, since 2025 is only through March.

In [19]:
print(df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum().loc['IOOS'].loc[:"2024"].mean().to_markdown(floatfmt=''))
print(df.groupby(by=["source", pd.Grouper(freq="YE")])[['met','wave']].sum().loc['IOOS'].loc[:"2024"].mean().sum())

|      |                   0 |
|:-----|--------------------:|
| met  | 11299304.285714285  |
| wave |  1632791.4285714286 |
12932095.714285715


## Trying to regroup to account for CO-OPs 6-min obs.

In [20]:
df.groupby(by=["source", pd.Grouper(freq='h')])[['met','wave']].sum().loc['non-NDBC']

Unnamed: 0_level_0,met,wave
time (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01 00:00:00+00:00,4302942,185918
2018-02-01 00:00:00+00:00,3859286,169298
2018-03-01 00:00:00+00:00,4233350,189658
2018-04-01 00:00:00+00:00,4152756,181056
2018-05-01 00:00:00+00:00,4298992,203102
...,...,...
2024-11-01 00:00:00+00:00,4411730,242504
2024-12-01 00:00:00+00:00,4483752,190722
2025-01-01 00:00:00+00:00,4158458,205566
2025-02-01 00:00:00+00:00,3352772,190114


## Total number of stations

In [21]:
print(len(df.locationID.unique()))

print(df.locationID.unique())

1039
['46108' 'AJXA2' 'CDXA2' ... '46236' '42354' 'CXLM2']


In [22]:
260/1039

0.2502406159769009

In [23]:
df.locationID

Unnamed: 0_level_0,locationID
time (UTC),Unnamed: 1_level_1
2018-01-01 00:00:00+00:00,46108
2018-01-01 00:00:00+00:00,AJXA2
2018-01-01 00:00:00+00:00,CDXA2
2018-01-01 00:00:00+00:00,ERXA2
2018-01-01 00:00:00+00:00,GIXA2
...,...
2025-03-01 00:00:00+00:00,OCSM2
2025-03-01 00:00:00+00:00,44097
2025-03-01 00:00:00+00:00,FRFN7
2025-03-01 00:00:00+00:00,44100


"IOOS contributes the largest number of NOS platforms to the GTS." For the latter, we'd need to identify "NOS" (not to confuse with an earlier comment in a different issue about defining NOS, in which case I was equating NOS with CO-OPS based on the non-NDBC report labels). In this case,
1. NOS = IOOS-regional (all) + non-NDBC (National ocean service, NOAA NOS PORTS, CBIBS, and NERRS.)

2. Perhaps add another condition where if there are 0s for stations, then omit from the station count?

3. And constrain the station count to only the Calendar Year 2024.

In [61]:
non_ndbc_nos_list = ['CHESAPEAKE BAY INTERPRETIVE BUOY SYSTEM',
'NATIONAL ESTUARINE RESEARCH RESERVE SYSTEM',
'NATIONAL OCEAN SERVICE',
'NOAA NOS PHYSICAL OCEANOGRAPHIC RT SYSTEM PROGRAM']

df_non_ndbc = df.loc[df['source']=='non-NDBC']

nos = df_non_ndbc[df_non_ndbc['sponsor'].isin(non_ndbc_nos_list)]
print('total NOS:',len(nos))
condition = (nos['met'] ==0) & (nos['wave']==0)
nos = nos[~condition]
print('total NOS-empty:',len(nos))


ioos = df.loc[df['source']=='IOOS']
print('total IOOS:',len(ioos))
condition = (ioos['met'] ==0) & (ioos['wave']==0)
ioos = ioos[~condition]
print('total IOOS-empty:',len(ioos))

ioos_platform_count = len(ioos.locationID.unique())
nos_platform_count = len(nos.locationID.unique())

print(f'{ioos_platform_count} / {nos_platform_count + ioos_platform_count} = **{(ioos_platform_count/(nos_platform_count+ioos_platform_count))*100:.2f}%**')

total NOS: 28198
total NOS-empty: 25601
total IOOS: 15971
total IOOS-empty: 12385
237 / 575 = **41.22%**


In [62]:
for loc in nos.locationID.unique():
  if len(nos.loc[nos['locationID']==loc].source.unique()) >1:
    print(loc,nos.loc[nos['locationID']==loc].source.unique())

In [44]:
count = 0
for sponsor in non_ndbc_nos_list:
  length = len(nos.loc[nos['sponsor']==sponsor].locationID.unique())
  print(f'{sponsor}: {length}')
  count = count + length

print(f'Total = {count}')
print(f'Total w/ IOOS = {count+ioos_platform_count}')
#len(nos.loc[nos['sponsor']=='CHESAPEAKE BAY INTERPRETIVE BUOY SYSTEM'].locationID.unique())

CHESAPEAKE BAY INTERPRETIVE BUOY SYSTEM: 8
NATIONAL ESTUARINE RESEARCH RESERVE SYSTEM: 35
NATIONAL OCEAN SERVICE: 283
NOAA NOS PHYSICAL OCEANOGRAPHIC RT SYSTEM PROGRAM: 83
Total = 409
Total w/ IOOS = 646


In [54]:
non_ndbc_nos_list = ['CHESAPEAKE BAY INTERPRETIVE BUOY SYSTEM',
'NATIONAL ESTUARINE RESEARCH RESERVE SYSTEM',
'NATIONAL OCEAN SERVICE',
'NOAA NOS PHYSICAL OCEANOGRAPHIC RT SYSTEM PROGRAM']

df_non_ndbc = df.loc[df['source']=='non-NDBC']
#print(len(df_non_ndbc))


filtered = df_non_ndbc[df_non_ndbc['sponsor'].isin(non_ndbc_nos_list)]
print('total NOS:',len(filtered))

nos = pd.concat([filtered,df.loc[df['source']=='IOOS']])
print('total IOOS:',len(nos.loc[nos['source']=='IOOS']))


# drop empty met and wave rows
condition = (nos['met'] ==0) & (nos['wave']==0)

nos = nos[~condition]
print('total IOOS-empty:',len(nos.loc[nos['source']=='IOOS']))
print('total NOS-empty:',len(nos.loc[nos['source']!='IOOS']))

ioos_platform_count = len(nos.loc[nos['source']=='IOOS'].locationID.unique())
nos_platform_count = len(nos.locationID.unique())

print(f'{ioos_platform_count} / {nos_platform_count} = **{(ioos_platform_count/nos_platform_count)*100:.2f}%**')

total NOS: 28198
total IOOS: 15971
total IOOS-empty: 12385
total NOS-empty: 25601
237 / 569 = **41.65%**


In [60]:
for loc in nos.locationID.unique():
  if len(nos.loc[nos['locationID']==loc].source.unique()) >1:
    print(loc,nos.loc[nos['locationID']==loc].source.unique())

BZST2 ['non-NDBC' 'IOOS']
EPTT2 ['non-NDBC' 'IOOS']
MGPT2 ['non-NDBC' 'IOOS']
TXPT2 ['non-NDBC' 'IOOS']
FPST2 ['non-NDBC' 'IOOS']
NUET2 ['non-NDBC' 'IOOS']


In [46]:
nos.loc[nos['sponsor']=='NATIONAL OCEAN SERVICE'].source.unique()

array(['non-NDBC'], dtype=object)

In [27]:
len(nos.loc[nos['sponsor']=='NATIONAL ESTUARINE RESEARCH RESERVE SYSTEM'].locationID.unique())

35

In [28]:
len(nos.loc[nos['sponsor']=='NATIONAL OCEAN SERVICE'].locationID.unique())

283

In [29]:
len(nos.loc[nos['sponsor']=='NOAA NOS PHYSICAL OCEANOGRAPHIC RT SYSTEM PROGRAM'].locationID.unique())

83

In [30]:
df.loc[df['source']=='non-NDBC'].sponsor.unique()

array([nan, 'BP INC.', 'EPA & MEXICAN GOVERNMENT COOPERATIVE PROGRAM',
       'CHESAPEAKE BAY INTERPRETIVE BUOY SYSTEM',
       'SCRIPPS WAVERIDER COASTAL DATA INFORMATION PROGRAM',
       'EVERGLADES NATIONAL PARK', 'GREAT LAKES RESEARCH LABORATORY',
       'INTEGRATED CORAL OBSERVING NETWORK',
       'LOUISIANA OFFSHORE OIL PORT', 'MOSS LANDING MARINE LABORATORIES',
       'NATIONAL ESTUARINE RESEARCH RESERVE SYSTEM',
       'NATIONAL OCEAN SERVICE',
       'NATIONAL PARK SERVICE - LAKE MEAD NATIONAL REC AREA',
       'NATIONAL RENEWABLE ENERGY LABORATORY',
       'NATIONAL WEATHER SERVICE, ALASKA REGION',
       'NATIONAL WEATHER SERVICE, CENTRAL REGION',
       'NATIONAL WEATHER SERVICE, EASTERN REGION',
       'OCEAN OBSERVATORIES INITIATIVE', 'PETROBRAS', 'SHELL OIL',
       'U.S. ARMY CORPS OF ENGINEERS',
       'WOODS HOLE OCEANOGRAPHIC INSTITUTION', 'VERMONT EPSCOR',
       'NATIONAL PARK SERVICES - SLEEPING BEAR DUNES',
       'ALASKA OCEAN OBSERVING SYSTEM',
       'SUNY PLA

In [31]:
df_ioos = df.loc[(df['source']=='IOOS')]

condition = (df_ioos['met'] ==0) & (df_ioos['wave']==0)

len(df_ioos[~condition].locationID.unique())

237

In [32]:
df.locationID.unique()

condition = (df['met'] ==0) & (df['wave']==0)

len(df[~condition].locationID.unique())

975

In [33]:
(237/975) * 100

24.307692307692307