<a href="https://colab.research.google.com/github/MathewBiddle/ioos_by_the_numbers/blob/main/IOOS_BTN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Creating the IOOS By The Numbers

[Website](https://ioos.noaa.gov/about/ioos-by-the-numbers/)

[Spreadsheet](https://docs.google.com/spreadsheets/d/1AUfXmc3OwxpVdeMNjZyTGWjyR4ku3kRD5eexNrMORnI/edit#gid=516871794)

In [1]:
import pandas as pd

## Federal Partners

ICOOS Act/COORA

## Regional Associations

## Coastal & Ocean Modeling Testbed
Number of Projects

## HF Radar Installations

From http://hfrnet.ucsd.edu/sitediag/stationList.php

In [2]:
url = 'http://hfrnet.ucsd.edu/sitediag/stationList.php?output=CSV'

df_hfr = pd.read_csv(url)

hfr_installations = df_hfr['Station'].unique().size

print('HF Radar Installations:',hfr_installations)

HF Radar Installations: 179


## NGDAC Glider Days

From https://gliders.ioos.us/erddap/info/index.html?page=1&itemsPerPage=1000

Cumulative from 2008 - present



In [20]:
df_glider = pd.read_csv('https://gliders.ioos.us/erddap/tabledap/allDatasets.csvp?minTime%2CmaxTime')
df_glider.dropna(
    axis=0, 
    inplace=True,
    )

df_glider[['minTime (UTC)','maxTime (UTC)']] = df_glider[
                                                         ['minTime (UTC)','maxTime (UTC)']
                                                         ].apply(pd.to_datetime)

df_glider['glider_days'] = (df_glider['maxTime (UTC)'] - df_glider['minTime (UTC)']).dt.days

glider_days = df_glider['glider_days'].sum()

print('Cumulative glider days:', glider_days)

Cumulative glider days: 62704


In [21]:
start_date = '2021-10-01'
end_date = '2021-12-31'

# find glider deployments between 10/01 and 12/31
glider_day_within = df_glider.loc[
    (df_glider['minTime (UTC)'] > pd.to_datetime(start_date,utc=True)) &
    (df_glider['maxTime (UTC)'] < pd.to_datetime(end_date,utc=True))
]

# gliders that start before 10/01 and end after 12/31
glider_day_outside = df_glider.loc[
    (df_glider['minTime (UTC)'] < pd.to_datetime(start_date,utc=True)) &
    (df_glider['maxTime (UTC)'] > pd.to_datetime(end_date,utc=True))
]

glider_day_outside.loc[:, 'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)
glider_day_outside.loc[:, 'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)

# drop the ones from above as they will be duplicates in the next round of filtering
df_glider.drop(axis=0, index=glider_day_outside.index, inplace=True)

# Find gliders that start before 10/01 and end after 10/01
glider_day_lower = df_glider.loc[
    (df_glider['minTime (UTC)'] < pd.to_datetime(start_date,utc=True)) &
    (df_glider['maxTime (UTC)'] > pd.to_datetime(start_date,utc=True))
]

glider_day_lower.loc[:,'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)

# Find gliders that start before 12/31 and end after 12/31.
glider_day_upper = df_glider.loc[
    (df_glider['minTime (UTC)']<pd.to_datetime(end_date,utc=True)) &
    (df_glider['maxTime (UTC)']>pd.to_datetime(end_date,utc=True))
]

glider_day_upper.loc[:,'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)

# Combine it all together into one DF.
glider_subset = pd.concat([glider_day_lower, 
                           glider_day_within, 
                           glider_day_upper, 
                           glider_day_outside], 
                          verify_integrity=True)

# Calculate the days between min time and max time.
glider_subset['glider_days'] = (glider_subset['maxTime (UTC)'] - glider_subset['minTime (UTC)']).dt.days

# Calculate total glider days.
glider_subset['glider_days'].sum()

print("Glider days between %s and %s: %s" % (start_date,end_date,glider_subset['glider_days'].sum()))

Glider days between 2021-10-01 and 2021-12-31: 3308


## National Platforms

### CO-OPS
* https://opendap.co-ops.nos.noaa.gov/stations/index.jsp
  * as xml: https://opendap.co-ops.nos.noaa.gov/stations/stationsXML.jsp
* https://tidesandcurrents.noaa.gov/cdata/StationList?type=Current+Data&filter=active

In [22]:
#from lxml import etree
import requests

xml = requests.get('https://opendap.co-ops.nos.noaa.gov/stations/stationsXML.jsp').text
import re
count = sum(1 for _ in re.finditer(r'\b%s\b' % re.escape("station name"), xml))
print("All stations:",count)

url = 'https://tidesandcurrents.noaa.gov/cdata/StationListFormat?type=Current+Data&filter=active&format=csv'

df_coops = pd.read_csv(url)
#print(df_coops[' Project'].unique())
ports = df_coops[df_coops[' Project'].astype(str).str.contains('PORTS')].shape[0]
print("Ports:", ports)

All stations: 379
Ports: 66


### NDBC
https://www.ndbc.noaa.gov/wstat.shtml	Buoys: 106 (103 base-funded); CMAN: 45

In [23]:
import requests
from bs4 import BeautifulSoup
import re
import pprint

url = 'https://www.ndbc.noaa.gov/wstat.shtml'

html = requests.get(url).text

soup = BeautifulSoup(html, 'html.parser')

string_to_find = ['Total Base Funded Buoys:','Total Other Buoys:',
                  'Total Moored Buoys:','Total Base Funded Stations:',
                  'Total Stations:']

ndbc = dict()
for string in string_to_find:
    for tag in soup.find_all("td", string=string):
        ndbc[string] = tag.next_sibling.string

pprint.pprint(ndbc)

{'Total Base Funded Buoys:': '103',
 'Total Base Funded Stations:': '45',
 'Total Moored Buoys:': '106',
 'Total Other Buoys:': '3',
 'Total Stations:': '45'}


### NERRS
https://nosc.noaa.gov/OSC/OSN/index.php	NERRS SWMP; Across 29 NERRS; Source = internal access only - NOAA Observing System Council.

http://cdmo.baruch.sc.edu/webservices.cfm <- need IP address approval

Need number of stations (120 last time)

In [24]:

import requests
from bs4 import BeautifulSoup
import re

url = 'https://coast.noaa.gov/nerrs/about/'

html = requests.get(url).text

soup = BeautifulSoup(html, 'html.parser')

string_to_find = ['The National Estuarine Research Reserve System is a network of ']

nerrs = dict()
for string in string_to_find:
  for tag in soup.find_all("meta", attrs={'content': re.compile(string)}, limit=1):
    res = [int(i) for i in tag['content'].split() if i.isdigit()] # extract number
    #print(tag['content'])
    nerrs = res[0]
    #print('%s = %s' % (string, tag.next_sibling.string))

print("NERRS reserves:",nerrs)


NERRS reserves: 29


### CBIBS
https://buoybay.noaa.gov/locations

[API docs](https://buoybay.noaa.gov/node/174)

Base URL: https://mw.buoybay.noaa.gov/api/v1

Testing Key: f159959c117f473477edbdf3245cc2a4831ac61f

Latest measurements:
https://mw.buoybay.noaa.gov/api/v1/json/station?key=f159959c117f473477edbdf3245cc2a4831ac61f

In [25]:
import json

base_url = 'https://mw.buoybay.noaa.gov/api/v1'
apikey = 'f159959c117f473477edbdf3245cc2a4831ac61f'
start = '2021-12-08T01:00:00z'
end = '2021-12-09T23:59:59z'
var = 'Position'

query_url = '{}/json/query?key={}&sd={}&ed={}&var={}'.format(base_url,apikey,start,end,var)
#query_url = '{}/json/station?key={}'.format(base_url, apikey)

json = json.loads(requests.get(query_url).text)

cbibs = len(json['stations'])

print("CBIBS Stations:",cbibs)

CBIBS Stations: 10


### OAP
https://cdip.ucsd.edu/m/stn_table/	Includes overlap with the RAs and other programs

19

See buoys and moorings at https://oceanacidification.noaa.gov/WhatWeDo/Data.aspx

pull kml from pmel

In [26]:
# import geopandas as gpd
# import fiona

# gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

# kml = 'https://www.pmel.noaa.gov/co2/files/basekml.kml'

# df = gpd.read_file(kml, driver='KML')

# df['Name'].unique().size

In [27]:
import requests
from bs4 import BeautifulSoup
import re

url = 'https://oceanacidification.noaa.gov/WhatWeDo/Data.aspx'

#url = 'https://www.arcgis.com/apps/Embed/index.html?webmap=9512aae84cae409786339479e31b6c8a&amp;extent=-152.4023,-57.7072,146.7773,74.4006&amp;zoom=true&amp;scale=true&amp;disable_scroll=true&amp;theme=light'

html = requests.get(url).text

soup = BeautifulSoup(html, 'html.parser')

#string_to_find = ['The National Etuarine Research Reserve System is a network of ']
#soup.find_all(atts={'id':'mapDiv_gc'})
text = soup.find_all(attrs={'id':"dnn_ctr14711_ContentPane"})[0].find_all(attrs={'class':'lead'})[0].text #id="mapDiv")

res = [int(i) for i in text.split() if i.isdigit()] # extract number
    #print(tag['content'])
OAP = res[0]

print("OAP Stations:",OAP)

    #res = [int(i) for i in tag['content'].split() if i.isdigit()] # extract number
    #print(tag['content'])
    #nerrs = res[0]
    #print('%s = %s' % (string, tag.next_sibling.string))

#print("NERRS reserves:",nerrs)

OAP Stations: 19


### CDIP
https://cdip.ucsd.edu/m/stn_table/	Includes overlap with the RAs

67

https://cdip.ucsd.edu/themes/?d2=p1:m:mobile&regions=all&units=standard&zoom=auto&pub_set=public&tz=UTC&ll_fmt=dm&numcolorbands=10&palette=cdip_classic&high=6.096

In [28]:
import lxml
import pandas as pd

url = 'https://cdip.ucsd.edu/themes/?d2=p1:m:mobile&regions=all&units=standard&zoom=auto&pub_set=public&tz=UTC&ll_fmt=dm&numcolorbands=10&palette=cdip_classic&high=6.096'
table_list = pd.read_html(url, match='Stn')

df = table_list[0]

print("CDIP Stations:",df['Stn'].unique().size)

CDIP Stations: 66


## Regional Platforms

https://github.com/ioos/ioos-asset-inventory/tree/main/2020

http://erddap.ioos.us/erddap/tabledap/2020_asset_inventory.html <- raw data, need processed data

In [29]:
url = 'https://github.com/ioos/ioos-asset-inventory/raw/main/2020/processed_inventory.csv'
df_regional_platforms = pd.read_csv(url)

regional_platforms = df_regional_platforms['station_long_name'].unique().size

print('Regional platforms:',regional_platforms)

Regional platforms: 517


## ATN Deployments

See Deployments at https://portal.atn.ioos.us/#
Not sure if there is a way to scrape that page or get those values from somewhere

4242

In [30]:
# from bs4 import BeautifulSoup
# import requests

# headers = {'Accept-Encoding': 'identity'}

# soup = BeautifulSoup(requests.get('https://portal.atn.ioos.us', headers=headers).text, 'html.parser')

# print(soup.prettify())

atn_deployments = 4242

print("ATN Deployments:",atn_deployments)

ATN Deployments: 4242


## MBON Projects
https://marinebon.org/

https://github.com/marinebon/www_marinebon2/tree/master/content/project

currently funded projects
6

In [31]:
mbon_projects = 6

print("MBON Projects:",mbon_projects)

MBON Projects: 6


## OTT Projects

https://ioos.noaa.gov/project/ocean-technology-transition/ 8 live

## HAB Pilot Projects

https://cdn.ioos.noaa.gov/media/2021/10/NHABON-Funding-Awards-FY21_v2.pdf
8 total

## QARTOD Manuals

https://ioos.noaa.gov/project/qartod/ 14?

## IOOS Core Variables

https://www.iooc.us/task-teams/core-ioos-variables/

34

## Metadata Records

previously 13,907

https://data.ioos.us/

In [4]:
from owslib.csw import CatalogueServiceWeb

endpoint = "https://data.ioos.us/csw"

csw = CatalogueServiceWeb(endpoint, timeout=60)

def get_csw_records(csw, pagesize=10, maxrecords=10):
    """Iterate `maxrecords`/`pagesize` times until the requested value in
    `maxrecords` is reached.
    """
    from owslib.fes import SortBy, SortProperty

    # Iterate over sorted results.
    sortby = SortBy([SortProperty("dc:title", "ASC")])
    csw_records = {}
    startposition = 0
    nextrecord = getattr(csw, "results", 1)
    while nextrecord != 0:
        csw.getrecords2(
#            constraints=filter_list,
            startposition=startposition,
            maxrecords=pagesize,
            sortby=sortby,
        )
        csw_records.update(csw.records)
        if csw.results["nextrecord"] == 0:
            break
        startposition += pagesize + 1  # Last one is included.
        if startposition >= maxrecords:
            break
    csw.records.update(csw_records)
    
    
get_csw_records(csw, pagesize=10, maxrecords=1000000)

records = "\n".join(csw.records.keys())
print("Found {} records.\n".format(len(csw.records.keys())))
# for key, value in list(csw.records.items()):
#     print(u"[{}]\n{}\n".format(value.title, key))

Found 8457 records.



## IOOS

In [15]:
ioos = 1

print("IOOS:",ioos)

IOOS: 1
