In [1]:
import os
import requests
import csv
import re
import pandas as pd

# https://www.ndbc.noaa.gov/data/stations/

def download_station_table(data_url, destination_name,
                           output_path="../data/station_table.txt", timeout=30):
    """Download the station table and save the raw response body to disk.

    Parameters
    ----------
    data_url : str
        Base URL of the directory that holds the table
        (presumably https://www.ndbc.noaa.gov/data/stations/ -- confirm with caller).
    destination_name : str
        Name of the remote file; it is appended to ``data_url`` to form the
        download URL.
    output_path : str, optional
        Local file the response body is written to. Defaults to the path the
        rest of the notebook reads from.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises, so a
        stalled connection cannot hang the notebook.

    Returns
    -------
    None
        A one-line status message is printed. Nothing is written when the
        server answers with a status other than 200.
    """
    # Join with "/" explicitly: os.path.join would use "\\" on Windows and
    # would silently discard data_url if destination_name began with "/".
    station_table_url = f"{data_url.rstrip('/')}/{destination_name.lstrip('/')}"

    response = requests.get(station_table_url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to download data for station table")
        return

    # Create the target directory on first use instead of failing in open().
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, "wb") as file:
        file.write(response.content)
    print("Downloaded data for station table")

In [2]:
# Load the raw station table text from disk into `data` for parsing.
with open("../data/station_table.txt", mode="r") as station_file:
    data = station_file.read()

In [3]:
# Row layout this pattern expects (pipe-separated, one station per line):
#   station_id|owner|ttype|hull|name|payload|location|timezone|forecast|note
# The location field is free text; only the first "<lat> N/S <lon> E/W"
# decimal pair found in it is captured.
#
# Every field excludes "\n" and the station id is anchored to the start of a
# line, so a row without coordinates is skipped on its own instead of being
# stitched onto the following row (which would yield a bogus record and hide
# the real one).
pattern = re.compile(
    r'''
    ^[ \t]*([A-Za-z0-9]+)          # station_id, anchored to the start of a line
    \|([^|\n]*)                    # owner
    \|([^|\n]*)                    # ttype
    \|([^|\n]*)                    # hull
    \|([^|\n]*)                    # name
    \|([^|\n]*)                    # payload
    \|[^|\n]*?                     # location text before the coordinates
    (\d+\.\d+)[ \t]*([NS])[ \t]*   # latitude and hemisphere
    (\d+\.\d+)[ \t]*([EW])         # longitude and hemisphere
    [^|\n]*                        # rest of the location field
    \|([^|\n]*)                    # timezone
    \|([^|\n]*)                    # forecast
    \|([^|\n]*)                    # note (a literal caret is allowed)
    ''',
    re.VERBOSE | re.MULTILINE,
)

# One 13-tuple of captured groups per station row that carries coordinates.
matches = pattern.findall(data)

# Convert the matches into a list of dictionaries, one per station.
data_list = []
for (station_id, owner, ttype, hull, name, payload,
     lat, lat_dir, lon, lon_dir, timezone, forecast, note) in matches:
    # Southern latitudes and western longitudes become negative decimals.
    latitude = float(lat) if lat_dir == 'N' else -float(lat)
    longitude = float(lon) if lon_dir == 'E' else -float(lon)
    data_list.append({
        'station_id': station_id,
        'owner': owner,
        'ttype': ttype,
        'hull': hull,
        'name': name,
        'payload': payload,
        'latitude': latitude,
        'longitude': longitude,
        'timezone': timezone,
        'forecast': forecast,
        'note': note
    })

# Passing the columns explicitly keeps the schema stable even when nothing
# matched, so later column lookups do not fail with KeyError.
columns = [
    'station_id', 'owner', 'ttype', 'hull', 'name', 'payload',
    'latitude', 'longitude', 'timezone', 'forecast', 'note',
]
df = pd.DataFrame(data_list, columns=columns)

df.head(10)

In [10]:
# Write the parsed station table to a parquet file.
parquet_path = '../data/station_table.parquet'
df.to_parquet(parquet_path)

In [8]:
# Read the parquet file back into a DataFrame.
parquet = pd.read_parquet(
    '../data/station_table.parquet',
)

In [9]:
# Show the first five rows (head() defaults to n=5).
parquet.head()

Unnamed: 0,station_id,owner,ttype,hull,name,payload,latitude,longitude,timezone,forecast,note
0,0y2w3,CG,Weather Station,,"Sturgeon Bay CG Station, WI",,44.794,-87.313,C,,
1,13001,PR,Atlas Buoy,PM-595,NE Extension,,12.0,-23.0,,,
2,13002,PR,Atlas Buoy,,NE Extension,,21.0,-23.0,,,
3,13008,PR,Atlas Buoy,PM-531,Reggae,,15.0,-38.0,,,
4,13009,PR,Atlas Buoy,PM-533,Lambada,,8.0,-38.0,,,


# Florida: look at buoys 20.000 < lat < 32.336, -87.500 < lon < -66.000

In [22]:
# Keep stations strictly inside the box 20.000 < lat < 32.336 and
# -87.500 < lon < -66.000 (bounds themselves are excluded).
lat_in_box = (20.000 < df['latitude']) & (df['latitude'] < 32.336)
lon_in_box = (-87.500 < df['longitude']) & (df['longitude'] < -66.000)
df[lat_in_box & lon_in_box]

Unnamed: 0,station_id,owner,ttype,hull,name,payload,latitude,longitude,timezone,forecast,note
115,41002,N,3-meter foam buoy,3D66,SOUTH HATTERAS - 225 NM South of Cape Hatteras,SCOOP payload,31.759,-74.936,E,FZNT22.KWBC,Right whales are active off SC from November t...
116,41003,N,6-meter NOMAD buoy,,,MVXII payload,30.400,-80.100,E,,(formerly eb05)
118,41005,N,6-meter NOMAD buoy,,,GSBP payload,31.700,-79.700,E,,
119,41006,N,6-meter NOMAD buoy,,,GSBP payload,29.300,-77.400,E,,
121,41008,N,3-meter discus buoy,3D36,"GRAYS REEF - 40 NM Southeast of Savannah, GA",SCOOP payload,31.400,-80.866,E,FZUS52.KCHS,"<p>This buoy is located in <a href=""http://www..."
...,...,...,...,...,...,...,...,...,...,...,...
1777,venf1,N,C-MAN Station,,"Venice, FL",ARES payload,27.072,-82.453,E,FZUS52.KTBW,
1805,wiwf1,NP,Water Quality Station,,"Willy Willy, FL",,25.587,-81.044,E,,Water levels are referenced to local datums.&n...
1812,wplf1,NP,Water Quality Station,,"Watson Place, FL",,25.710,-81.249,E,FZUS52.KMFL,Water levels are referenced to local datums.&n...
1815,wrbf1,NP,Water Quality Station,,"Whipray Basin, FL",,25.072,-80.735,E,FZUS52.KKEY,Water levels are referenced to local datums.&n...
