In [1]:
import os
import requests
import csv
import re
import pandas as pd

# https://www.ndbc.noaa.gov/data/stations/

# function to download station table
def download_station_table(data_url, destination_name,
                           output_path="../data/station_table.txt", timeout=30):
    """Download the station table and save it to disk.

    Parameters
    ----------
    data_url : str
        Base URL of the directory that hosts the station table.
    destination_name : str
        Name of the remote file, appended to ``data_url``.
    output_path : str, optional
        Local file the response body is written to. Defaults to the path
        read by the rest of this notebook.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    None
        A status line is printed; on a non-200 response nothing is written.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged if the request cannot be completed
        (connection error, timeout, ...).
    """
    # URLs always use "/" as the separator. os.path.join uses the OS path
    # separator, which yields a backslash (and a broken URL) on Windows.
    station_table_url = f"{data_url.rstrip('/')}/{destination_name.lstrip('/')}"

    # Without an explicit timeout, requests may block forever on a stalled server.
    response = requests.get(station_table_url, timeout=timeout)
    if response.status_code == 200:
        # Write raw bytes so the payload is stored exactly as served.
        with open(output_path, "wb") as file:
            file.write(response.content)
        print("Downloaded data for station table")
    else:
        print("Failed to download data for station table")

In [2]:
# Read the whole station table into memory as one string; the regex in the
# parsing cell scans this text.
with open("../data/station_table.txt", mode="r") as file:
    data = file.read()

In [3]:
# Each record is matched as ten pipe-delimited fields:
#   id|owner|ttype|hull|name|payload|location|timezone|forecast|note
# The location field must contain "<lat> N|S <lon> E|W" in decimal degrees;
# anything else in that field is ignored.
#
# Field groups use [^|\n] rather than [^|] so that a capture can never run
# past the end of its line. With [^|], a record that has no coordinates makes
# the regex borrow the tail of that line plus the following line, producing a
# bogus station_id and swallowing the next (valid) record.
# The note group previously used [^|^\n], which also excluded a literal "^".
pattern = (
    r'([A-Za-z0-9]+)\|'                                            # station id
    r'([^|\n]*)\|([^|\n]*)\|([^|\n]*)\|([^|\n]*)\|([^|\n]*)\|'     # owner, ttype, hull, name, payload
    r'[^|\n]*?(\d+\.\d+)\s*([NS])\s*(\d+\.\d+)\s*([EW])[^|\n]*\|'  # location (first lat/lon pair)
    r'([^|\n]*)\|([^|\n]*)\|([^|\n]*)'                             # timezone, forecast, note
)

# Use regular expressions to find all matches
matches = re.findall(pattern, data)

# Create a list to store the data
data_list = []

# Convert the matches into a list of dictionaries
for station_id, owner, ttype, hull, name, payload, lat, lat_dir, lon, lon_dir, timezone, forecast, note in matches:
    # Southern latitudes and western longitudes are stored as negative values.
    latitude = float(lat) if lat_dir == 'N' else -float(lat)
    longitude = float(lon) if lon_dir == 'E' else -float(lon)
    data_list.append({
        'station_id': station_id,
        'owner': owner,
        'ttype': ttype,
        'hull': hull,
        'name': name,
        'payload': payload,
        'latitude': latitude,
        'longitude': longitude,
        'timezone': timezone,
        'forecast': forecast,
        'note': note
    })

# Create a Pandas DataFrame from the list of dictionaries.
# Passing the column list explicitly keeps the schema stable even when no
# record matched (an empty list would otherwise give a column-less frame).
df = pd.DataFrame(
    data_list,
    columns=[
        'station_id', 'owner', 'ttype', 'hull', 'name', 'payload',
        'latitude', 'longitude', 'timezone', 'forecast', 'note',
    ],
)

In [5]:
# Show the first ten parsed stations as a quick sanity check.
df.head(n=10)

Unnamed: 0,station_id,owner,ttype,hull,name,payload,latitude,longitude,timezone,forecast,note
0,0y2w3,CG,Weather Station,,"Sturgeon Bay CG Station, WI",,44.794,-87.313,C,,
1,13001,PR,Atlas Buoy,PM-595,NE Extension,,12.0,-23.0,,,
2,13002,PR,Atlas Buoy,,NE Extension,,21.0,-23.0,,,
3,13008,PR,Atlas Buoy,PM-531,Reggae,,15.0,-38.0,,,
4,13009,PR,Atlas Buoy,PM-533,Lambada,,8.0,-38.0,,,
5,13010,PR,Atlas Buoy,PM-590,Soul,,0.0,0.0,,,
6,14040,RM,Atlas Buoy,,,,-8.0,67.0,,,
7,14041,RM,Atlas Buoy,,,,-8.0,55.0,,,
8,14043,RM,Atlas Buoy,,,,-12.0,67.0,,,
9,14047,RM,Atlas Buoy,,,,-4.0,57.0,,,


In [10]:
# Persist the parsed station table as Parquet.
df.to_parquet(path='../data/station_table.parquet')

In [8]:
# Load the Parquet file back to check that it can be read.
parquet = pd.read_parquet(path='../data/station_table.parquet')

In [9]:
# Show the first five rows of the reloaded table.
parquet.head(n=5)

Unnamed: 0,station_id,owner,ttype,hull,name,payload,latitude,longitude,timezone,forecast,note
0,0y2w3,CG,Weather Station,,"Sturgeon Bay CG Station, WI",,44.794,-87.313,C,,
1,13001,PR,Atlas Buoy,PM-595,NE Extension,,12.0,-23.0,,,
2,13002,PR,Atlas Buoy,,NE Extension,,21.0,-23.0,,,
3,13008,PR,Atlas Buoy,PM-531,Reggae,,15.0,-38.0,,,
4,13009,PR,Atlas Buoy,PM-533,Lambada,,8.0,-38.0,,,
