In [1]:
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [8]:
def update_station_urls():
    """Scrape the NDBC station index page and save the station IDs to disk.

    Fetches ``https://www.ndbc.noaa.gov/to_station.shtml``, collects the value
    after the last ``=`` of every link whose ``href`` contains ``station=``,
    and writes the IDs to ``./buoy_stations.csv`` in a single ``stations``
    column (the DataFrame index is written as well).

    Returns
    -------
    None

    Raises
    ------
    requests.exceptions.RequestException
        If the page cannot be fetched within the timeout or the server
        answers with an error status.
    """
    # URL of the station index page
    url = 'https://www.ndbc.noaa.gov/to_station.shtml'

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Fail loudly on 4xx/5xx responses

    # Parse the page and keep only anchors that carry an href attribute
    soup = BeautifulSoup(response.text, 'html.parser')
    anchors = soup.find_all('a', href=True)

    # The station ID is whatever follows the last '=' in a "station=" link
    station_ids = [
        anchor['href'].split('=')[-1]
        for anchor in anchors
        if 'station=' in anchor['href']
    ]

    # Drop repeated links while keeping page order, so that no station is
    # listed (and later downloaded) more than once.
    station_ids = list(dict.fromkeys(station_ids))

    df = pd.DataFrame(station_ids, columns=['stations'])

    # The index is written on purpose: get_stations_list() expects to find
    # and drop the resulting 'Unnamed: 0' column when reading the file back.
    df.to_csv(r'./buoy_stations.csv')

    return

In [5]:
def get_stations_list():
    """Load the saved station IDs from ``./buoy_stations.csv``.

    Returns
    -------
    pandas.DataFrame
        Frame with a ``stations`` column of station IDs as strings.

    Raises
    ------
    FileNotFoundError
        If ``./buoy_stations.csv`` does not exist yet.
    """
    # Station IDs are identifiers, not numbers: force str so that IDs made
    # only of digits are not parsed as integers (which would strip leading
    # zeros and break string concatenation when building download URLs).
    stations = pd.read_csv(r'./buoy_stations.csv', dtype={'stations': str})

    # 'Unnamed: 0' is the index column written by DataFrame.to_csv();
    # errors='ignore' keeps this working if the file was saved without it.
    stations = stations.drop(columns='Unnamed: 0', errors='ignore')
    return stations

In [7]:
def download_buoy_data(stations):
    """Download the realtime table for every station and save each one as CSV.

    For each ID in ``stations['stations']`` the file
    ``https://www.ndbc.noaa.gov/data/realtime2/<id>.spec`` is read as a
    whitespace-delimited table, its first parsed row is dropped, and the rest
    is written to ``buoy_data/buoy_<id><timestamp>.csv``. All files written by
    one call share the same timestamp, taken when the call starts.

    Parameters
    ----------
    stations : pandas.DataFrame
        Frame with a ``'stations'`` column holding the station IDs.

    Returns
    -------
    pandas.DataFrame
        One row per station that failed, with columns ``'buoy_id'`` and
        ``'error'`` (the caught exception object). Empty if nothing failed.

    Notes
    -----
    Failures are collected instead of raised so that one bad station does not
    abort the whole run; inspect the returned frame to see what went wrong.
    """
    timestr = time.strftime("%Y%m%d-%H%M%S")
    suffix = ".spec"  # the file suffix determines the table downloaded (.spec is wave data)

    # Make sure the output directory exists; otherwise every to_csv() call
    # would fail and be recorded as a per-station error.
    os.makedirs("buoy_data", exist_ok=True)

    errs = []
    buoy_err = []

    for buoy_id in stations['stations'].values:
        # IDs may arrive as integers if the station list was read without an
        # explicit dtype; the URL and file name need a string.
        buoy_id = str(buoy_id)

        # URL of the realtime table for this station
        url = "https://www.ndbc.noaa.gov/data/realtime2/" + buoy_id + suffix

        try:
            # sep=r"\s+" is the supported spelling of the deprecated
            # delim_whitespace=True.
            df = pd.read_csv(url, sep=r"\s+")

            # Drop the first parsed row — presumably the units line that
            # follows the column header in NDBC realtime files; TODO confirm
            # against a sample file.
            df = df.iloc[1:]

            # NOTE(review): there is no separator between the ID and the
            # timestamp in the file name; kept as-is for compatibility.
            df.to_csv("buoy_data/buoy_" + buoy_id + timestr + ".csv", index=False)

            # One short progress line per station instead of dumping the
            # whole table into the cell output.
            print(buoy_id + ": saved " + str(len(df)) + " rows")
        except Exception as e:
            # Best-effort run: remember the failure and move on.
            buoy_err.append(buoy_id)
            errs.append(e)

    errors = pd.DataFrame({'buoy_id': buoy_err, 'error': errs})

    print("Complete")
    print("Errors: " + str(len(errs)))

    return errors
    

In [6]:
## execute block
# Runs the whole pipeline: refresh the station list, load it, download the data.
# NOTE: every function called here must already be defined in the kernel.
# Run the definition cells above first (Restart Kernel -> Run All), otherwise
# this cell stops with a NameError.

# Re-scrape the NDBC station index and rewrite ./buoy_stations.csv
update_station_urls()

# Load the station IDs that were just saved
stations = get_stations_list()

# Download each station's table; `e` is the DataFrame of per-station failures
e = download_buoy_data(stations)
# Bare last expression so the notebook displays the failures table
e

1801583
1801589
21413
21414
21415
21416
21417
21418
21419
21420
32301
32302
32411
32412
32413
32D12
32D13
41001
41002
41003
41004
41005
41006
41007
41008
41009
41010
41011
41012
41013
41015
41016
41017
41018
41021
41022
41023
41025
41035
41036
41040
41041
41043
41044
41046
41047
41048
41049
41420
41421
41424
41425
41A46
41B41
41D20
41S43
41S46
42001
42002
42003
42004
42005
42006
42007
42008
42009
42010
42011
42012
42015
42016
42017
42018
42019
42020
42025
42035
42036
42037
42038
42039
42040
42041
42042
42053
42054
42055
42056
42057
42058
42059
42060
42065
42080
42407
42408
42409
42429
42501
42503
42534
43412
43413
44001
44003
44004
44005
44006
44007
44008
44009
44010
44011
44012
44013
44014
44015
44017
44018
44019
44020
44023
44025
44026
44027
44028
44065
44066
44070
44071
44401
44402
44403
44585
45001
45002
45003
45004
45005
45006
45007
45008
45009
45010
45011
45012
46001
46002
46003
46005
46006
46007
46008
46009
46010
46011
46012
46013
46014
46015
46016
46017
46018
46019
46020
46021


NameError: name 'download_buoy_data' is not defined