In [None]:
#Download all the csv files from the IABP interpolated data storage website and save them in the data/raw/buoydata folder for later use

import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Scrape the IABP directory listing below and save every linked .csv file
# into output_dir, one HTTP request per file.

# URL of the webpage to scrape
url = 'https://iabp.apl.uw.edu/Data_Products/Daily_Interp/BuoyData_2024/'

# Directory to save the downloaded CSV files
output_dir = '../data/raw/buoydata/past'

# Create the directory if it does not exist (exist_ok avoids the
# check-then-create race and matches the other download cells)
os.makedirs(output_dir, exist_ok=True)

# Seconds to wait for the server before giving up; without a timeout
# requests can block forever on a stalled connection
request_timeout = 60

# Fetch the directory listing page
response = requests.get(url, timeout=request_timeout)
response.raise_for_status()  # Stop early on an HTTP error status

# Parse the webpage content
soup = BeautifulSoup(response.text, 'html.parser')

# Find all links ending with .csv (anchors without an href are skipped)
csv_links = soup.find_all('a', href=lambda href: href and href.endswith('.csv'))

# Download each CSV file
for link in csv_links:
    # hrefs may be relative, so resolve them against the listing URL
    csv_url = urljoin(url, link['href'])
    csv_response = requests.get(csv_url, timeout=request_timeout)
    csv_response.raise_for_status()

    # The local file keeps the last path component of the URL as its name
    filename = os.path.join(output_dir, os.path.basename(csv_url))

    # Save the CSV file
    with open(filename, 'wb') as file:
        file.write(csv_response.content)

    print(f'Downloaded {filename}')

print('All files downloaded successfully!')

In [None]:
#Download the last n days of buoy data for use in predictions with IDs of your choice

import os
import requests

# List of buoy ID values to request
bids = [
    '300234011751690',
    #'300234011751691',
    #'300234011751692'
    # Add more bid values as needed
]

# Directory to save the downloaded CSV files
output_dir = '../data/raw/buoydata/current'

# Create the directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# Number of days to download data for
ndays = 10

# Base URL for the API
base_url = 'https://iabp.apl.uw.edu/download'

# Seconds to wait for the server before giving up; without a timeout
# requests can block forever on a stalled connection
request_timeout = 60

# Iterate over each bid value
for bid in bids:
    # Construct the URL for the current bid
    url = f'{base_url}?bid={bid}&ndays={ndays}'

    # Send a GET request to the URL
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()  # Stop early on an HTTP error status

    # Each buoy is stored as <bid>.csv inside output_dir
    filename = f'{bid}.csv'
    file_path = os.path.join(output_dir, filename)

    # Save the response body unchanged
    with open(file_path, 'wb') as file:
        file.write(response.content)

    print(f'Downloaded {filename} to {file_path}')

In [None]:
import ftplib
import os

# Download the uwnd and vwnd 2024 NetCDF files from the NOAA FTP server
# into the local reanalyses folder.

# FTP server details
ftp_server = 'ftp.cdc.noaa.gov'
ftp_path = '/Datasets/ncep/'

# Local directory to save the downloaded files
local_dir = '../data/raw/reanalyses/ncep'
os.makedirs(local_dir, exist_ok=True)

# A single anonymous session serves both downloads. The context manager
# closes the connection even if a transfer fails, so no connection is
# left open (previously the first of two connections was never closed).
with ftplib.FTP(ftp_server) as ftp:
    ftp.login()

    # Change to the specified directory
    ftp.cwd(ftp_path)

    # Same procedure for the uwnd file and the vwnd file
    for filename in ('uwnd.2024.nc', 'vwnd.2024.nc'):
        local_filename = os.path.join(local_dir, filename)
        with open(local_filename, 'wb') as file:
            ftp.retrbinary(f'RETR {filename}', file.write)

        print(f'Downloaded {filename} to {local_filename}')

In [None]:
#Download the most recent GFS forecast data from the NOAA FTP server

import ftplib
from datetime import datetime
import os
from ftplib import FTP

# FTP server details
ftp_server = 'ftp.ncep.noaa.gov'
ftp_path = '/pub/data/nccf/com/gfs/prod/'


def _parse_list_mtime(fields, now):
    """Return the modification time found in one whitespace-split LIST line.

    Assumes an `ls -l`-style listing where the three fields before the name
    are month, day and either HH:MM (recent entries, year omitted) or a
    four-digit year (older entries) - TODO confirm against the server output.

    Raises ValueError if the fields cannot be interpreted as a date.
    """
    month, day, stamp = fields[-4:-1]
    if ':' not in stamp:
        # Older entry: the last field is the year and no clock time is given
        return datetime.strptime(f'{stamp} {month} {day}', '%Y %b %d')

    # Recent entry: the year is missing. Parsing without a year would put
    # everything in 1900, which mis-orders December vs. January entries and
    # rejects Feb 29, so try the current year first and fall back to the
    # previous one when the result would lie in the future.
    for year in (now.year, now.year - 1):
        try:
            parsed = datetime.strptime(f'{year} {month} {day} {stamp}', '%Y %b %d %H:%M')
        except ValueError:
            continue  # e.g. Feb 29 in a non-leap year
        # One day of slack covers a clock/time-zone offset between hosts
        if (parsed - now).total_seconds() <= 86400:
            return parsed
    raise ValueError(f'Cannot parse modification time from listing fields: {fields!r}')


def _select_entries(lines, keep, now):
    """Return (name, mod_time) tuples for the LIST lines accepted by keep.

    keep is called as keep(line, name) where name is the last
    whitespace-separated field of the line; blank lines are skipped.
    """
    selected = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        name = fields[-1]
        if keep(line, name):
            selected.append((name, _parse_list_mtime(fields, now)))
    return selected


def _newest(entries, description):
    """Return the (name, mod_time) tuple with the latest mod_time.

    Raises ValueError naming what was being searched for when entries is empty.
    """
    if not entries:
        raise ValueError(f'No {description} found on {ftp_server}')
    return max(entries, key=lambda item: item[1])


# Reference time used to infer the year of listing entries that omit it
reference_time = datetime.now()

# The context manager closes the connection even if a step below fails
with ftplib.FTP(ftp_server) as ftp:
    ftp.login()

    # Change to the specified directory
    ftp.cwd(ftp_path)

    # List entries and keep those with "gfs" in their name
    directories = []
    ftp.retrlines('LIST', directories.append)
    gfs_dirs = _select_entries(
        directories, lambda line, name: 'gfs' in name, reference_time
    )

    # Enter the most recently edited directory
    most_recent_dir = _newest(gfs_dirs, f"entries with 'gfs' in their name under {ftp_path}")
    ftp.cwd(most_recent_dir[0])
    print(f"Entered directory: {most_recent_dir[0]}")

    # List the current directory and keep only subdirectories
    # (listing lines that start with 'd')
    subdirectories = []
    ftp.retrlines('LIST', subdirectories.append)
    sub_dirs = _select_entries(
        subdirectories, lambda line, name: line.startswith('d'), reference_time
    )

    # Enter the most recently edited subdirectory
    most_recent_subdir = _newest(sub_dirs, f'subdirectories in {most_recent_dir[0]}')
    ftp.cwd(most_recent_subdir[0])
    print(f"Entered subdirectory: {most_recent_subdir[0]}")

    # Enter the "atmos" folder
    ftp.cwd('atmos')
    print("Entered 'atmos' folder")

    # List files in the "atmos" folder and keep the .nc files
    files = []
    ftp.retrlines('LIST', files.append)
    nc_files = _select_entries(
        files, lambda line, name: name.endswith('.nc'), reference_time
    )

    # Find the most recently edited .nc file
    most_recent_nc_file = _newest(nc_files, ".nc files in the 'atmos' folder")

    # Ensure the local directory exists
    local_dir = '../data/raw/forecasts/gfs'
    os.makedirs(local_dir, exist_ok=True)

    # Download the most recently edited .nc file
    local_filename = os.path.join(local_dir, most_recent_nc_file[0])
    with open(local_filename, 'wb') as file:
        ftp.retrbinary(f'RETR {most_recent_nc_file[0]}', file.write)

print(f'Downloaded {most_recent_nc_file[0]} to {local_filename}')