In [None]:
import requests
from datetime import datetime, timedelta
import pandas as pd

In [None]:
def get_historical_water_temp_data_paginated(begin_date_str='20160101', end_date_str='20240101',
                                             station='8594900', timeout=60):
    """Download water-temperature records from the NOAA CO-OPS data API in date chunks.

    The requested range is split into consecutive windows of at most 31
    calendar days (both ends inclusive). One HTTP GET is issued per window and
    the ``data`` records of every successful response are concatenated.

    Parameters
    ----------
    begin_date_str : str
        First day to request, formatted ``YYYYMMDD``.
    end_date_str : str
        Last day to request, formatted ``YYYYMMDD``. This day is included in
        the final window.
    station : str
        Station id sent as the ``station`` query parameter.
    timeout : float
        Seconds passed to ``requests.get`` so a stalled connection cannot hang
        the notebook forever.

    Returns
    -------
    list
        The items found under the ``data`` key of each JSON response, in
        request order. Windows that fail (non-200 status, or a body carrying an
        ``error`` entry) are reported with ``print`` and contribute no records.
    """
    base_url = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
    begin_date = datetime.strptime(begin_date_str, '%Y%m%d')
    end_date = datetime.strptime(end_date_str, '%Y%m%d')

    # begin .. begin + 30 days is a 31-day inclusive window.
    # NOTE(review): 31 days is presumably the API's per-request cap for this
    # product -- confirm against the CO-OPS documentation.
    window_span = timedelta(days=30)
    one_day = timedelta(days=1)

    all_data = []

    # `<=` (not `<`): when the previous window ended the day before end_date,
    # the last day still needs its own request; it also makes a single-day
    # range (begin == end) fetch that day instead of nothing.
    while begin_date <= end_date:
        # Never request past the caller's end date.
        next_end_date = min(begin_date + window_span, end_date)

        formatted_begin_date = begin_date.strftime('%Y%m%d')
        formatted_next_end_date = next_end_date.strftime('%Y%m%d')

        params = {
            'begin_date': formatted_begin_date,
            'end_date': formatted_next_end_date,
            'station': station,
            'product': 'water_temperature',
            'datum': 'MLLW',
            'units': 'english',
            'time_zone': 'lst',
            'format': 'json',
            'application': 'your_app_name',
        }

        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code == 200:
            payload = response.json()
            # A 200 body may carry an 'error' entry instead of 'data';
            # surface it rather than silently treating it as "no records".
            if payload.get('error'):
                print(f"API error for {formatted_begin_date} to {formatted_next_end_date}: {payload['error']}")
            all_data.extend(payload.get('data', []))
        else:
            print(f"Error fetching data for {formatted_begin_date} to {formatted_next_end_date}: {response.status_code}")

        # Windows are inclusive on both ends, so the next one starts the day after.
        begin_date = next_end_date + one_day

    return all_data



In [None]:
# The fetch helper returns a plain list of record dicts, so it must be turned
# into a DataFrame before any column access (indexing a list with 't' raises
# TypeError).
df = pd.DataFrame(get_historical_water_temp_data_paginated())

# Parse the 't' column into datetimes so the .dt accessor can be used below.
df['t'] = pd.to_datetime(df['t'])

# Keep only the readings stamped exactly on the hour (minute == 0).
hourly_records = df[df['t'].dt.minute == 0]
hourly_records.to_csv('water_temp_dc_2016_2024_hourly.csv')