In [None]:
pip install requests pandas




In [None]:
import requests

# Function to check if a station supports the requested product
def check_station_products(station):
    url = f"https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/{station}.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        available_products = [prod['id'] for prod in station_data['station']['products']]
        print(f"Available products for station {station}: {available_products}")
        return available_products
    else:
        print(f"Failed to retrieve metadata for station {station}: {response.status_code}")
        return []

In [None]:
import requests
import pandas as pd
from datetime import datetime

# Function to check if a station supports the requested product
def check_station_products(station):
    url = f"https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/{station}.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        print(f"Response for station {station}: {station_data}")  # Debugging output

        if 'stations' in station_data and isinstance(station_data['stations'], list) and len(station_data['stations']) > 0:
            products_url = station_data['stations'][0]['products']['self']
            # Fetch the products data
            products_response = requests.get(products_url)
            if products_response.status_code == 200:
                products_data = products_response.json()
                print(f"Products data for station {station}: {products_data}")  # Debugging output

                if 'products' in products_data:
                    available_products = [prod.get('id') for prod in products_data['products'] if 'id' in prod]
                    print(f"Available products for station {station}: {available_products}")
                    return available_products
                else:
                    print(f"No products key found in products_data for station {station}.")
                    return []
            else:
                print(f"Failed to retrieve products for station {station}: {products_response.status_code}")
                return []
        else:
            print(f"No valid station data found for station {station}: {station_data}")
            return []
    else:
        print(f"Failed to retrieve metadata for station {station}: {response.status_code}")
        print(f"Response: {response.text}")
        return []

# Function to fetch data for a given station and product
def fetch_noaa_data(station, product, start_date, end_date):
    url = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
    params = {
        'station': station,
        'product': product,
        'datum': 'MLLW',  # For water level
        'begin_date': start_date.strftime('%Y%m%d'),
        'end_date': end_date.strftime('%Y%m%d'),
        'units': 'metric',
        'time_zone': 'gmt',
        'format': 'json'
    }

    # Print request parameters for debugging
    print(f"Fetching data with parameters: {params}")

    response = requests.get(url, params=params)

    if response.status_code == 200:
        data = response.json().get('data', [])
        print(f"Data retrieved for {product} from {station}: {data}")  # Print fetched data for debugging
        if data:  # Check if data is not empty
            return pd.DataFrame(data)
        else:
            print(f"No data returned for {product} from {station} for the period {start_date} to {end_date}.")
            return None
    else:
        print(f"Failed to fetch {product} data from {station}: {response.status_code}")
        print(f"Error details: {response.text}")
        return None

# Function to fetch data only if the product is available at the station
def fetch_noaa_data_with_check(station, product, start_date, end_date):
    available_products = check_station_products(station)

    if product not in available_products:
        print(f"{product} not available for station {station}. Skipping.")
        return None

    return fetch_noaa_data(station, product, start_date, end_date)

# Define stations and date range
stations = ['9414290', '8724580', '8410140', '9432780', '8518750']  # Example stations
products = ['water_level', 'air_temperature', 'wind']  # Ensure these match the product IDs
start_year = 2015
end_year = 2020

# Initialize an empty dataframe to store all the data
all_data = pd.DataFrame()

# Loop through each station individually
for station in stations:
    for year in range(start_year, end_year + 1):
        start_date = datetime(year, 1, 1)
        end_date = datetime(year, 12, 31)

        # Fetch and merge data for each product
        for product in products:
            data_df = fetch_noaa_data_with_check(station, product, start_date, end_date)

            # If data was returned, append it to the overall dataset
            if data_df is not None:
                all_data = pd.concat([all_data, data_df], ignore_index=True)

# Check the number of instances
print(f"Total data points collected: {len(all_data)}")
if not all_data.empty:
    print(all_data.head())
else:
    print("No data points collected.")

# Save the data to a CSV file for future use
all_data.to_csv('sea_level_dataset.csv', index=False)


Response for station 9414290: {'count': 1, 'units': None, 'stations': [{'tidal': True, 'greatlakes': False, 'shefcode': 'FTPC1', 'details': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/details.json'}, 'sensors': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/sensors.json'}, 'floodlevels': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/floodlevels.json'}, 'datums': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/datums.json'}, 'supersededdatums': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/supersededdatums.json'}, 'harmonicConstituents': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/harcon.json'}, 'benchmarks': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9414290/benchmarks.json'}, 'tidePredOffsets': {'self': 'https://api.tidesandcurrents.n

In [None]:
import requests

# Function to check if a station supports the requested product
def check_station_products(station):
    url = f"https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/{station}.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        if 'stations' in station_data and isinstance(station_data['stations'], list) and len(station_data['stations']) > 0:
            products_url = station_data['stations'][0]['products']['self']
            # Fetch the products data
            products_response = requests.get(products_url)
            if products_response.status_code == 200:
                products_data = products_response.json()

                if 'products' in products_data:
                    available_products = [prod.get('id') for prod in products_data['products'] if 'id' in prod]
                    print(f"Station {station} has the following products: {available_products}")
                    return available_products
                else:
                    print(f"No products key found in products_data for station {station}.")
                    return []
            else:
                print(f"Failed to retrieve products for station {station}: {products_response.status_code}")
                return []
        else:
            print(f"No valid station data found for station {station}: {station_data}")
            return []
    else:
        print(f"Failed to retrieve metadata for station {station}: {response.status_code}")
        print(f"Response: {response.text}")
        return []

# Define stations to check
stations = ['9414290', '8724580', '8410140', '9432780', '8518750']

# Check each station
for station in stations:
    check_station_products(station)


Station 9414290 has the following products: []
Station 8724580 has the following products: []
Station 8410140 has the following products: []
Station 9432780 has the following products: []
Station 8518750 has the following products: []


In [None]:
import requests

# Function to list available stations and their products
def list_available_stations():
    url = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        if 'stations' in station_data:
            for station in station_data['stations']:
                station_id = station['id']
                products_url = station['products']['self']

                products_response = requests.get(products_url)
                if products_response.status_code == 200:
                    products_data = products_response.json()
                    print(f"Products data for station {station_id}: {products_data}")  # Debugging output

                    # Check if 'products' is present and has valid entries
                    if 'products' in products_data and isinstance(products_data['products'], list):
                        available_products = [
                            {'name': prod['name'], 'value': prod['value']} for prod in products_data['products']
                        ]
                        if available_products:
                            print(f"Station {station_id} has the following products:")
                            for prod in available_products:
                                print(f" - {prod['name']}: {prod['value']}")
                        else:
                            print(f"No available products for station {station_id}.")
                    else:
                        print(f"No 'products' key found or it's not a list for station {station_id}.")
                else:
                    print(f"Failed to retrieve products for station {station_id}: {products_response.status_code}")
    else:
        print(f"Failed to retrieve station data: {response.status_code}")

# Call the function to list stations and their products
list_available_stations()


Products data for station 1611400: {'products': [{'name': 'Water Levels', 'value': 'https://tidesandcurrents.noaa.gov/waterlevels.html?id=1611400'}, {'name': 'Reports', 'value': 'https://tidesandcurrents.noaa.gov/reports.html?id=1611400'}, {'name': 'Tide Predictions', 'value': 'https://tidesandcurrents.noaa.gov/noaatidepredictions.html?id=1611400'}, {'name': 'Meteorological', 'value': 'https://tidesandcurrents.noaa.gov/met.html?id=1611400'}, {'name': 'Benchmarks', 'value': 'https://tidesandcurrents.noaa.gov/benchmarks.html?id=1611400'}, {'name': 'Datums', 'value': 'https://tidesandcurrents.noaa.gov/datums.html?id=1611400'}, {'name': 'Harmonic', 'value': 'https://tidesandcurrents.noaa.gov/harcon.html?id=1611400'}, {'name': 'Sea Level Trends', 'value': 'https://tidesandcurrents.noaa.gov/sltrends/sltrends_station.shtml?id=1611400'}, {'name': 'Extreme Water Levels', 'value': 'https://tidesandcurrents.noaa.gov/est/est_station.shtml?stnid=1611400'}], 'self': 'https://api.tidesandcurrents.noa

In [None]:
import requests
import pandas as pd

# Function to list available stations and their products
def list_available_stations():
    url = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        if 'stations' in station_data:
            # Initialize a list to store the data for each station
            all_station_data = []

            for station in station_data['stations']:
                station_id = station['id']
                products_url = station['products']['self']

                products_response = requests.get(products_url)
                if products_response.status_code == 200:
                    products_data = products_response.json()

                    # Check if 'products' is present and has valid entries
                    if 'products' in products_data and isinstance(products_data['products'], list):
                        available_products = [
                            {'name': prod['name'], 'value': prod['value']} for prod in products_data['products']
                        ]

                        # Append data to the all_station_data list
                        all_station_data.append({
                            'station_id': station_id,
                            'products': available_products
                        })

                    else:
                        print(f"No 'products' key found or it's not a list for station {station_id}.")
                else:
                    print(f"Failed to retrieve products for station {station_id}: {products_response.status_code}")

            # Save the collected data to a DataFrame
            df = pd.DataFrame(all_station_data)

            # Expand the products into separate columns
            product_columns = pd.json_normalize(df['products']).columns
            for col in product_columns:
                df[col] = df['products'].apply(lambda x: [d['value'] for d in x if d['name'] == col][0] if any(d['name'] == col for d in x) else None)

            # Drop the original 'products' column
            df.drop(columns=['products'], inplace=True)

            # Save the DataFrame to a CSV file
            df.to_csv('sea_level_station_products.csv', index=False)

            print("Data has been collected and saved to 'sea_level_station_products.csv'.")
        else:
            print("No 'stations' key found in the response.")
    else:
        print(f"Failed to retrieve station data: {response.status_code}")

# Call the function to list stations and their products
list_available_stations()


Data has been collected and saved to 'sea_level_station_products.csv'.


In [None]:
import requests
import pandas as pd
from datetime import datetime

# Function to check if a station supports the requested product
def check_station_products(station):
    url = f"https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/{station}.json"
    response = requests.get(url)

    if response.status_code == 200:
        station_data = response.json()
        print(f"Response for station {station}: {station_data}")  # Debugging output

        if 'stations' in station_data and isinstance(station_data['stations'], list) and len(station_data['stations']) > 0:
            products_url = station_data['stations'][0]['products']['self']
            # Fetch the products data
            products_response = requests.get(products_url)
            if products_response.status_code == 200:
                products_data = products_response.json()
                print(f"Products data for station {station}: {products_data}")  # Debugging output

                if 'products' in products_data:
                    available_products = [prod.get('id') for prod in products_data['products'] if 'id' in prod]
                    print(f"Available products for station {station}: {available_products}")
                    return available_products
                else:
                    print(f"No products key found in products_data for station {station}.")
                    return []
            else:
                print(f"Failed to retrieve products for station {station}: {products_response.status_code}")
                return []
        else:
            print(f"No valid station data found for station {station}: {station_data}")
            return []
    else:
        print(f"Failed to retrieve metadata for station {station}: {response.status_code}")
        print(f"Response: {response.text}")
        return []

# Function to fetch data for a given station and product
def fetch_noaa_data(station, product, start_date, end_date):
    url = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
    params = {
        'station': station,
        'product': product,
        'datum': 'MLLW',  # For water level
        'begin_date': start_date.strftime('%Y%m%d'),
        'end_date': end_date.strftime('%Y%m%d'),
        'units': 'metric',
        'time_zone': 'gmt',
        'format': 'json'
    }

    # Print request parameters for debugging
    print(f"Fetching data with parameters: {params}")

    response = requests.get(url, params=params)

    if response.status_code == 200:
        data = response.json().get('data', [])
        print(f"Data retrieved for {product} from {station}: {data}")  # Print fetched data for debugging
        if data:  # Check if data is not empty
            return pd.DataFrame(data)
        else:
            print(f"No data returned for {product} from {station} for the period {start_date} to {end_date}.")
            return None
    else:
        print(f"Failed to fetch {product} data from {station}: {response.status_code}")
        print(f"Error details: {response.text}")
        return None

# Function to fetch data only if the product is available at the station
def fetch_noaa_data_with_check(station, product, start_date, end_date):
    available_products = check_station_products(station)

    if product not in available_products:
        print(f"{product} not available for station {station}. Skipping.")
        return None

    return fetch_noaa_data(station, product, start_date, end_date)

# Load the dataset containing station IDs
station_ids = pd.read_csv('/content/sea_level_station_products.csv')['station_id'].tolist()  # Assuming the column name is 'station_id'

# Define products and date range
products = ['water_level', 'air_temperature', 'wind']  # Ensure these match the product IDs
start_year = 2015
end_year = 2020

# Initialize an empty dataframe to store all the data
all_data = pd.DataFrame()

# Loop through each station individually
for station in station_ids:
    for year in range(start_year, end_year + 1):
        start_date = datetime(year, 1, 1)
        end_date = datetime(year, 12, 31)

        # Fetch and merge data for each product
        for product in products:
            data_df = fetch_noaa_data_with_check(station, product, start_date, end_date)

            # If data was returned, append it to the overall dataset
            if data_df is not None:
                # Add station ID and year as new columns
                data_df['station_id'] = station
                data_df['year'] = year
                all_data = pd.concat([all_data, data_df], ignore_index=True)

# Check the number of instances
print(f"Total data points collected: {len(all_data)}")
if not all_data.empty:
    print(all_data.head())
else:
    print("No data points collected.")

# Save the data to a CSV file for future use
all_data.to_csv('sea_level_dataset.csv', index=False)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Available products for station 9416841: []
air_temperature not available for station 9416841. Skipping.
Response for station 9416841: {'count': 1, 'units': None, 'stations': [{'tidal': True, 'greatlakes': False, 'shefcode': 'ANVC1', 'details': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/details.json'}, 'sensors': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/sensors.json'}, 'floodlevels': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/floodlevels.json'}, 'datums': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/datums.json'}, 'supersededdatums': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/supersededdatums.json'}, 'harmonicConstituents': {'self': 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/9416841/harcon.json'}, 'bench