### API Request to Load Data from the NWS Data Downloads 

Notebook contents 
* Creating network connection to NWS website to download historical site data

created by Cassie Lumbrazo\
last updated: Jan 2025\
run location: UAS linux\
python environment: **requests**

In [1]:
# import packages 
%matplotlib inline

# plotting packages 
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns 

sns.set_theme()
# plt.rcParams['figure.figsize'] = [12,6] #overriding size

# data packages 
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime

import scipy
from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap
from matplotlib import ticker

In [2]:
# Show the notebook's current working directory (IPython automagic for %pwd).
pwd

'/home/cassie/python/repos/juneau_data_analysis'

In [3]:
# # Paths for local machine
# ds = xr.open_dataset('C:\\Users\\Lumbr\\OneDrive - UW/Documents\\Washington\\EasternCascades\\Python\\CER_treatment_manuscript\\cer_treatment_region_dataset_all_variables.nc') 

Created a new environment called "requests" with the `requests` package installed, then proceeded with the cells below.

In [18]:
import requests
import json

In [5]:
# Dataset ID options:
# LCD: Local Climatological Data for local observations
# GHCND: Global Historical Climatology Network - Daily
# GHCNh: Global Historical Climatology Network - Hourly
# ISD: Integrated Surface Dataset for global hourly data

In [6]:
# Request observations for a single station from the NOAA CDO web API (v2).
#
# The personal API token is read from the NOAA_CDO_TOKEN environment variable
# instead of being written into the notebook, so it is never committed with the
# code. Create a token here: https://www.ncdc.noaa.gov/cdo-web/token
# (set the variable before starting the kernel, or with `%env` in a private cell).
import os

api_token = os.environ.get("NOAA_CDO_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the NOAA_CDO_TOKEN environment variable to your CDO API token "
        "(https://www.ncdc.noaa.gov/cdo-web/token) before running this cell."
    )

headers = {'token': api_token}

# Define station ID and time period
# NOTE(review): the station ids returned by the /stations endpoint carry a
# network prefix (e.g. "WBAN:25309"); a bare "PPSA2" may not be resolvable by
# this API -- confirm the id via /stations before requesting data.
station_id = "PPSA2" # powder patch eaglerest station (cite Iowa Environmental Mesonet)
start_date = "2023-10-01"
end_date = "2023-10-15"
# NOTE(review): verify "GHCNh" against the ids listed by the /datasets endpoint;
# an unsupported dataset id is a plausible cause of an HTTP 500 from this request.
dataset = "GHCNh"  # For hourly data (Global Historical Climatology Network - Hourly)  (cite NCEI)

# Create the url for the API request
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
params = {
    "datasetid": dataset,
    "stationid": station_id,
    "startdate": start_date,
    "enddate": end_date,
    "limit": 1000, # maximum records per request
    "units": "metric", # options: metric, standard or imperial
}

# Make the API request; the timeout (seconds) keeps the cell from hanging
# forever if the server never answers.
response = requests.get(base_url, headers=headers, params=params, timeout=60)

# Check the response status code
if response.status_code == 200:
    try:
        # Parse the JSON data
        data = response.json()
        print(data)
    except ValueError as e:
        # A 200 response whose body is not valid JSON
        print(f"Error decoding JSON: {e}")
else:
    print(f"Request failed with status code {response.status_code}")
    print(f"Response content: {response.text}")

Request failed with status code 500
Response content: <?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><statusCode>500</statusCode><userMessage>An unexpected error occured while servicing your request.</userMessage><developerMessage>An unexpected error occured while servicing your request.</developerMessage></response>


Try running for a region around Juneau, AK 

In [25]:
# Search the NOAA CDO station catalogue for stations near Juneau, AK.
# Relies on `headers` (the token header) and `requests` from earlier cells.
import math

# Define the location (latitude and longitude of Juneau, AK)
latitude = 58.3019
longitude = -134.4197
radius = 50  # half-width in kilometers of the search box around the lat/lon point

# Convert the half-width from kilometers to degrees.
# One degree of latitude is ~111 km everywhere, but one degree of longitude
# shrinks with cos(latitude): at ~58 N it is only ~58 km. Dividing by 111 for
# both axes would make the box roughly half as wide east-west as intended.
km_per_deg_lat = 111
km_per_deg_lon = km_per_deg_lat * math.cos(math.radians(latitude))
lat_delta = radius / km_per_deg_lat
lon_delta = radius / km_per_deg_lon

# The API expects the extent as "south,west,north,east"; note this is a
# bounding box around the point, not a true circle of the given radius.
extent = (
    f"{latitude - lat_delta:.4f},{longitude - lon_delta:.4f},"
    f"{latitude + lat_delta:.4f},{longitude + lon_delta:.4f}"
)

# Define the base URL and parameters
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
params = {
    # "datasetid": "GHCNh",  # For hourly data (Global Historical Climatology Network - Hourly)
    "datasetid": "LCD", # For local observations (Local Climatological Data)
    "extent": extent,
    "limit": 1000,  # Maximum number of results
}

# Make the API request; the timeout (seconds) keeps the cell from hanging
# forever if the server never answers.
response = requests.get(base_url, headers=headers, params=params, timeout=60)

# Check the response
if response.status_code == 200:
    stations = response.json()
    print(stations)
else:
    print(f"Request failed with status code {response.status_code}")
    print(f"Response content: {response.text}")

{'metadata': {'resultset': {'offset': 1, 'count': 1, 'limit': 1000}}, 'results': [{'elevation': 5.9, 'mindate': '1948-07-01', 'maxdate': '2025-01-17', 'latitude': 58.354, 'name': 'JUNEAU AIRPORT, AK US', 'datacoverage': 1, 'id': 'WBAN:25309', 'elevationUnit': 'METERS', 'longitude': -134.55606}]}


So it looks like Juneau Airport (station_id: `WBAN:25309`) is the only station within the 50 km search box that the archive returns for the `LCD` dataset. 
The request came back empty for `GHCNh`. Expanding the search to 100 km picked up additional stations. 

This still did not resolve the issue of finding Powder Patch in the archive to request data from...

### Try downloading data from the Airport Station

In [27]:
# Download one month of observations for the Juneau Airport station and save
# the raw API response as JSON.
# Relies on `headers` (the token header) and `requests` from earlier cells.
import json

# Define the station ID and dataset
station_id = "WBAN:25309" # Juneau Airport Station (WBAN:25309)
dataset_id = "LCD"

# Define the date range for data retrieval
start_date = "2023-01-01"  # YYYY-MM-DD
end_date = "2023-01-30"

# Define the base URL and parameters
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
params = {
    "datasetid": dataset_id,
    "stationid": station_id,
    "startdate": start_date,
    "enddate": end_date,
    "units": "metric",  # metric, standard, or imperial
    "limit": 1000,      # maximum number of records per request
}

# Make the API request; the timeout (seconds) keeps the cell from hanging
# forever if the server never answers.
response = requests.get(base_url, headers=headers, params=params, timeout=60)

# RUN FOR SAVING TO A JSON FILE --------------------------------------
# Check the response
if response.status_code == 200:
    data = response.json()

    # The response metadata reports how many records matched in total; if that
    # exceeds the per-request limit, only the first page was returned.
    resultset = data.get("metadata", {}).get("resultset", {})
    total_records = resultset.get("count", 0)
    if total_records > params["limit"]:
        print(
            f"Warning: {total_records} records matched but only the first "
            f"{params['limit']} were returned; page through with 'offset' to get the rest."
        )

    # Save the data to a file; explicit encoding keeps the output identical
    # across platforms
    with open("juneau_airport_data.json", "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
    print("Data successfully downloaded and saved to 'juneau_airport_data.json'")
else:
    print(f"Request failed with status code {response.status_code}")
    print(f"Response content: {response.text}")
    

# RUN FOR SAVING TO A NETCDF INSTEAD ---------------------------------
# # Check the response
# if response.status_code == 200:
#     data = response.json().get("results", [])
#     if not data:
#         print("No data found for the specified range.")
#     else:
#         # Convert the data to a Pandas DataFrame
#         df = pd.DataFrame(data)

#         # Convert the DataFrame to an xarray Dataset
#         ds = xr.Dataset.from_dataframe(df.set_index("date"))  # Index by the 'date' column

#         # Add metadata (optional)
#         ds.attrs["station_id"] = station_id
#         ds.attrs["source"] = "NOAA LCD Dataset"
#         ds.attrs["date_retrieved"] = pd.Timestamp.now().isoformat()

#         # Save as NetCDF
#         netcdf_filename = "juneau_airport_data.nc"
#         ds.to_netcdf(netcdf_filename)

#         print(f"Data successfully saved to '{netcdf_filename}'")
# else:
#     print(f"Request failed with status code {response.status_code}")
#     print(f"Response content: {response.text}")

Request failed with status code 500
Response content: <?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><statusCode>500</statusCode><userMessage>An error occured while servicing your request.</userMessage><developerMessage>An error occured while servicing your request.</developerMessage></response>


In [None]:
# Load the saved airport NetCDF file and print a summary of its contents.
netcdf_path = "juneau_airport_data.nc"
ds = xr.open_dataset(netcdf_path)
print(ds)

Now, try running for all of Alaska instead

In [8]:
# Request observations for the whole state of Alaska from the NOAA CDO web API (v2).
#
# The personal API token is read from the NOAA_CDO_TOKEN environment variable
# instead of being written into the notebook, so it is never committed with the
# code. Create a token here: https://www.ncdc.noaa.gov/cdo-web/token
# (set the variable before starting the kernel, or with `%env` in a private cell).
import os

api_token = os.environ.get("NOAA_CDO_TOKEN")
if not api_token:
    raise RuntimeError(
        "Set the NOAA_CDO_TOKEN environment variable to your CDO API token "
        "(https://www.ncdc.noaa.gov/cdo-web/token) before running this cell."
    )

headers = {'token': api_token}

# Define location ID and time period
location_id = "FIPS:02" # Alaska state FIPS code
start_date = "2023-10-01"
end_date = "2023-10-15"
# NOTE(review): verify "GHCNh" against the ids listed by the /datasets endpoint;
# an unsupported dataset id is a plausible cause of an HTTP 500 from this request.
dataset = "GHCNh"  # For hourly data (Global Historical Climatology Network - Hourly)  (cite NCEI)

# Create the url for the API request
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
params = {
    "datasetid": dataset,
    "locationid": location_id,
    "startdate": start_date,
    "enddate": end_date,
    "limit": 1000, # maximum records per request
    "units": "metric", # options: metric, standard or imperial
}

# Make the API request; the timeout (seconds) keeps the cell from hanging
# forever if the server never answers.
response = requests.get(base_url, headers=headers, params=params, timeout=60)

# Check the response status code
if response.status_code == 200:
    try:
        # Parse the JSON data
        data = response.json()
        print(data)
    except ValueError as e:
        # A 200 response whose body is not valid JSON
        print(f"Error decoding JSON: {e}")
else:
    print(f"Request failed with status code {response.status_code}")
    print(f"Response content: {response.text}")

Request failed with status code 500
Response content: <?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><statusCode>500</statusCode><userMessage>An unexpected error occured while servicing your request.</userMessage><developerMessage>An unexpected error occured while servicing your request.</developerMessage></response>


In [None]:
## Previous, simpler code -- commented out for now in favor of building a more complex URL
# url = f"https://www.ncdc.noaa.gov/cdo-web/api/v2/data?station={station_id}&startDate={start_date}&endDate={end_date}&dataset={dataset}"

# response = requests.get(url, headers=headers)
# data = response.json()

# print(data)