In [10]:
import numpy as np
import pandas as pd
import requests
import json
import dotenv
import os
from datetime import date
from pyncei import NCEIBot, NCEIResponse
import warnings
warnings.filterwarnings("ignore")


In [14]:
botname = 'weather-trends'
version = '0.0'
email = 'cfd6yn@virginia.edu'
useragent = f'{botname}/{version} ({email}) python-requests/{requests.__version__}'
headers = {'User-Agent': useragent}
headers

{'User-Agent': 'weather-trends/0.0 (cfd6yn@virginia.edu) python-requests/2.32.5'}

In [19]:
dotenv.load_dotenv()
NCDCkey = os.getenv('NCDCkey')

In [78]:
BASE_URL = "https://www.ncei.noaa.gov/access/services/data/v1"
stations = ["GHCND:USC00093992"]  # make sure these are strings
all_data = []

for year in range(2013, 2023):
    start = f"{year}-01-01"
    end = f"{year}-12-31"
    
    params = {
        "dataset": "daily-summaries",
        "stations": ",".join(stations),  # now valid
        "startDate": start,
        "endDate": end,
        "dataTypes": "TMAX,TMIN,PRCP",
        "format": "json",
        "units": "metric"
    }
    
    r = requests.get(BASE_URL, headers=headers, params=params)
    print(year, r.status_code)
    
    if r.status_code == 200 and r.text.strip():
        year_data = r.json()
        all_data.extend(year_data)
    else:
        print(f"No data for {year}")

df_weather = pd.json_normalize(all_data)
print(df_weather.columns)


2013 200
2014 200
2015 200
2016 200
2017 200
2018 200
2019 200
2020 200
2021 200
2022 200
RangeIndex(start=0, stop=0, step=1)


In [None]:
BASE_URL = "https://www.ncei.noaa.gov/access/services/data/v1/stations"

all_data = []

for year in range(2013, 2023):
    start = f"{year}-01-01"
    end = f"{year}-12-31"
    params = {
        "dataset": "daily-summaries",
        "stations": "station_id",
        "startDate": start,
        "endDate": end,
        "dataTypes": "TMAX,TMIN,PRCP",
        "format": "json",
        "units": "metric"
    }

r = requests.get(BASE_URL, headers=headers, params=params)
r.status_code

200

In [68]:
if r.status_code == 200 and r.text.strip():
    year_data = r.json()
    all_data.extend(year_data)
else:
    print(f"No data for {year}")
df_weather = pd.json_normalize(all_data)
df_weather.columns

RangeIndex(start=0, stop=0, step=1)

In [69]:
df_weather['DATE'] = pd.to_datetime(df_weather['DATE'])
df_weather['YearMonth'] = df_weather['DATE'].dt.to_period('M')

df_monthly = df_weather.groupby('YearMonth').agg({
    'TMAX': 'max',
    'TMIN': 'min',
    'PRCP': 'sum'
}).reset_index()

df_monthly['YearMonth'] = df_monthly['YearMonth'].dt.to_timestamp()
print(df_monthly.head())

KeyError: 'DATE'

### seperator

In [None]:
station_id = "GHCND:USC00085944"

In [13]:
ncei = NCEIBot(NCDCkey, cache_name='ncei_cache', useragent=useragent)

In [22]:
dataset_id = "GHCND"
datatype_ids = ['PRCP', 'TMAX', 'TMIN']
location_id = "FIPS:51"
start_year = 2013
end_year = 2023
years = range(start_year, end_year + 1)
n_stations = 10

In [5]:
params = {'format': 'json',
          'api_key': NCDCkey}