In [30]:
import json
import time
import datetime
from urllib.request import urlopen
import pandas as pd
from io import StringIO

In [52]:
"""Build a station list by using a bunch of IEM networks."""
# Column order of the resulting `stations` table.
station_columns = [
    'station_id',
    'station_name',
    'state',
    'country',
    'latitude',
    'longitude',
    'elevation',
    'climate_site',
    'wfo',
    'tzname',
    'ncdc81',
    'ncei91',
    'ugc_county',
    'ugc_zone',
    'county',
    'data_availability_range'
]
states = """AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME
    MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT
    WA WI WV WY"""
# One "<STATE>_ASOS" network name per state abbreviation.
networks = [f"{state}_ASOS" for state in states.split()]

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole table on every iteration.
station_records = []
for network in networks:
    # Only the Minnesota and Wisconsin networks are fetched; every other
    # network in the list is skipped.
    if network not in ['MN_ASOS','WI_ASOS']:
        continue
    # Get metadata
    uri = f"https://mesonet.agron.iastate.edu/geojson/network/{network}.geojson"
    # The context manager closes the HTTP response even if parsing fails; the
    # timeout keeps an unresponsive server from hanging the cell forever.
    with urlopen(uri, timeout=60) as response:
        jdict = json.load(response)
    for site in jdict["features"]:
        props = site['properties']
        coords = site['geometry']['coordinates']
        station_records.append({
            'station_id': props['sid'],
            'station_name': props['sname'],
            'state': props['state'],
            'country': props['country'],
            # The coordinate pair is read as (longitude, latitude).
            'latitude': coords[1],
            'longitude': coords[0],
            'elevation': props['elevation'],
            'climate_site': props['climate_site'],
            'wfo': props['wfo'],
            'tzname': props['tzname'],
            'ncdc81': props['ncdc81'],
            'ncei91': props['ncei91'],
            'ugc_county': props['ugc_county'],
            'ugc_zone': props['ugc_zone'],
            'county': props['county'],
            'data_availability_range': props['time_domain']
        })

# Passing `columns` fixes the column order and still yields a correctly shaped
# (empty) table when no site was collected.
stations = pd.DataFrame(station_records, columns=station_columns)


In [54]:
# Show every station whose name contains "LA CROSSE".
la_crosse_mask = stations['station_name'].str.contains('LA CROSSE')
stations[la_crosse_mask]

Unnamed: 0,station_id,station_name,state,country,latitude,longitude,elevation,climate_site,wfo,tzname,ncdc81,ncei91,ugc_county,ugc_zone,county,data_availability_range
125,LSE,LA CROSSE,WI,US,43.88,-91.25,199.0,WI4370,ARX,America/Chicago,USW00014920,USW00014920,WIC063,WIZ041,La Crosse,(1948-Now)


In [20]:
# Default number of download attempts before giving up.
MAX_ATTEMPTS = 6
# Base URL of the download endpoint; query parameters are appended to it.
SERVICE = "http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
def download_data(uri, max_attempts=None, base_delay=5):
    """Fetch the data from the IEM
    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring.

    An attempt counts as failed when the request raises, or when the response
    body starts with "ERROR".  After a failed attempt the function sleeps
    ``base_delay * 2 ** attempt`` seconds (attempt counted from 0) before
    trying again; no sleep happens after the last attempt.

    Args:
      uri (string): URL to fetch
      max_attempts (int): number of attempts to make; defaults to MAX_ATTEMPTS
      base_delay (float): delay in seconds after the first failed attempt,
        doubled after every further failure
    Returns:
      string data, or "" when every attempt failed
    """
    if max_attempts is None:
        max_attempts = MAX_ATTEMPTS

    for attempt in range(max_attempts):
        try:
            # The context manager closes the connection once the body is read.
            with urlopen(uri, timeout=300) as response:
                data = response.read().decode("utf-8")
            if not data.startswith("ERROR"):
                return data
            print("download_data(%s) returned an error response: %s"
                  % (uri, data.strip()[:200]))
        except Exception as exp:
            # Deliberately broad: any failure is treated as retryable so one
            # bad request does not abort a long multi-request download.
            print("download_data(%s) failed with %s" % (uri, exp))
        # Back off before retrying, both after exceptions and after "ERROR"
        # responses; waiting after the final attempt would be pointless.
        if attempt < max_attempts - 1:
            time.sleep(base_delay * 2 ** attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""

In [55]:
#### INPUTS ####
start = datetime.datetime(2022, 6, 13)
end = datetime.datetime(2022, 6, 15)
stations_to_pull = ['LSE']

# Define Time Interval (can only pull 24 hours at a time)
interval = datetime.timedelta(hours=24)

# One parsed DataFrame per successful request; they are concatenated once
# after the loop instead of growing `df` inside it.
frames = []

# Pull data for each station, 24 hours at a time
for station_to_pull in stations_to_pull:
    # Query-string prefix shared by every request for this station.
    start_url = SERVICE + "data=all&tz=Etc/UTC&format=comma&latlon=yes&"
    start_url += f'station={station_to_pull}&'
    now = start
    while now < end:
        window_end = now + interval
        # Each request must begin at the current window (`now`); starting at
        # `start` would re-download all earlier days on every iteration and
        # duplicate their rows in the result.
        now_url = start_url + now.strftime('year1=%Y&month1=%m&day1=%d&')
        now_url += window_end.strftime('year2=%Y&month2=%m&day2=%d&')
        print(f'Downloading: {station_to_pull} ({now} -> {window_end})')
        data = download_data(now_url)
        # Parsing starts at the first occurrence of 'station', which is taken
        # to be the CSV header row; anything before it is dropped.
        header_at = data.find('station')
        if header_at < 0:
            # download_data returns "" when it gives up; slicing with -1 would
            # hand pandas an empty or garbage buffer and raise, so skip it.
            print(f'No data returned for {station_to_pull} '
                  f'({now} -> {window_end}), skipping')
        else:
            frames.append(pd.read_csv(StringIO(data[header_at:])))
        now += interval

# `df` is always a DataFrame, even when nothing could be downloaded.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()



Downloading: LSE (2022-06-13 00:00:00 -> 2022-06-14 00:00:00)
Downloading: LSE (2022-06-14 00:00:00 -> 2022-06-15 00:00:00)


In [57]:
# Write the downloaded observations to disk without the DataFrame index column.
output_csv = 'test.csv'
df.to_csv(output_csv, index=False)