In [55]:
import datetime
import glob
import json
import os
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from pandas.io.json import json_normalize

# render matplotlib figures inline in the notebook output
%matplotlib inline
# use seaborn's 'ticks' style for subsequent plots
sns.set_style('ticks')

In [56]:
# two-letter state codes; each one becomes part of a sensor-network id
# that is sent as a URL parameter further down
states = (
    'AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME '
    ' MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT '
    ' WA WI WV WY'
).split()

In [57]:
# network ids: 'AWOS' first, followed by one '<state>_ASOS' entry per state id
networks = ['AWOS'] + list(map('{}_ASOS'.format, states))

In [65]:
def get_weather(stations, start=pd.Timestamp('2014-01-01'), end=pd.Timestamp('2014-01-31')):
    '''
        fetch weather from MESOnet between start and end dates

        Parameters
        ----------
        stations : iterable
            Station ids; each one is sent as a ``station=<id>`` URL parameter.
        start, end : anything accepted by ``pd.Timestamp``
            First and last day of the request (year, month and day are used).

        Returns
        -------
        pandas.DataFrame
            Indexed by (``station``, ``date``) and sorted on that index. The
            columns tmpf, relh, sped, mslp, p01i, vsby and gust_mph are
            converted to numbers; entries that cannot be parsed become NaN.
            Any other column returned by the service is left as read.
    '''

    url = ("http://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
           "&data=tmpf&data=relh&data=sped&data=mslp&data=p01i&data=vsby&data=gust_mph&data=skyc1&data=skyc2&data=skyc3"
           "&tz=Etc/UTC&format=comma&latlon=no"
           "&{start:year1=%Y&month1=%m&day1=%d}"
           "&{end:year2=%Y&month2=%m&day2=%d}&{stations}")

    # the strftime-style format specs in the URL template need datetime-like
    # objects, so coerce first; this also lets callers pass plain date strings
    start = pd.Timestamp(start)
    end = pd.Timestamp(end)

    station_query = '&'.join('station={}'.format(s) for s in stations)

    weather = (pd.read_csv(url.format(start=start, end=end, stations=station_query),
                           comment='#')
                 # strip header whitespace BEFORE renaming: a padded ' valid'
                 # header would otherwise never be renamed and the 'date'
                 # lookup below would raise KeyError
                 .rename(columns=str.strip)
                 .rename(columns={'valid': 'date'})
                 .assign(date=lambda df: pd.to_datetime(df['date']))
                 .set_index(['station', 'date'])
                 .sort_index())

    # anything non-numeric in these columns is coerced to NaN
    float_cols = ['tmpf', 'relh', 'sped', 'mslp', 'p01i', 'vsby', "gust_mph"]
    weather[float_cols] = weather[float_cols].apply(pd.to_numeric, errors="coerce")

    return weather

In [66]:
def get_ids(network, timeout=30):
    '''
        get sensor metadata (including the sensor id) for one network

        Parameters
        ----------
        network : str
            Network id, inserted into the ``network=`` URL parameter.
        timeout : float, optional
            Seconds to wait for the server before giving up (default 30).

        Returns
        -------
        pandas.DataFrame
            One row per entry of the response's ``features`` list, flattened
            with ``json_normalize``, plus a ``network`` column set to
            ``network``.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
    '''

    url = 'http://mesonet.agron.iastate.edu/geojson/network.php?network={}'

    # a timeout keeps one unresponsive request from hanging the whole notebook
    r = requests.get(url.format(network), timeout=timeout)
    # fail here on an error status rather than later with an opaque
    # JSON-decoding error or KeyError
    r.raise_for_status()

    # prefer the top-level pandas function; the pandas.io.json location is
    # deprecated in newer pandas
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        # older pandas only exposes the function under pandas.io.json
        from pandas.io.json import json_normalize as normalize

    md = normalize(r.json()['features'])

    md['network'] = network

    return md

In [60]:
# fetch the sensor metadata of every network and stack it into a single frame
ids = pd.concat(list(map(get_ids, networks)), ignore_index=True)

In [61]:
# one group of sensor rows per network
id_groups = ids.groupby(by='network')

In [67]:
# download the weather of each network and write it to weather/<network>.csv
os.makedirs("weather", exist_ok=True)

# use the number of groups actually iterated as the progress denominator
n_networks = len(id_groups)

# count from 1 so the last progress line reads N/N instead of N-1/N
for i, (network, sensors) in enumerate(id_groups, start=1):
    print("{}/{}".format(i, n_networks), end='\r')
    # keep the per-network frame under its own name; `weather` is used for the
    # combined frame built from the csv files afterwards
    network_weather = get_weather(sensors['id'])
    network_weather.to_csv("weather/{}.csv".format(network))

0/51

  if self.run_code(code, result):


23/51

  if self.run_code(code, result):


50/51

In [69]:
# combine the per-network csv files into one frame; sort the paths because
# glob returns them in arbitrary order, which would make the row order (and
# therefore the stored file) differ between runs
weather = pd.concat([
    pd.read_csv(f, parse_dates=['date'], index_col=['station', 'date'])
    for f in sorted(glob.glob('weather/*.csv'))])

# pass `key` by keyword: newer pandas deprecates positional arguments here
weather.to_hdf("weather.h5", key="weather")

In [None]:
# list the contents of the weather/ directory
!ls weather/
