<a href="https://colab.research.google.com/github/atharvas/utils/blob/master/mesonet_pulldata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mesonet Data Pull

A simple Python script that pulls data from https://mesonet.agron.iastate.edu/request/download.phtml

Skip to **Construct-Request** to actually make a request.

## Imports

In [0]:
import datetime
import os
import random
import time
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup


## Helper Functions

In [0]:
def get_network_list():
  '''
  get the codes of all US state networks offered on the IEM ASOS download page.
  these will later be used to get stations in each network.

  the list of state names is read from a public CSV on GitHub and the network
  <option> entries are scraped from the download form.

  returns:
  network_list : a list of network codes (e.g. 'AL_ASOS') whose option label,
                 minus its last five characters, is a US state name.

  raises:
  requests.HTTPError : if the download page answers with an error status.
  ValueError : if the page has no <select name="network"> element.
  '''
  us_states = set(pd.read_csv("https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv")['State'])
  url = "https://mesonet.agron.iastate.edu/request/download.phtml"
  # a timeout keeps a stalled connection from hanging the notebook forever.
  response = requests.get(url, timeout=60)
  response.raise_for_status()
  soup = BeautifulSoup(response.text, 'lxml')

  # <select class="iemselect2" name="network">
  network_select = soup.find("select", attrs={"name":"network"})
  if network_select is None:
    raise ValueError("no <select name=\"network\"> element found at " + url)

  # labels presumably look like "<State> ASOS"; dropping the last five
  # characters is meant to leave just the state name.
  # NOTE(review): any other five-character suffix passes this check too (an
  # 'AWOS' network shows up in the recorded run output) - confirm that is intended.
  return [option['value']
          for option in network_select.find_all("option")
          if option.text[:-5] in us_states]
    
# Preview: show only the first two network codes.
get_network_list()[:2]

['AL_ASOS', 'AK_ASOS']

In [0]:
def get_stations(network_list):
  '''
  get all the stations in the network list

  inputs:
  network_list : list of network codes, e.g. the output of get_network_list()
  returns:
  stationDF : a pandas df with info about all the stations in the network list
              (an empty df when network_list is empty)
  raises:
  requests.HTTPError : if a network page answers with an error status.
  ValueError : if a network page does not contain the expected station table.
  '''
  sleep_list = [0.5, 1, 1.5, 2, 2.5, 5, 7]
  station_frames = list()

  for network in network_list:
    time.sleep(random.choice(sleep_list)) # wait some seconds before making each request to prevent DDOSing their site.
    url = "https://mesonet.agron.iastate.edu/sites/networks.php?network=" + str(network) + "&format=csv"
    # a timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # the CSV text sits in a <pre> inside <form name="networkSelect">.
    form = soup.find("form", attrs={"name":"networkSelect"})
    csv_block = form.find("pre") if form is not None else None
    if csv_block is None:
      raise ValueError("no station table found for network " + str(network))

    # [1:] drops the first character of the <pre> text (presumably a leading newline).
    station_frames.append(pd.read_csv(StringIO(csv_block.text[1:])))

  # pd.concat raises on an empty list, so answer "no networks" with an empty frame.
  if not station_frames:
    return pd.DataFrame()
  return pd.concat(station_frames)

# Fetch station metadata for the first five networks only, then preview it.
stationDF = get_stations(get_network_list()[:5])
stationDF.head(5)

Unnamed: 0,stid,station_name,lat,lon,elev,begints,iem_network
0,EET,ALABASTER,33.18,-86.78,178.0,1998-07-01 00:53:00-05,AL_ASOS
1,8A0,Albertville,34.2291,-86.2558,315.0,2006-04-01 00:18:00-06,AL_ASOS
2,ALX,ALEXANDER_CITY,32.91474,-85.96296,209.0,2003-06-01 00:00:00-05,AL_ASOS
3,79J,ANDALUSIA/OPP ARPT,31.30875,-86.39378,94.0,1973-01-01 00:00:00-06,AL_ASOS
4,ANB,ANNISTON/CALHOUN CO,33.58817,-85.85811,186.0,1948-01-01 00:00:00-06,AL_ASOS


In [0]:
def get_station_request(stationDF):
  '''
  A wrapper to just get the stations as a string of requests.

  inputs:
  stationDF : a pandas df with a 'stid' column, e.g. the output of get_stations()
  returns:
  stations : one 'station=<id>&' piece per station id, joined in order
             (e.g. 'station=EET&station=8A0&'); '' when there are no ids.
  '''
  # missing ids are skipped, and str() lets ids that pandas parsed as numbers
  # be concatenated like the textual ones.
  station_ids = stationDF['stid'].dropna().tolist()
  return "".join("station=" + str(station_id) + "&" for station_id in station_ids)

# Preview: show only the first fifty characters of the station query string.
get_station_request(stationDF)[:50]

'station=EET&station=8A0&station=ALX&station=79J&st'

## Construct-Request

In [0]:
# Start of the requested period (same names as construct_request's parameters).
start_year = 2012
start_month = 1
start_day = 1

# End of the requested period.
end_year = 2012
end_month = 1
end_day = 2

# Location used as the leading part of each output file path.
save_directory = "./"

In [0]:
def construct_request(start_year = 2012, start_month = 1, start_day = 1, end_year = 2012, end_month = 1, end_day = 2, save_directory = "./"):
  '''
  Download the observations of every network for a date range and save them
  as one CSV file per network.

  For each network returned by get_network_list() the stations are looked up
  with get_stations(), a single request URL covering all of them is built,
  and the response text is written to
  <save_directory>/METAR_<start y_m_d>_<end y_m_d>_<first two letters of the network>.csv

  inputs:
  start_year, start_month, start_day : values sent as year1 / month1 / day1
  end_year, end_month, end_day       : values sent as year2 / month2 / day2
  save_directory                     : directory the CSV files are written to
                                       (created if it does not exist)
  raises:
  ValueError : if the start or end values do not form a valid calendar date.
  requests.HTTPError : if a data request answers with an error status.
  '''
  base_url = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
  metadata = "data=all&tz=Etc/UTC&format=onlycomma&latlon=yes&missing=M&trace=T&direct=no&report_type=1&report_type=2&"
  # datetime validates the dates; strftime zero-pads month and day in the query.
  start_time = datetime.datetime(start_year, start_month, start_day).strftime('year1=%Y&month1=%m&day1=%d&')
  end_time = datetime.datetime(end_year, end_month, end_day).strftime('year2=%Y&month2=%m&day2=%d&')
  # the part of the file name that is the same for every network.
  date_tag = (str(start_year) + "_" + str(start_month) + "_" + str(start_day) + "_" +
              str(end_year) + "_" + str(end_month) + "_" + str(end_day) + "_")

  if save_directory:
    os.makedirs(save_directory, exist_ok=True)

  for network in get_network_list():
    stationDF = get_stations([network])
    stations = get_station_request(stationDF)
    url = base_url + metadata + start_time + end_time + stations
    # data requests can be large, hence the generous timeout.
    response = requests.get(url, timeout=300)
    response.raise_for_status()

    # os.path.join also copes with a save_directory that has no trailing slash.
    metar_name = os.path.join(save_directory, "METAR_" + date_tag + network[0:2] + ".csv")
    with open(metar_name, "w") as file:
      file.write(response.text)
    print("network: ", network, " saved at: ", metar_name)
# Run the download with the values set in the Construct-Request configuration
# cell, so that editing them there actually changes the request.
construct_request(start_year=start_year, start_month=start_month, start_day=start_day,
                  end_year=end_year, end_month=end_month, end_day=end_day,
                  save_directory=save_directory)

network:  AL_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_AL.csv
network:  AK_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_AK.csv
network:  AZ_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_AZ.csv
network:  AR_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_AR.csv
network:  CA_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_CA.csv
network:  CO_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_CO.csv
network:  CT_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_CT.csv
network:  DE_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_DE.csv
network:  FL_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_FL.csv
network:  GA_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_GA.csv
network:  HI_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_HI.csv
network:  ID_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_ID.csv
network:  IL_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_IL.csv
network:  IN_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_IN.csv
network:  IA_ASOS  saved at:  ./METAR_2012_1_1_2012_1_2_IA.csv
network:  AWOS  saved at:  ./METAR_2012_1_1_2012_1_2_AW