# NOAA
- request example: https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyWindSpeed&dataTypes=HourlyDryBulbTemperature&stations=72530094846&startDate=2018-05-01&endDate=2018-05-31
- list of stationsids: https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT
 - list of WBAN and lat/long: https://www.epa.gov/sites/default/files/documents/STATION_LOCATIONS.PDF
- Other Notes: https://www.ncei.noaa.gov/access/services/support/v3/datasets.json

In [15]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#convert the response as a strcuctured json
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
import datetime

import urllib

import os

## Setup

### Read in Station Names

In [21]:
tday = datetime.datetime.today().replace(hour=0,minute=0,second=0,microsecond=0)
tday_str = tday.strftime('%m-%d-%Y')

cwd = os.getcwd()

download_fldr = os.path.join(cwd, 'downloads')

# sites
stations_link = 'https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT'

# read in html byte
i = requests.get(stations_link)
content = i.content
decoded_content  = str(content,'UTF-8')

end_list = []

#skip first 6 rows since it doesn't contain actual data
#read cols in first then create list with all data
#Couldnt pd.read_csv bc some of the names are 3+ words and it would skip the >2+ NAMEs
for n,i in enumerate(decoded_content.splitlines()[6:]):
    if n ==0:
        cols = list(i.split())
    else:
        nums = i.split()[0:2]
        names = " ".join(i for i in i.split()[2:])
        
        nums.append(names)

        end_list.append(nums)

awsmcs_wban_df = pd.DataFrame(end_list, columns=cols)
awsmcs_wban_df = awsmcs_wban_df[awsmcs_wban_df['NAME'].notna()]
awsmcs_wban_df['AWSMSC_WBAN'] = awsmcs_wban_df['AWSMSC'] + awsmcs_wban_df['WBAN']

## Define List of Hourly Variables that you want to pull

In [22]:
vars_dict = {'HourlyAltimeterSetting': 'float',
            'HourlyDewPointTemperature': 'int',
            'HourlyDryBulbTemperature': 'int',
            'HourlyPrecipitation': 'float',
            'HourlyPressureChange': 'float',
            'HourlyPressureTendency': 'int',
            'HourlyRelativeHumidity': 'int',
            'HourlySeaLevelPressure': 'float',
            'HourlyStationPressure': 'float',
            'HourlyVisibility': 'float',
            'HourlyWetBulbTemperature': 'int',
            'HourlyWindDirection': 'int',
            'HourlyWindGustSpeed': 'int',
            'HourlyWindSpeed': 'int'}

hrly_vars = list(vars_dict.keys())

hrly_vars_str = '&dataTypes='.join(hrly_vars)

### Houston Stations

In [5]:
hou_stations_df = df[df.NAME.str.contains('HOUSTON')].reset_index(drop=True)
hou_stations_df

Unnamed: 0,AWSMSC,WBAN,NAME,AWSMSC_WBAN
0,722429,53910,HOUSTON HOOKS MEMORIAL AP,72242953910
1,722430,12960,HOUSTON INTERCONTINENTAL AP,72243012960
2,722433,12969,HOUSTON LAKESIDE ARP,72243312969
3,722435,12918,HOUSTON WILLIAM P HOBBY AP,72243512918
4,722436,12906,HOUSTON ELLINGTON AFB,72243612906


## Data Download

In [34]:
def hrly_station_wx(awsmcs_wban, strt_dte, end_dte):
   '''Pulls hourly weather from NOAA Api'''

   try:
      os.makedirs(download_fldr)
   except FileExistsError:
      # directory already exists
      pass

   noaa_api_call = f'''https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes={hrly_vars_str}&stations={awsmsc_wban}&startDate={strt_dte}&endDate={end_dte}'''

   station_name = awsmcs_wban_df[awsmcs_wban_df.AWSMSC_WBAN==awsmcs_wban]['NAME'].iloc[0]

   filename = f'{station_name}_{strt_dte}_{end_dte}.csv'
   with open(os.path.join(download_fldr, filename), 'wb') as file:
      response = requests.get(noaa_api_call, allow_redirects=True)
      file.write(response.content)

   # get filepath and readin csv
   filepath = os.path.join(download_fldr,filename) 

   df = pd.read_csv(filepath)

   # column formatting
   df['DATE'] = pd.to_datetime(df.DATE)

   # get a list of all non-main columns that are also object datatypes
   # all should be numeric but sometimes they have random strings in them, so the str needs to be removed
   object_var_cols = list(set(df.select_dtypes(include=['object']).columns).intersection(set(hrly_vars)))

   # remove any strings from these variable columns because they shouldnt have any strings in them
   df[object_var_cols] = df[object_var_cols].replace(r'[^\d.]+', '',regex=True)

   #fill NaNs and change to ints
   df[hrly_vars] = df[hrly_vars].fillna(0)
   df[hrly_vars] = df[hrly_vars].replace('','0')
   df = df.astype(vars_dict)

   df.insert(1,'STATION_NAME', station_name)

   return df


In [38]:
x = hrly_station_wx(awsmcs_wban='72243312969',strt_dte='2020-01-01',end_dte='2023-12-31')

  df = pd.read_csv(filepath)


In [39]:
x

Unnamed: 0,STATION,STATION_NAME,DATE,REPORT_TYPE,SOURCE,HourlyAltimeterSetting,HourlyDewPointTemperature,HourlyDryBulbTemperature,HourlyPrecipitation,HourlyPressureChange,HourlyPressureTendency,HourlyRelativeHumidity,HourlySeaLevelPressure,HourlyStationPressure,HourlyVisibility,HourlyWetBulbTemperature,HourlyWindDirection,HourlyWindGustSpeed,HourlyWindSpeed
0,72243012960,HOUSTON LAKESIDE ARP,2020-01-01 00:00:00,FM-12,4,0.00,38,47,0.0,0.03,8,71,30.14,30.01,9.94,43,130,0,3
1,72243012960,HOUSTON LAKESIDE ARP,2020-01-01 00:53:00,FM-15,7,30.13,41,48,0.0,0.00,0,77,30.13,30.02,8.00,45,0,0,0
2,72243012960,HOUSTON LAKESIDE ARP,2020-01-01 01:53:00,FM-15,7,30.11,40,48,0.0,0.00,0,74,30.11,30.00,8.00,44,0,0,0
3,72243012960,HOUSTON LAKESIDE ARP,2020-01-01 02:53:00,FM-15,7,30.12,41,49,0.0,0.02,5,74,30.12,30.01,7.00,45,0,0,0
4,72243012960,HOUSTON LAKESIDE ARP,2020-01-01 03:53:00,FM-15,7,30.12,42,50,0.0,0.00,0,74,30.12,30.01,6.00,46,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42670,72243012960,HOUSTON LAKESIDE ARP,2023-07-11 23:53:00,FM-15,6,0.00,0,0,0.0,0.00,0,0,0.00,0.00,0.00,0,0,0,0
42671,72243012960,HOUSTON LAKESIDE ARP,2023-07-11 23:59:00,SOD,6,0.00,0,0,0.0,0.00,0,0,0.00,0.00,0.00,0,0,0,0
42672,72243012960,HOUSTON LAKESIDE ARP,2023-07-12 00:53:00,FM-15,6,0.00,0,0,0.0,0.00,0,0,0.00,0.00,0.00,0,0,0,0
42673,72243012960,HOUSTON LAKESIDE ARP,2023-07-12 01:25:00,FM-16,6,0.00,0,0,0.0,0.00,0,0,0.00,0.00,0.00,0,0,0,0
