# NOAA
- request example: https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyWindSpeed&dataTypes=HourlyDryBulbTemperature&stations=72530094846&startDate=2018-05-01&endDate=2018-05-31
- list of stationsids: https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT
 - list of WBAN and lat/long: https://www.epa.gov/sites/default/files/documents/STATION_LOCATIONS.PDF
- Other Notes: https://www.ncei.noaa.gov/access/services/support/v3/datasets.json

In [1]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#convert the response as a strcuctured json
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
from datetime import datetime

import urllib

import os

## Setup

### Read in Station Names

In [75]:
cwd = os.getcwd()

download_fldr = os.path.join(cwd, 'downloads')

# sites
stations_link = 'https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT'

# read in html byte
i = requests.get(stations_link)
content = i.content
decoded_content  = str(content,'UTF-8')

end_list = []

#skip first 6 rows since it doesn't contain actual data
#read cols in first then create list with all data
#Couldnt pd.read_csv bc some of the names are 3+ words and it would skip the >2+ NAMEs
for n,i in enumerate(decoded_content.splitlines()[6:]):
    if n ==0:
        cols = list(i.split())
    else:
        nums = i.split()[0:2]
        names = " ".join(i for i in i.split()[2:])
        
        nums.append(names)

        end_list.append(nums)

df = pd.DataFrame(end_list, columns=cols)
df = df[df['NAME'].notna()]
df['AWSMSC_WBAN'] = df['AWSMSC'] + df['WBAN']

## Define List of Hourly Variables that you want to pull

In [72]:
vars_dict = {'HourlyAltimeterSetting': 'float',
            'HourlyDewPointTemperature': 'int',
            'HourlyDryBulbTemperature': 'int',
            'HourlyPrecipitation': 'float',
            'HourlyPressureChange': 'float',
            'HourlyPressureTendency': 'int',
            'HourlyRelativeHumidity': 'int',
            'HourlySeaLevelPressure': 'float',
            'HourlyStationPressure': 'float',
            'HourlyVisibility': 'float',
            'HourlyWetBulbTemperature': 'int',
            'HourlyWindDirection': 'int',
            'HourlyWindGustSpeed': 'int',
            'HourlyWindSpeed': 'int'}

hrly_vars = list(vars_dict.keys())

hrly_vars_str = '&dataTypes='.join(hrly_vars)
hrly_vars_str

'HourlyAltimeterSetting&dataTypes=HourlyDewPointTemperature&dataTypes=HourlyDryBulbTemperature&dataTypes=HourlyPrecipitation&dataTypes=HourlyPressureChange&dataTypes=HourlyPressureTendency&dataTypes=HourlyRelativeHumidity&dataTypes=HourlySeaLevelPressure&dataTypes=HourlyStationPressure&dataTypes=HourlyVisibility&dataTypes=HourlyWetBulbTemperature&dataTypes=HourlyWindDirection&dataTypes=HourlyWindGustSpeed&dataTypes=HourlyWindSpeed'

### Houston Stations

In [76]:
hou_stations = df[df.NAME.str.contains('HOUSTON')].reset_index(drop=True)
hou_stations

Unnamed: 0,AWSMSC,WBAN,NAME,AWSMSC_WBAN
0,722429,53910,HOUSTON HOOKS MEMORIAL AP,72242953910
1,722430,12960,HOUSTON INTERCONTINENTAL AP,72243012960
2,722433,12969,HOUSTON LAKESIDE ARP,72243312969
3,722435,12918,HOUSTON WILLIAM P HOBBY AP,72243512918
4,722436,12906,HOUSTON ELLINGTON AFB,72243612906


## Data Download

In [77]:
awsmsc_wban = 72243012960
strt_dte = '2020-01-01'
end_dte = '2020-12-01'

In [78]:
#noaa_api_call = f'https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyWindSpeed&dataTypes=HourlyDryBulbTemperature&stations={awsmsc_wban}&startDate={strt_dte}&endDate={end_dte}'
noaa_api_call = f'https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes={hrly_vars_str}&stations={awsmsc_wban}&startDate={strt_dte}&endDate={end_dte}'

noaa_api_call

'https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyAltimeterSetting&dataTypes=HourlyDewPointTemperature&dataTypes=HourlyDryBulbTemperature&dataTypes=HourlyPrecipitation&dataTypes=HourlyPressureChange&dataTypes=HourlyPressureTendency&dataTypes=HourlyRelativeHumidity&dataTypes=HourlySeaLevelPressure&dataTypes=HourlyStationPressure&dataTypes=HourlyVisibility&dataTypes=HourlyWetBulbTemperature&dataTypes=HourlyWindDirection&dataTypes=HourlyWindGustSpeed&dataTypes=HourlyWindSpeed&stations=72243012960&startDate=2020-01-01&endDate=2020-12-01'

In [79]:
station = ''
data_type = 'local-climatological-data'

In [80]:
try:
   os.makedirs(download_fldr)
except FileExistsError:
   # directory already exists
   pass

filename = f'awsmsc_wban_{strt_dte}_{end_dte}' + '.csv'
with open(os.path.join(download_fldr, filename), 'wb') as file:
    response = requests.get(noaa_api_call, allow_redirects=True)
    file.write(response.content)

# get filepath and readin csv
filepath = os.path.join(download_fldr,filename) 
df = pd.read_csv(filepath)

# column formatting
df['DATE'] = pd.to_datetime(df.DATE)

# get a list of all non-main columns that are also object datatypes
# all_cols_set = set(df.columns.to_list())
# main_cols_set = set(df.loc[:,:'SOURCE'].columns.to_list())
# var_cols = list(all_cols_set.difference(main_cols_set))
object_var_cols = list(set(df.select_dtypes(include=['object']).columns).intersection(set(hrly_vars)))

# remove any strings from these variable columns because they shouldnt have any strings in them
df[object_var_cols] = df[object_var_cols].replace(r'[^\d.]+', '',regex=True)

#fill NaNs and change to ints
df[hrly_vars] = df[hrly_vars].fillna(0)
df[hrly_vars] = df[hrly_vars].replace('','0')
df = df.astype(vars_dict)

df



Unnamed: 0,STATION,DATE,REPORT_TYPE,SOURCE,HourlyAltimeterSetting,HourlyDewPointTemperature,HourlyDryBulbTemperature,HourlyPrecipitation,HourlyPressureChange,HourlyPressureTendency,HourlyRelativeHumidity,HourlySeaLevelPressure,HourlyStationPressure,HourlyVisibility,HourlyWetBulbTemperature,HourlyWindDirection,HourlyWindGustSpeed,HourlyWindSpeed
0,72243012960,2020-01-01 00:00:00,FM-12,4,0.00,38,47,0.0,0.03,8,71,30.14,30.01,9.94,43,130,0,3
1,72243012960,2020-01-01 00:53:00,FM-15,7,30.13,41,48,0.0,0.00,0,77,30.13,30.02,8.00,45,0,0,0
2,72243012960,2020-01-01 01:53:00,FM-15,7,30.11,40,48,0.0,0.00,0,74,30.11,30.00,8.00,44,0,0,0
3,72243012960,2020-01-01 02:53:00,FM-15,7,30.12,41,49,0.0,0.02,5,74,30.12,30.01,7.00,45,0,0,0
4,72243012960,2020-01-01 03:53:00,FM-15,7,30.12,42,50,0.0,0.00,0,74,30.12,30.01,6.00,46,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11251,72243012960,2020-12-01 20:53:00,FM-15,7,30.09,47,53,0.0,0.01,8,80,30.09,29.98,10.00,50,120,0,9
11252,72243012960,2020-12-01 21:53:00,FM-15,7,30.09,47,51,0.0,0.00,0,86,30.09,29.98,10.00,49,130,0,3
11253,72243012960,2020-12-01 22:53:00,FM-15,7,30.09,47,52,0.0,0.00,0,83,30.09,29.98,10.00,49,110,0,5
11254,72243012960,2020-12-01 23:53:00,FM-15,7,30.08,49,54,0.0,0.01,8,83,30.08,29.97,10.00,51,130,0,3
