# NOAA
- request example: https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyWindSpeed&dataTypes=HourlyDryBulbTemperature&stations=72530094846&startDate=2018-05-01&endDate=2018-05-31
- list of stationsids: https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT
 - list of WBAN and lat/long: https://www.epa.gov/sites/default/files/documents/STATION_LOCATIONS.PDF
- Other Notes: https://www.ncei.noaa.gov/access/services/support/v3/datasets.json

- Fort Bend WBAN: 12977

In [1]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#convert the response as a strcuctured json
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
import datetime

import urllib

import os

## Setup

### Read in Station Names

In [27]:
tday = datetime.datetime.today().replace(hour=0,minute=0,second=0,microsecond=0)
tday_str = tday.strftime('%m-%d-%Y')

cwd = os.getcwd()

download_fldr = os.path.join(cwd, 'downloads')

# sites
stations_link = 'https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT'

# read in html byte
i = requests.get(stations_link)
content = i.content
decoded_content  = str(content,'UTF-8')

end_list = []

#skip first 6 rows since it doesn't contain actual data
#read cols in first then create list with all data
#Couldnt pd.read_csv bc some of the names are 3+ words and it would skip the >2+ NAMEs
for n,i in enumerate(decoded_content.splitlines()[6:]):
    if n ==0:
        cols = list(i.split())
    else:
        nums = i.split()[0:2]
        names = " ".join(i for i in i.split()[2:])
        
        nums.append(names)

        end_list.append(nums)

awsmcs_wban_df = pd.DataFrame(end_list, columns=cols)
awsmcs_wban_df = awsmcs_wban_df[awsmcs_wban_df['NAME'].notna()]
sl_data = pd.DataFrame({'AWSMSC':['720637'],'WBAN':['00223'],'NAME':'SUGAR LAND REGIONAL AIRPORT'})
awsmcs_wban_df = pd.concat([awsmcs_wban_df, sl_data])
awsmcs_wban_df['AWSMSC_WBAN'] = awsmcs_wban_df['AWSMSC'] + awsmcs_wban_df['WBAN']


## Define List of Hourly Variables that you want to pull

In [3]:
vars_dict = {'HourlyAltimeterSetting': 'float',
            'HourlyDewPointTemperature': 'int',
            'HourlyDryBulbTemperature': 'int',
            'HourlyPrecipitation': 'float',
            'HourlyPressureChange': 'float',
            'HourlyPressureTendency': 'int',
            'HourlyRelativeHumidity': 'int',
            'HourlySeaLevelPressure': 'float',
            'HourlyStationPressure': 'float',
            'HourlyVisibility': 'float',
            'HourlyWetBulbTemperature': 'int',
            'HourlyWindDirection': 'int',
            'HourlyWindGustSpeed': 'int',
            'HourlyWindSpeed': 'int'}

hrly_vars = list(vars_dict.keys())

hrly_vars_str = '&dataTypes='.join(hrly_vars)

### Houston Stations

In [4]:
hou_stations_df = awsmcs_wban_df[awsmcs_wban_df.NAME.str.contains('HOUSTON')].reset_index(drop=True)
hou_stations_df

Unnamed: 0,AWSMSC,WBAN,NAME,AWSMSC_WBAN
0,722429,53910,HOUSTON HOOKS MEMORIAL AP,72242953910
1,722430,12960,HOUSTON INTERCONTINENTAL AP,72243012960
2,722433,12969,HOUSTON LAKESIDE ARP,72243312969
3,722435,12918,HOUSTON WILLIAM P HOBBY AP,72243512918
4,722436,12906,HOUSTON ELLINGTON AFB,72243612906


## Data Download

In [22]:
def hrly_station_wx(awsmcs_wban, strt_dte, end_dte):
   '''Pulls hourly weather from NOAA Api'''

   try:
      os.makedirs(download_fldr)
   except FileExistsError:
      # directory already exists
      pass

   noaa_api_call = f'''https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes={hrly_vars_str}&stations={awsmcs_wban}&startDate={strt_dte}&endDate={end_dte}'''
   print(noaa_api_call)
   station_name = awsmcs_wban_df[awsmcs_wban_df.AWSMSC_WBAN==awsmcs_wban]['NAME'].iloc[0]

   filename = f'{station_name}_{strt_dte}_{end_dte}.csv'
   with open(os.path.join(download_fldr, filename), 'wb') as file:
      response = requests.get(noaa_api_call, allow_redirects=True)
      file.write(response.content)

   # get filepath and readin csv
   filepath = os.path.join(download_fldr,filename) 

   df = pd.read_csv(filepath)

   # column formatting
   df['DATE'] = pd.to_datetime(df.DATE)

   # get a list of all non-main columns that are also object datatypes
   # all should be numeric but sometimes they have random strings in them, so the str needs to be removed
   object_var_cols = list(set(df.select_dtypes(include=['object']).columns).intersection(set(hrly_vars)))

   # remove any strings from these variable columns because they shouldnt have any strings in them
   df[object_var_cols] = df[object_var_cols].replace(r'[^\d.]+', '',regex=True)

   #fill NaNs and change to ints
   df[hrly_vars] = df[hrly_vars].fillna(0)
   df[hrly_vars] = df[hrly_vars].replace('','0')
   df = df.astype(vars_dict)

   df.insert(1,'STATION_NAME', station_name)

   return df


In [10]:
x = pd.concat([hrly_station_wx(awsmcs_wban=i,strt_dte='2020-01-01',end_dte='2023-12-31') for i in hou_stations_df.AWSMSC_WBAN])
x

  df = pd.read_csv(filepath)
  df = pd.read_csv(filepath)


Unnamed: 0,STATION,STATION_NAME,DATE,REPORT_TYPE,SOURCE,HourlyAltimeterSetting,HourlyDewPointTemperature,HourlyDryBulbTemperature,HourlyPrecipitation,HourlyPressureChange,HourlyPressureTendency,HourlyRelativeHumidity,HourlySeaLevelPressure,HourlyStationPressure,HourlyVisibility,HourlyWetBulbTemperature,HourlyWindDirection,HourlyWindGustSpeed,HourlyWindSpeed
0,72242953910,HOUSTON HOOKS MEMORIAL AP,2020-01-01 00:25:00,FM-16,7,30.13,38,48,0.0,0.0,0,68,0.00,29.97,2.5,43,0,0,0
1,72242953910,HOUSTON HOOKS MEMORIAL AP,2020-01-01 00:49:00,FM-16,7,30.13,39,46,0.0,0.0,0,76,0.00,29.97,3.0,43,0,0,0
2,72242953910,HOUSTON HOOKS MEMORIAL AP,2020-01-01 00:51:00,FM-16,6,30.13,39,46,0.0,0.0,0,76,0.00,29.97,2.5,43,0,0,0
3,72242953910,HOUSTON HOOKS MEMORIAL AP,2020-01-01 00:53:00,FM-15,7,30.12,40,46,0.0,0.0,0,79,30.11,29.96,2.5,43,140,0,3
4,72242953910,HOUSTON HOOKS MEMORIAL AP,2020-01-01 01:06:00,FM-16,7,30.14,39,46,0.0,0.0,0,77,0.00,29.98,3.0,43,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31013,72243612906,HOUSTON ELLINGTON AFB,2023-09-13 23:54:00,FM-15,6,29.91,73,77,0.0,0.0,0,89,0.00,29.88,10.0,74,190,0,6
31014,72243612906,HOUSTON ELLINGTON AFB,2023-09-13 23:59:00,SOD,O,0.00,0,0,0.0,0.0,0,0,0.00,0.00,0.0,0,0,0,0
31015,72243612906,HOUSTON ELLINGTON AFB,2023-09-14 00:54:00,FM-15,6,29.91,73,79,0.0,0.0,0,84,0.00,29.88,10.0,75,310,0,3
31016,72243612906,HOUSTON ELLINGTON AFB,2023-09-14 01:54:00,FM-15,6,29.90,73,79,0.0,0.0,0,84,0.00,29.86,6.0,75,290,0,3


In [28]:
hrly_station_wx(awsmcs_wban='72063700223', strt_dte='2023-01-01', end_dte='2023-12-31')

https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyAltimeterSetting&dataTypes=HourlyDewPointTemperature&dataTypes=HourlyDryBulbTemperature&dataTypes=HourlyPrecipitation&dataTypes=HourlyPressureChange&dataTypes=HourlyPressureTendency&dataTypes=HourlyRelativeHumidity&dataTypes=HourlySeaLevelPressure&dataTypes=HourlyStationPressure&dataTypes=HourlyVisibility&dataTypes=HourlyWetBulbTemperature&dataTypes=HourlyWindDirection&dataTypes=HourlyWindGustSpeed&dataTypes=HourlyWindSpeed&stations=72063700223&startDate=2023-01-01&endDate=2023-12-31


Unnamed: 0,STATION,STATION_NAME,DATE,REPORT_TYPE,SOURCE,HourlyAltimeterSetting,HourlyDewPointTemperature,HourlyDryBulbTemperature,HourlyPrecipitation,HourlyPressureChange,HourlyPressureTendency,HourlyRelativeHumidity,HourlySeaLevelPressure,HourlyStationPressure,HourlyVisibility,HourlyWetBulbTemperature,HourlyWindDirection,HourlyWindGustSpeed,HourlyWindSpeed
0,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-01-01 00:15:00,FM-15,7,29.97,56,57,0.0,0.0,0,96,0.0,29.90,0.0,56,0,0,0
1,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-01-01 00:35:00,FM-15,7,29.96,55,57,0.0,0.0,0,96,0.0,29.88,0.0,56,0,0,0
2,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-01-01 00:55:00,FM-15,7,29.97,54,56,0.0,0.0,0,96,0.0,29.90,0.0,55,0,0,0
3,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-01-01 01:15:00,FM-15,7,29.96,53,54,0.0,0.0,0,96,0.0,29.88,0.0,53,0,0,0
4,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-01-01 01:35:00,FM-15,7,29.96,52,54,0.0,0.0,0,95,0.0,29.88,0.0,53,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18474,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-09-14 00:15:00,FM-15,6,29.94,75,76,0.0,0.0,0,95,0.0,29.87,10.0,75,0,0,0
18475,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-09-14 00:35:00,FM-15,6,29.94,74,77,0.0,0.0,0,91,0.0,29.87,10.0,75,0,0,0
18476,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-09-14 00:55:00,FM-15,6,29.93,75,76,0.0,0.0,0,95,0.0,29.85,10.0,75,0,0,0
18477,72063700223,SUGAR LAND REGIONAL AIRPORT,2023-09-14 01:15:00,FM-15,6,29.93,75,77,0.0,0.0,0,95,0.0,29.85,10.0,76,0,0,0


In [None]:
awsmcs_wban='72063700223' 
strt_dte='2023-01-01'
end_dte='2023-12-31'

noaa_api_call = f'''https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes={hrly_vars_str}&stations={awsmcs_wban}&startDate={strt_dte}&endDate={end_dte}'''
print(noaa_api_call)
station_name = awsmcs_wban_df[awsmcs_wban_df.AWSMSC_WBAN==awsmcs_wban]['NAME'].iloc[0]


In [None]:
filename = f'{station_name}_{strt_dte}_{end_dte}.csv'
with open(os.path.join(download_fldr, filename), 'wb') as file:
    response = requests.get(noaa_api_call, allow_redirects=True)
    file.write(response.content)

# get filepath and readin csv
filepath = os.path.join(download_fldr,filename) 

df = pd.read_csv(filepath)