# NOAA
- request example: https://www.ncei.noaa.gov/access/services/data/v1?dataset=local-climatological-data&dataTypes=HourlyWindSpeed&dataTypes=HourlyDryBulbTemperature&stations=72530094846&startDate=2018-05-01&endDate=2018-05-31
- list of station IDs: https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT
 - list of WBAN and lat/long: https://www.epa.gov/sites/default/files/documents/STATION_LOCATIONS.PDF
- Other Notes: https://www.ncei.noaa.gov/access/services/support/v3/datasets.json

In [None]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#convert the response to structured JSON
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
from datetime import datetime

import urllib

import os

## Setup

### Read in Station Names

In [None]:
# Build the station lookup table (AWSMSC id, WBAN id, station name) from the
# NIST-hosted WBAN-MSC.TXT listing.
cwd = os.getcwd()

# folder (under the current working directory) used for downloaded files
download_fldr = os.path.join(cwd, 'downloads')

# sites
stations_link = 'https://www.itl.nist.gov/div898/winds/asos-wx/WBAN-MSC.TXT'

# Fetch the raw listing. Fail loudly on an HTTP error instead of parsing an
# error page as station data, and bound the wait so the cell cannot hang.
stations_response = requests.get(stations_link, timeout=60)
stations_response.raise_for_status()
content = stations_response.content
decoded_content = str(content, 'UTF-8')

end_list = []

# Skip the first 6 rows since they don't contain actual data; the next row
# holds the column names and every row after that is a station record.
# pd.read_csv is not used because station names can be several words long and
# a whitespace-delimited parse would split them across columns.
for n, line in enumerate(decoded_content.splitlines()[6:]):
    fields = line.split()
    if n == 0:
        cols = fields
    else:
        # first two fields are the ids; the rest is re-joined into one name
        end_list.append(fields[:2] + [" ".join(fields[2:])])

df = pd.DataFrame(end_list, columns=cols)

# Rows with fewer than two fields (e.g. blank lines) are padded with missing
# values by the DataFrame constructor; drop them. The explicit copy makes the
# column assignment below write to an independent frame rather than to a
# filtered view (avoids SettingWithCopyWarning).
df = df[df['NAME'].notna()].copy()

# string concatenation of the two id columns
# NOTE(review): presumably this is the station id format the NOAA request expects - confirm
df['AWSMSC_WBAN'] = df['AWSMSC'] + df['WBAN']

### Houston Stations

In [None]:
# Keep only the stations whose NAME contains 'HOUSTON', renumbered from 0.
is_houston = df['NAME'].str.contains('HOUSTON')
hou_stations = df.loc[is_houston].reset_index(drop=True)
hou_stations

## Data Download

In [None]:
# Parameters of the NOAA request.
# station id substituted into the `stations` query parameter
awsmsc_wban = 72243012960
# date range substituted into the `startDate` / `endDate` query parameters
strt_dte, end_dte = '2020-01-01', '2020-12-01'

In [None]:
# Assemble the NOAA data-service request URL: base endpoint plus an
# '&'-joined query string carrying the dataset, the two requested data types,
# the station id and the date range.
noaa_query_parts = [
    'dataset=local-climatological-data',
    'dataTypes=HourlyWindSpeed',
    'dataTypes=HourlyDryBulbTemperature',
    f'stations={awsmsc_wban}',
    f'startDate={strt_dte}',
    f'endDate={end_dte}',
]
noaa_api_call = 'https://www.ncei.noaa.gov/access/services/data/v1?' + '&'.join(noaa_query_parts)
noaa_api_call

In [None]:
# Dataset identifier and a station placeholder (left empty here).
data_type = 'local-climatological-data'
station = ''

In [None]:
# Download the requested NOAA data to a CSV file, load it back into `df` and
# coerce every measurement column to integers.

# Create the download folder; exist_ok makes this a no-op if it already exists.
os.makedirs(download_fldr, exist_ok=True)

# Name the file after the station id and date range so downloads for
# different stations do not overwrite each other (the name previously
# contained the literal text "awsmsc_wban" instead of the id).
filename = f'{awsmsc_wban}_{strt_dte}_{end_dte}.csv'
filepath = os.path.join(download_fldr, filename)

# Fetch first and fail loudly on an HTTP error, so an error page or an empty
# file is never left on disk and later parsed as if it were CSV data.
response = requests.get(noaa_api_call, allow_redirects=True, timeout=120)
response.raise_for_status()
with open(filepath, 'wb') as file:
    file.write(response.content)

# read the downloaded csv back in
df = pd.read_csv(filepath)

# column formatting
df['DATE'] = pd.to_datetime(df['DATE'])

# Columns up to and including 'SOURCE' are treated as the main columns;
# everything else is a variable (measurement) column. var_cols keeps the
# frame's own column order so the result is deterministic.
all_cols_set = set(df.columns.to_list())
main_cols_set = set(df.loc[:, :'SOURCE'].columns.to_list())
non_main_cols = all_cols_set.difference(main_cols_set)
var_cols = [col for col in df.columns if col in non_main_cols]

# variable columns that were read as object dtype, i.e. contain non-numeric text
object_cols = set(df.select_dtypes(include=['object']).columns)
object_var_cols = [col for col in var_cols if col in object_cols]

# Strip stray characters from these columns but keep digits, the decimal point
# and the minus sign (dropping '-' would flip the sign of negative readings).
# Anything that still is not a number afterwards (e.g. an empty string)
# becomes NaN instead of making the integer conversion below raise.
if object_var_cols:
    stripped = df[object_var_cols].replace(r'[^\d.\-]+', '', regex=True)
    df[object_var_cols] = stripped.apply(pd.to_numeric, errors='coerce')

# fill NaNs with 0 and change to ints (any fractional part is truncated)
df[var_cols] = df[var_cols].fillna(0).astype(int)


df



In [None]:
# print a concise summary of the frame (columns, non-null counts, dtypes)
df.info()