### Basic Data Fetch Prototypes

The first one I found on the blog where I recently published an article:
https://towardsdatascience.com/getting-weather-data-in-3-easy-steps-8dc10cc5c859

Feel free to use the SDK API token I set up, which is referenced in the `noaa` class.

In [1]:
# In case you don't have folium
# !pip install folium

In [2]:
import requests, pandas as pd, numpy as np, json
import folium
#parse the datetimes we get from NOAA
from datetime import datetime



# Long Beach Airport station (identifier carries the GHCND dataset prefix).
# The same identifier is also hard-coded in the `noaa` class's `base_url` and
# in the default of its `get_station` method; this variable is not read by the
# cells visible here.
station_id = 'GHCND:USW00023129'

In [3]:
class noaa:
    """Minimal client for the NOAA CDO v2 ``data`` endpoint.

    The class attributes double as overridable defaults: any keyword passed
    to the constructor whose name matches an existing attribute replaces that
    attribute on the instance; unknown keywords are ignored.
    """

    # Full request URL including a default query string.  ``get_station``
    # reuses its endpoint and default query values (e.g. ``limit``) and
    # overrides the rest with its own arguments.
    base_url   = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=TAVG&limit=1000&stationid=GHCND:USW00023129&startdate=2015-01-01&enddate=2015-12-31"
    # Token sent in the ``token`` request header.
    # NOTE(review): the token is committed in source; consider loading it
    # from the environment instead.
    noaa_token = 'gqfRhLiqfjECEAjlenVDCVImuElAuPGA'

    def __init__(self, **opts):
        """Override class-level defaults (``base_url``, ``noaa_token``)."""
        for attr, value in opts.items():
            # Only known attributes may be overridden; anything else is dropped.
            if hasattr(self, attr):
                setattr(self, attr, value)

    def request_json(self, url, **params):
        """GET ``url`` with the token header and return the decoded JSON body.

        Any keyword arguments are sent as query-string parameters.  With no
        keyword arguments the URL is requested exactly as given.
        """
        headers = {'token': self.noaa_token}
        # ``params or None`` keeps a bare URL untouched when nothing is passed.
        result = requests.get(url, headers=headers, params=params or None)
        return result.json()

    def _station_query(self, datasetid, datatypeid, stationid, startdate, enddate):
        """Return ``(endpoint, query)`` for a station request.

        The endpoint is ``base_url`` without its query string; the query is
        ``base_url``'s own parameters overridden by the given arguments.
        """
        # Local import so the class stays self-contained in the notebook.
        from urllib.parse import parse_qsl, urlsplit, urlunsplit

        parts = urlsplit(self.base_url)
        query = dict(parse_qsl(parts.query))
        query.update(
            datasetid=datasetid,
            datatypeid=datatypeid,
            stationid=stationid,
            startdate=startdate,
            enddate=enddate,
        )
        endpoint = urlunsplit(parts._replace(query=""))
        return endpoint, query

    def get_station(self, datasetid = "GHCND", datatypeid = "TAVG", stationid = "GHCND:USW00023129", startdate = "2015-01-01", enddate = "2015-12-31"):
        """Fetch records for one station and return the decoded JSON response.

        Previously every argument was ignored and the hard-coded ``base_url``
        was always requested; the arguments now drive the query.  Called with
        its defaults, the query carries the same values as ``base_url``.
        """
        endpoint, query = self._station_query(
            datasetid, datatypeid, stationid, startdate, enddate
        )
        return self.request_json(endpoint, **query)
        
# Build the client with its class-level defaults, then fetch the station data;
# `response` is the decoded JSON returned by `request_json`.
w = noaa()
response = w.get_station()

In [4]:
# Load the response's 'results' records into a DataFrame and parse the
# 'date' column into pandas datetimes in the same expression.
df = pd.DataFrame(response['results']).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [5]:
df

Unnamed: 0,date,datatype,station,attributes,value
0,2015-01-01,TAVG,GHCND:USW00023129,"H,,S,",81
1,2015-01-02,TAVG,GHCND:USW00023129,"H,,S,",89
2,2015-01-03,TAVG,GHCND:USW00023129,"H,,S,",99
3,2015-01-04,TAVG,GHCND:USW00023129,"H,,S,",112
4,2015-01-05,TAVG,GHCND:USW00023129,"H,,S,",148
...,...,...,...,...,...
360,2015-12-27,TAVG,GHCND:USW00023129,"H,,S,",98
361,2015-12-28,TAVG,GHCND:USW00023129,"H,,S,",92
362,2015-12-29,TAVG,GHCND:USW00023129,"H,,S,",99
363,2015-12-30,TAVG,GHCND:USW00023129,"H,,S,",104


### From the noaa-sdk

Someone on GitHub published a package in the PyPI ecosystem.  Here's an example of its usage (uncomment the install line in the next cell to install the package).

https://pypi.org/project/noaa-sdk/

In [6]:
# !pip install noaa_sdk

### Lookup Weather by Zipcode

The return format is a generator, a type that is not compatible with DataFrame inputs, so we will convert each observation to a list for a quick view.  I do believe we could create an easy mapping of this data that could automatically translate to a nice convention like `feature_name_unit_type`.

> The service seems to work great, except when you plug in the Seldovia zipcode. Will try to look up by station next.

In [7]:
# NOTE: this import rebinds the name `noaa` to the noaa_sdk module, shadowing
# the `noaa` class defined earlier in this notebook.
from noaa_sdk import noaa
# SDK client instance used by the following cells.
n = noaa.NOAA()

## List of stations

In [8]:
# Fetch the station listing from the SDK client; the next cell iterates over
# its 'features' entries.
station_results = n.stations()

In [9]:
## normalized structure
# One flat record per entry of station_results['features'].  The coordinates
# pair is read as [long, lat]: index 1 feeds 'lat', index 0 feeds 'long'.
stations = [
    {
        'station_name': feature['properties']['name'],
        'uri':          feature['id'],
        'id':           feature['properties']['stationIdentifier'],
        'lat':          feature['geometry']['coordinates'][1],
        'long':         feature['geometry']['coordinates'][0],
        'elevation_m':  feature['properties']['elevation']['value'],
    }
    for feature in station_results['features']
]

df_stations = pd.DataFrame(stations)
df_stations.head(10)

Unnamed: 0,station_name,uri,id,lat,long,elevation_m
0,"Phoenix, Phoenix Sky Harbor International Airport",https://api.weather.gov/stations/KPHX,KPHX,33.427799,-112.003465,339.852
1,"San Francisco, San Francisco International Air...",https://api.weather.gov/stations/KSFO,KSFO,37.61961,-122.36558,3.048
2,Boise Air Terminal,https://api.weather.gov/stations/KBOI,KBOI,43.56704,-116.24053,860.1456
3,"Dodge City, Dodge City Regional Airport",https://api.weather.gov/stations/KDDC,KDDC,37.77278,-99.96972,790.0416
4,"Salt Lake City, Salt Lake City International A...",https://api.weather.gov/stations/KSLC,KSLC,40.77069,-111.96503,1288.0848
5,"Denver, Denver International Airport",https://api.weather.gov/stations/KDEN,KDEN,39.84658,-104.65622,1647.1392
6,"Great Falls, Great Falls International Airport",https://api.weather.gov/stations/KGTF,KGTF,47.47333,-111.38222,1120.14
7,Rogue Valley International Airport,https://api.weather.gov/stations/KMFR,KMFR,42.37503,-122.87696,399.8976
8,"Albuquerque, Albuquerque International Airport",https://api.weather.gov/stations/KABQ,KABQ,35.04167,-106.61472,1630.9848
9,"Los Angeles, Los Angeles International Airport",https://api.weather.gov/stations/KLAX,KLAX,33.93806,-118.38889,38.1


## Stations of Interest

In [10]:
# Boolean mask over station names using a regex alternation.  This is a
# substring match, so unrelated stations whose names merely contain one of
# the terms (e.g. "Homerville Airport") are selected as well.
name_mask = df_stations['station_name'].str.contains(
    "Homer|Seldovia|Kenai|Nome|Adak|Fairbanks"
)
df_stations.loc[name_mask]

Unnamed: 0,station_name,uri,id,lat,long,elevation_m
294,"Fairbanks, Fairbanks International Airport",https://api.weather.gov/stations/PAFA,PAFA,64.80389,-147.87611,131.9784
312,"Nome, Nome Airport",https://api.weather.gov/stations/PAOM,PAOM,64.51194,-165.445,10.9728
586,"Kenai, Kenai Municipal Airport",https://api.weather.gov/stations/PAEN,PAEN,60.57306,-151.245,29.8704
638,"Homer, Homer Airport",https://api.weather.gov/stations/PAHO,PAHO,59.65,-151.48333,25.908
743,"Fairbanks, Eielson Air Force Base",https://api.weather.gov/stations/PAEI,PAEI,64.65,-147.1,167.0304
1000,St. Johnsbury - Fairbanks,https://api.weather.gov/stations/K1V4,K1V4,44.4201,-72.0193,212.1408
1161,"Adak Island, Adak Airport",https://api.weather.gov/stations/PADK,PADK,51.87778,-176.64583,6.096
1590,"Seldovia, Seldovia Airport",https://api.weather.gov/stations/PASO,PASO,59.45,-151.7,9.144
2061,Homerville Airport,https://api.weather.gov/stations/KHOE,KHOE,31.0559,-82.7741,56.388


### Basic Map Exploration of Stations

> Warning: Loading all US stations into the map will use a lot of CPU in your browser.

In [11]:
# Station whose coordinates centre the map.  Careful: the value is matched as
# a regex/substring against station names, so a broader value may match more
# than one station; only the first match is used.
station_match = "Seldovia"
# Use `station_match` for the lookup (it was previously defined but the filter
# hard-coded "Seldovia", so updating the variable had no effect).
matching_stations = df_stations[df_stations['station_name'].str.contains(station_match)]
if matching_stations.empty:
    # Fail with a readable message instead of an IndexError from `.values[0]`.
    raise ValueError(f"No station name matches {station_match!r}")
lat, long = matching_stations[['lat', 'long']].values[0]

# NOTE(review): newer folium releases may no longer bundle the 'Stamen Terrain'
# tile set -- confirm against the installed folium version.
f_map = folium.Map(
    location   = (lat, long),
    tiles      = 'Stamen Terrain',
    zoom_start = 5
)

## Add markers to map from DataFrame
def apply_markers(row):
    """Add one marker to ``f_map`` for a station row.

    ``row`` must provide 'station_name', 'id', 'elevation_m', 'lat' and
    'long'.  The popup shows the station name, id and elevation.
    """
    tooltip_meta = f"<div style='width: 300px;'><strong>{row['station_name']}</strong></br>"
    tooltip_meta += f"id: {row['id']}</br> elivation/m: {row['elevation_m']}</div>"
    folium.Marker([row['lat'], row['long']], popup=tooltip_meta).add_to(f_map)

# Plain loop: the call is made only for its side effect on `f_map`, so there
# is no result worth collecting through DataFrame.apply.
for _, station_row in df_stations.iterrows():
    apply_markers(station_row)

## Display map
f_map

## Updated: Getting observations by station
Rather than by zipcode.  The web API seems inconsistent when fetching NOAA data by zipcode, so we will be using the station ID instead.

### Beginning of our own API
This class will be designed to perform the following tasks:
- Fetch data by station ID (known as "observations")
- Auto-normalize / flatten data with units
- Save data as files
- View raw data from response to see what the noaa service has sent us (for testing)

Things to be done:
- Integrate Hig's code
- Discuss any naming conventions
- Set up a script to automate this stuff to a SQLite instance (really easy to manage for now)
  - This is when we have it run from a list of station IDs
  - Automatically fetch historical data (need to do more research.  Easy to do -- it just takes time.)
- Test the code; maybe set up basic unit tests
- Host these datasets somewhere else other than our laptops eventually

In [12]:
class noaa_requests:
    """Fetch, flatten and export station observations.

    The class wraps an injected service object (``noaa_service``) that must
    expose ``stations_observations(station_id, start=..., end=...)``.
    Intended call order: ``set_station_data`` -> ``normalize_station_data``
    -> ``gen_df`` -> ``output_csv``.
    """

    # Class-level declarations serve as defaults and as the list of keyword
    # names accepted by __init__ (checked through hasattr).
    station_data_result = []     # raw observations as returned by the service
    station_data        = []     # flattened observations (one dict per row)
    noaa_service        = False  # injected service object; falsy until set
    station_id          = None   # id of the most recently fetched station
    df                  = None   # DataFrame built by gen_df()

    def __init__(self, **params):
        """Store any keyword whose name matches a declared attribute."""
        # Give every instance its own lists so no state can leak between
        # instances through the shared class-level defaults.
        self.station_data_result = []
        self.station_data = []
        for attr, value in params.items():
            if hasattr(self, attr):
                setattr(self, attr, value)

    def set_station_data(self, station_id, start='2000-01-01', end='2020-12-31'):
        """Fetch observations for ``station_id`` between ``start`` and ``end``.

        The date range was previously hard-coded; the defaults keep the
        original range.

        Raises:
            RuntimeError: if no ``noaa_service`` was supplied.
        """
        if not self.noaa_service:
            raise RuntimeError("noaa_service is not set; pass noaa_service=... to the constructor")
        self.station_id = station_id
        self.station_data_result = [] # reset data
        self.station_data_result = self.noaa_service.stations_observations(station_id, start=start, end=end)

    @staticmethod
    def _unit_suffix(unit_code):
        """Return the part after the first ':' of ``unit_code``, or all of it if there is no ':'."""
        code_parts = unit_code.split(':')
        return code_parts[1] if len(code_parts) > 1 else code_parts[0]

    @classmethod
    def _flatten_properties(cls, properties):
        """Flatten one observation's ``properties`` mapping into a flat dict."""
        flat = {}
        for key, item in properties.items():
            if isinstance(item, dict) and item.get('unitCode'):
                # Measurement dict -> "<key>_<unit>": value
                flat[f"{key}_{cls._unit_suffix(item['unitCode'])}"] = item.get('value')
            elif isinstance(item, list) and item and isinstance(item[0], dict):
                # Only the FIRST element of the list is flattened; any
                # further elements are not carried over.
                for item_key, item_value in item[0].items():
                    unit_id = item_value.get('unitCode', False) if isinstance(item_value, dict) else False
                    if unit_id:
                        # NOTE: the full unit code is appended here (not just
                        # the part after ':'); kept as-is so existing column
                        # names do not change.
                        item_key += "_" + unit_id
                        item_value = item_value.get("value")
                    flat[f"{key}_{item_key}"] = item_value
            else:
                # Scalars, empty lists, lists of non-dicts and dicts without
                # a unit code are stored unchanged.
                flat[key] = item
        return flat

    def normalize_station_data(self):
        """Rebuild ``station_data`` from ``station_data_result``."""
        self.station_data = [] # reset result whenever called
        data_length = len(self.station_data_result)
        print(f"Lenght of station_data_result: {data_length}")
        self.station_data = [
            self._flatten_properties(obs['properties'])
            for obs in self.station_data_result
        ]

    def gen_df(self):
        """Build ``self.df`` from the flattened ``station_data``."""
        print("Generating data frame")
        self.df = pd.DataFrame(self.station_data)

    def output_csv(self, output_dir="../data/processed"):
        """Write ``self.df`` to ``<output_dir>/<station_id>.csv``.

        ``output_dir`` defaults to the previously hard-coded directory.

        Raises:
            RuntimeError: if ``gen_df`` has not produced a DataFrame yet.
        """
        if self.df is None:
            raise RuntimeError("No data frame to write; call gen_df() first")
        station_id = self.station_id
        print(f"Writing {station_id}.csv")
        self.df.to_csv(f"{output_dir}/{station_id}.csv", encoding = "UTF8")

# This library actually makes the request
n  = noaa.NOAA()

# the noaa_sdk is passed to our class and used internally throughout
nr = noaa_requests(noaa_service = n)
# Station identifiers to export; each one is written to its own CSV file.
stations_of_interest = ['PABR','PASO','PAHO']
for station in stations_of_interest:
    # Per-station pipeline: fetch -> flatten -> build DataFrame -> write CSV.
    nr.set_station_data(station)
    nr.normalize_station_data()
    nr.gen_df()
    nr.output_csv()

Lenght of station_data_result: 904
Generating data frame
Writing PABR.csv
Lenght of station_data_result: 1164
Generating data frame
Writing PASO.csv
Lenght of station_data_result: 994
Generating data frame
Writing PAHO.csv
