## Fetch Weather Data: From Stations Around the Wind Farms
### *This notebook downloads wind data from the NOAA API from the list of stations found in 02 - Fetch Weather Data.ipynb*

In [12]:
import requests
import json
import pandas as pd
from pyproj import Proj
from shapely.geometry import shape
headers = {"token": "xVEIkLnfHyheHhvoheZSxesUerlyrxGN"}

#### Stations
* Stations are where the data comes from (for most datasets) and can be considered the smallest granual of location data. 
* If the desired station is known, all of its data can quickly be viewed.

In [13]:
# read all project stations fetched from the NOAA API
all_project_stations = pd.read_csv('./data/01_all_project_stations.csv')
all_project_stations.head()

Unnamed: 0,index,datacoverage,elevation,elevationUnit,id,latitude,longitude,maxdate,mindate,name,square_diagonal,query_count,final_area,query_long,query_lat,dist_from_center,p_name
0,3,0.9995,304.8,METERS,GHCND:USW00013969,36.73667,-97.10194,2018-10-10,1948-01-01,"PONCA CITY MUNICIPAL AIRPORT, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.262218,Thunder Ranch
1,1,0.9871,299.9,METERS,GHCND:USW00003965,36.1624,-97.0894,2018-10-10,1996-11-01,"STILLWATER REGIONAL AIRPORT, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.463805,Thunder Ranch
2,0,0.9675,272.8,METERS,GHCND:USC00348501,36.1175,-97.095,2018-10-11,1893-01-01,"STILLWATER 2 W, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.501546,Thunder Ranch


#### Get the dataypes for wind which we will be downloading the wind data

In [36]:
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/datatypes?datacategoryid=WIND&limit=56"
response = requests.get(url=url, headers=headers)
noaa_wind_data_types_json = response.json()
noaa_wind_data_types_df = pd.DataFrame(noaa_wind_data_types_json['results'])
wind_data_types = noaa_wind_data_types_df.id

In [37]:
len(wind_data_types)

27

In [16]:
#parameters for weather query
datasetid='GHCND'
datatypeid="AWND"
stationid='GHCND:USW00013969'
units='standard'
startdate='2017-01-01'
enddate='2017-12-31'
limit = 365

In [17]:
def get_wind_data_daily_summary(stationid, datatypeid, units = 'standard', startdate = '2017-01-01', enddate='2017-12-31', limit =365, offset = 25):
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=" +\
    datatypeid + "&stationid=" + stationid + "&units=" + units + "&startdate=" + startdate +\
    "&enddate=" + enddate + "&limit=" + str(limit)
    response = requests.get(url=url, headers=headers)
    station_weather_report_json = response.json()
    station_weather_report_df = pd.DataFrame(station_weather_report_json['results'])
    return(station_weather_report_df)

In [39]:
all_project_stations.head()

Unnamed: 0,index,datacoverage,elevation,elevationUnit,id,latitude,longitude,maxdate,mindate,name,square_diagonal,query_count,final_area,query_long,query_lat,dist_from_center,p_name
0,3,0.9995,304.8,METERS,GHCND:USW00013969,36.73667,-97.10194,2018-10-10,1948-01-01,"PONCA CITY MUNICIPAL AIRPORT, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.262218,Thunder Ranch
1,1,0.9871,299.9,METERS,GHCND:USW00003965,36.1624,-97.0894,2018-10-10,1996-11-01,"STILLWATER REGIONAL AIRPORT, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.463805,Thunder Ranch
2,0,0.9675,272.8,METERS,GHCND:USC00348501,36.1175,-97.095,2018-10-11,1893-01-01,"STILLWATER 2 W, OK US",2.139864,14,22741770000.0,-97.306297,36.572365,0.501546,Thunder Ranch


In [None]:
import os
for index, row in all_project_stations.iterrows():
    stationid = row['id']
    stationid_dir_name = stationid.replace(":", "_")
    os.mkdir('./data/weather/2017/' + stationid_dir_name)
    failed_datatypes = []
    for w in wind_data_types:
        try:
            downloaded_data = get_wind_data_daily_summary(stationid, w, units = 'standard', startdate = '2017-01-01', enddate='2017-12-31', limit =365)
            file_path = './data/weather/2017/' + stationid_dir_name + '/' + w
            downloaded_data.to_csv(file_path, index=False)
        except:
            failed_datatypes.append(w)
    break

In [8]:
failed_datatypes = []
for w in wind_data_types:
    try:
        downloaded_data = get_wind_data_daily_summary(stationid, w, units = 'standard', startdate = '2017-01-01', enddate='2017-12-31', limit =365)
        file_path = './data/weather/2017/' + w
        downloaded_data.to_csv(file_path, index=False)
    except:
        failed_datatypes.append(w)

In [9]:
failed_datatypes

['DAWM',
 'FMTM',
 'HLY-WIND-1STDIR',
 'HLY-WIND-1STPCT',
 'HLY-WIND-2NDDIR',
 'HLY-WIND-2NDPCT',
 'HLY-WIND-AVGSPD',
 'HLY-WIND-PCTCLM',
 'HLY-WIND-VCTDIR',
 'HLY-WIND-VCTSPD',
 'MDWM',
 'TWND',
 'WDF1',
 'WDFG',
 'WDFI',
 'WDFM',
 'WDMV',
 'WSF1',
 'WSFG',
 'WSFI',
 'WSFM']

In [18]:
datasetid = 'DAWM'
downloaded_data = get_wind_data_daily_summary(stationid, w, units = 'standard', startdate = '2017-01-01', enddate='2017-12-31', limit =365)

KeyError: 'results'

In [34]:
url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=" +\
"HLY-WIND-1STDIR" + "&stationid=" + stationid + "&units=" + units + "&startdate=" + startdate +\
"&enddate=" + enddate + "&limit=" + str(limit)
response = requests.get(url=url, headers=headers)
station_weather_report_json = response.json()
# station_weather_report_df = pd.DataFrame(station_weather_report_json['results'])

In [35]:
station_weather_report_json

{}