# NOAA NDBC Wind Data
_Factory Method Pattern to download NDBC gauge data_
1. NDBC Station Location Map https://www.ndbc.noaa.gov/  
2. All data from NDBC https://www.ndbc.noaa.gov/historical_data.shtml

In [113]:
import os
import urllib3
from bs4 import BeautifulSoup
import certifi
import re
import requests
import traceback

import wget

class Noaa_Ndbc_Wind():
    """Find and optionally download NDBC historical wind files for one station.

    The class scrapes the NDBC historical-data directory listing for the
    requested gauge type, keeps the links whose href contains the station
    number, and can report the span of years found and/or download every
    file into ``<base_dir>/<gauge_type>/<station>``.

    Supported gauge types are ``'stdmet'`` and ``'cwind'``.
    """

    def __init__(self, base_dir, ndbc_station_number, gauge_type, record_flag=False, download_flag=False):
        """Store the search/download settings; no network access happens here.

        input:
            base_dir -> base directory to save all files.
            ndbc_station_number -> station identifier, e.g. 'CRTA1'.
            gauge_type -> 'stdmet' or 'cwind'.
            record_flag -> when truthy, print the range of years available.
            download_flag -> when truthy, download every file that was found.
        """
        self.base_dir = base_dir
        self.ndbc_sta_num = ndbc_station_number
        self.gauge_type = gauge_type
        self.record_flag = record_flag
        self.download_flag = download_flag

    def _get_stdmet(self):
        """Standard meteorological data, largest record."""
        print(
            "{} - Searching for Stdmet data: https://www.ndbc.noaa.gov/measdes.shtml#stdmet".format(
                self.ndbc_sta_num
            )
        )
        stdmet_url = 'https://www.ndbc.noaa.gov/data/historical/stdmet/'
        return self._make_list_href_links(stdmet_url, self.ndbc_sta_num)

    def _get_cwind(self, print_record=False):
        """Continuous wind data.

        ``print_record`` is unused; it is kept so existing callers that pass
        it keep working.
        """
        print(
            "{} - Searching for Continuous Wind data: https://www.ndbc.noaa.gov/measdes.shtml#cwind".format(
                self.ndbc_sta_num
            )
        )
        cwind_url = 'https://www.ndbc.noaa.gov/data/historical/cwind/'
        return self._make_list_href_links(cwind_url, self.ndbc_sta_num)

    def _make_list_href_links(self, url, key_word=None):
        """
        input:
            url -> url to search for href.
            key_word -> regex to look for in <a> tag for href link. It is
                lower-cased before matching; None or '' matches every href.
        output:
            href -> list of all href links at given url that contain keyword,
                or None when the request failed or nothing matched.
        """
        # An empty pattern matches any href, which is the sensible meaning of
        # "no keyword" (the original crashed on the None default).
        pattern = re.compile(key_word.lower() if key_word else '')

        pool = urllib3.PoolManager(
            cert_reqs='CERT_REQUIRED',
            ca_certs=certifi.where())
        res = pool.request('GET', url)

        if res.status != 200:
            print('Did not connect to website, check url.')
            return None

        # Parse the whole page and keep only <a> tags whose href matches.
        soup = BeautifulSoup(res.data, 'html.parser')
        anchors = soup.find_all('a', attrs={'href': pattern})
        if not anchors:
            print('href is NoneType, maybe no data, check {} is correct station.'.format(self.ndbc_sta_num))
            return None

        # The link text is appended to the listing url to build the file url.
        return [''.join((url, link.text)) for link in anchors]

    def _print_gauge_record(self, href_list):
        """Print data availability range.

        input:
            href_list -> list of file urls, or None/empty when nothing was found.
        """
        if not href_list:
            print('Station has no hrefs for {} gauge type'.format(self.gauge_type))
            return

        # The four characters right before '.txt' in each link are the year.
        years_list = [int(link.split('.txt')[0][-4:]) for link in href_list]
        max_year = max(years_list)
        min_year = min(years_list)
        length_years = max_year - min_year + 1
        print('Gauge {} has {} year long record from {} - {}.'.format(
            self.ndbc_sta_num.upper(), length_years, min_year, max_year)
        )

    def _make_dir(self, save_location):
        """Create save_location (and any missing parents) if it does not exist."""
        if os.path.isdir(save_location):
            print('Save filepath: {}'.format(save_location))
        else:
            print('Making directory {}'.format(save_location))
            # makedirs so that a base directory that does not exist yet is created too.
            os.makedirs(save_location, exist_ok=True)

    def _download_wind_gauge(self, href_list, gauge_save_loc):
        """Download every link in href_list into gauge_save_loc.

        Files already present in gauge_save_loc are skipped. A failed download
        is reported and the loop continues with the next file.
        """
        for link in href_list or []:
            data = link.split('/')[-1]
            if os.path.isfile(os.path.join(gauge_save_loc, data)):
                print('File {} exists'.format(data))
                continue

            print('\nDownload file {}'.format(data))
            try:
                wget.download(link, gauge_save_loc)
            except Exception as e:
                # Best effort: report the failing url and keep going.
                print('Error {} on {}'.format(e, data))
                print(link)

    def get_ndbc_gauge(self):
        """Search for the station's files, then report and/or download them.

        raises:
            ValueError -> gauge_type is neither 'stdmet' nor 'cwind'.
        """
        # Compare by value: identity (`is`) on strings is not reliable.
        if self.gauge_type == 'stdmet':
            href_list = self._get_stdmet()
        elif self.gauge_type == 'cwind':
            href_list = self._get_cwind()
        else:
            raise ValueError(
                "Unknown gauge type {!r}, expected 'stdmet' or 'cwind'.".format(self.gauge_type)
            )

        if self.record_flag:
            self._print_gauge_record(href_list)

        if href_list is None:
            return

        if self.download_flag:
            # make gauge type save file location.
            gtype_save_loc = os.path.join(self.base_dir, self.gauge_type)
            self._make_dir(gtype_save_loc)
            # make gauge number save location.
            gauge_save_loc = os.path.join(gtype_save_loc, self.ndbc_sta_num)
            self._make_dir(gauge_save_loc)

            self._download_wind_gauge(href_list, gauge_save_loc)
        else:
            print('To download set value to True.')

In [114]:
# Root folder under which every downloaded gauge file is stored.
base_dir = r'C:\User Settings\Desktop\To Do\ADCNR\Coffee Island\Modeling\noaa_ndbc'

# NDBC station identifiers to query for wind data.
ADCNR_stations = ['KATA1', 'CRTA1', 'DPIA1', 'DPHA1', 'FMOA1', 'MBLA1']

# Query each station in turn: continuous wind first, then standard meteorology.
for station in ADCNR_stations:
    # Continuous wind stations example.
    ndbc_cwind = Noaa_Ndbc_Wind(
        base_dir, station, 'cwind', record_flag=True, download_flag=True
    )
    ndbc_cwind.get_ndbc_gauge()

    # Standard meteorology example.
    ndbc_stdmet = Noaa_Ndbc_Wind(
        base_dir, station, 'stdmet', record_flag=True, download_flag=True
    )
    ndbc_stdmet.get_ndbc_gauge()
    

CRTA1 - Searching for Stdmet data: https://www.ndbc.noaa.gov/measdes.shtml#stdmet
Gauge CRTA1 has 9 year long record from 2011 - 2019.
Save filepath: C:\User Settings\Desktop\To Do\ADCNR\Coffee Island\Modeling\noaa_ndbc\stdmet
Making directory C:\User Settings\Desktop\To Do\ADCNR\Coffee Island\Modeling\noaa_ndbc\stdmet\CRTA1

Download file crta1h2011.txt.gz
100% [##############################################################################]     90K / 90K
Download file crta1h2012.txt.gz
100% [############################################################################]     162K / 162K
Download file crta1h2013.txt.gz
100% [############################################################################]     156K / 156K
Download file crta1h2014.txt.gz
100% [############################################################################]     158K / 158K
Download file crta1h2015.txt.gz
100% [############################################################################]     159K / 159K
Download fil

In [45]:
# lpbf_stations = [
#     'KATA1',
#     'CRTA1',
#     'WKXA1',
#     'DPIA1',
#     'DPHA1',
#     'MBLA1',
#     'FMOA1',    
# ]

# C:\User Settings\Desktop\To Do\Virgina DOT Birds\ndbc_stations
# VDOT_stations = [
#     'SWPV2',
#     'WDSV2',
#     'CHYV2',
#     'CRYV2'
# ]