# Ireland wave spectral data 

#### Buoys & dates available from-to:

1. AMETS Berth B Wave Buoy	&   2009-12-15T16:36:00Z ~     
2. Westwave Wave Buoy       &   2015-05-14T11:26:00Z - 2019-03-27T13:00:00Z	
3. SmartBay Wave Buoy       &   2008-05-01T00:11:00Z ~
4. AMETS Berth A Wave Buoy	&   2012-05-16T11:52:00Z ~   
5. AMETS Berth C Wave Buoy	&   2014-09-10T11:00:00Z - 2015-02-23T12:00:00Z	

In [1]:
import pandas as pd
import numpy as np
import requests
import io
import os
from urllib.parse import quote 
from datetime import datetime,timedelta
import math
import urllib3
import urllib
from bs4 import BeautifulSoup
import lxml.html as lh

In [2]:
import sys
sys.path.insert(0,'./support')
from ipynb.fs.defs.Coordinates import Coordinates
from ipynb.fs.defs.Sites import wave_columns, Site
from ipynb.fs.defs.Periods import Period
from ipynb.fs.defs.time_index import convert_index, stitch, clean_data

In [3]:
'''
Erddap Data Server
For data access form, see 'https://erddap.marine.ie/erddap/tabledap/IWaveBNetwork_spectral.html'
'''
# ERDDAP tabledap dataset file name (CSV output). It is appended to the
# tabledap base URL when a request is built and is also used as the suffix
# of the per-station, per-year cache file names written by get_year_data.
db_name = 'IWaveBNetwork_spectral.csv'

In [3]:
'''
A function that returns wave data belonging to a specific year and a station taken as parameters.
If the data is not saved in the system, fetches it from ERDDAP server via request URL.
'''

def get_year_data(station_id, year):
    '''
    Return one calendar year of wave data for a station, cached on disk as CSV.

    The cache file is '<station_id>_<year>_<db_name>' inside
    'data/Ireland_Marine_Institute/', located relative to the 'site_data'
    directory. If that file exists it is read and returned. Otherwise the
    ERDDAP tabledap server is queried for the year (01-01T00:00:00Z up to
    12-31T23:59:00Z), the columns are renamed, the latitude / longitude /
    station id columns are dropped, and the result is written to the cache.

    Parameters:
        station_id: station identifier used in the query and in the cache
            file name; converted with str() wherever it is used.
        year: calendar year to fetch; converted with str().

    Returns:
        A DataFrame with columns 'Time (UTC)', 'Hs(m)_Ireland' and
        'Tp(s)_Ireland'. An empty DataFrame is returned when the server
        answers with a status other than 200. If the response parses to zero
        rows, that empty frame is returned as parsed and nothing is cached.
    '''
    # The cache lives under site_data/; when the working directory already is
    # site_data the prefix must not be repeated.
    if os.path.split(os.getcwd())[1] == 'site_data':
        here = './'
    else:
        here = './site_data/'

    path = here + 'data/Ireland_Marine_Institute/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)

    filename = path + str(station_id) + '_' + str(year) + '_' + db_name
    if os.path.isfile(filename):
        return pd.read_csv(filename)

    variables = 'time,latitude,longitude,station_id,SignificantWaveHeight,PeakPeriod'
    time_start = str(year) + '-01-01T00:00:00Z'
    time_end = str(year) + '-12-31T23:59:00Z'

    url_add = variables
    url_add += '&time>=' + time_start
    url_add += '&time<=' + time_end
    # str() keeps this consistent with the file name and the log message
    # below; a non-string id used to raise TypeError here.
    url_add += '&station_id="' + str(station_id) + '"'

    # Encode the query so the URL is well formed ('=' and '&' stay literal).
    url = 'https://erddap.marine.ie/erddap/tabledap/' + db_name + '?'
    url += urllib.parse.quote(url_add, safe='=&')
    print(url)

    # NOTE(review): verify=False disables TLS certificate verification.
    # Kept to preserve existing behaviour; confirm whether it is still needed.
    response = requests.get(url, verify=False)

    # Any status other than 200 (OK) is treated as "no data for this query",
    # possibly because nothing was recorded for the requested dates.
    if response.status_code != 200:
        print('station' + str(station_id) + ' has no data available at given times')
        return pd.DataFrame()

    # The row right after the header holds the variable units, so skip it.
    payload = response.content.decode('utf-8')
    wave_data = pd.read_csv(io.StringIO(payload), sep=',', header=0, skiprows=[1])

    if not wave_data.empty:
        wave_data.columns = ['Time (UTC)', 'Latitude', 'Longitude', 'Station_id', 'Hs(m)_Ireland', 'Tp(s)_Ireland']
        wave_data.drop(['Latitude', 'Longitude', 'Station_id'], axis=1, inplace=True)

        # Save the year data to a csv file so later calls skip the download.
        wave_data.to_csv(filename, index=False)
    return wave_data

In [6]:
'''
Main function that takes user inputs as parameters
and returns the Ireland wave data for the desired station and time frame.

Inputs are: start time, end time, station id, and an optional swell flag
(Ireland has no swell data, so the flag only prints a notice).

The station id is presumably assigned from the site coordinates before this function is called.
If that coordinate is not in the Ireland stations list, then the closest one within the distance limit is calculated and assigned.

Current distance limit is 200 km.
'''
def get_Ireland_data(time_start, time_end, station_id, swell=False):
    '''
    Collect the Ireland wave data of one station between two timestamps.

    The first four characters of time_start and time_end are read as years.
    For every year in that inclusive range the yearly data is obtained with
    get_year_data, passed through clean_data, sliced to
    [time_start:time_end] and combined with the previous years via stitch.
    Years that yield an empty frame are skipped.

    Parameters:
        time_start: start timestamp string beginning with a 4-digit year.
        time_end: end timestamp string beginning with a 4-digit year.
        station_id: station identifier forwarded to get_year_data and clean_data.
        swell: when true, only a notice is printed (no swell data exists).

    Returns:
        The combined data, or an empty DataFrame when no year produced data.
    '''
    print('Getting data from Ireland db')
    if swell:
        print('Ireland has no swell data available')

    first_year = int(time_start[:4])
    last_year = int(time_end[:4])

    site_data = pd.DataFrame()
    for year in range(first_year, last_year + 1):
        yearly = get_year_data(station_id, year)
        if yearly.empty:
            continue

        # Clean once, restrict to the requested window, then either start
        # the result with it or append it to what was gathered so far.
        window = clean_data(yearly, station_id)[time_start:time_end]
        site_data = window if site_data.empty else stitch(site_data, window)

    return site_data