In [17]:
import os
import datetime
import json
import base64 
import concurrent.futures


import numpy as np
import pandas as pd 
import ipywidgets as widgets
from ipywidgets import VBox, HBox, interactive, interact
from IPython.display import display, HTML, Javascript
import matplotlib.pyplot as plt

In [2]:
def fetchDataFromERDDAP(dataset_id, metadata, variable_name, start_date, end_date):
    try:
        # Set ERDDAP server details
        s = 'https://erddap.marine.ie/erddap'
        p = 'tabledap'
        r = 'csv'

        param = [variable_name]

        # Generate parameter component of URL
        plist = ''
        for item in metadata + param:
            plist = plist+item+'%2C'
        plist = plist[0:-3]

        # Create dataframe for population
        df = pd.DataFrame()

        # Create ERDDAP url and load data for selected dates  
        url = s+"/"+p+"/"+dataset_id+"."+r+"?"+plist+"&time%3E="+start_date+"T00:00:00Z&time%3C"+end_date+"T23:59:59Z"
        df= pd.read_csv(url,index_col=[0],header=[0],skiprows=[1],parse_dates=True,infer_datetime_format=True)

        #Replace all NaN values with -5
        df = df.fillna(-5)

        return df,url
    except Exception as e:
        print(e)
        
# def create_download_link(df,Filename,title = "Download CSV file"):  
#     csv = df.to_csv(index = True)
#     b64 = base64.b64encode(csv.encode())
#     payload = b64.decode()
#     html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
#     html = html.format(payload=payload,title=title,filename=Filename+'.csv')
#     display(HTML(html))

# Select paramters

## Select station

In [8]:
dataset = 'IWBNetwork'
dataset_id = dataset
metadata = ['time', 'station_id']
columns = ['SeaTemperature']
default_start_date = datetime.datetime.strptime('2001-02-06', "%Y-%m-%d").date() 
default_end_date = datetime.datetime.strptime('2021-03-06', "%Y-%m-%d").date() 

# default_end_date = datetime.date.today()

print("Data for this dataset is only avaialable from "+str(default_start_date)+" to "+str(default_end_date)+ ". Please select appropriately!")
    
def load_data(v,sd,ed):
    try:
        global data_full
        global variable_name
        global start_date
        global end_date
        variable_name = v
        if  (default_start_date <= sd <= default_end_date) and (default_start_date <= ed <= default_end_date):
            start_date = str(sd)
            end_date = str(ed)
            data_full,url = fetchDataFromERDDAP(dataset_id, metadata, variable_name, start_date, end_date)
            print("Data Loaded!")
            print("If you wish to download raw dataset as csv file, please click below link:")
            #display(HTML("<a href="+url+">Download</a>"))
            datasetFilename = dataset_id+'_'+variable_name+'_'+start_date+'_'+end_date
            create_download_link(data_full, datasetFilename)
        else:
            raise TypeError("Please select dates within above mentioned date range!")
    except Exception as e:
        print(e)

# Parameter Widgets
## Variable
vtype = widgets.Dropdown(options = columns, description='Variable:')

## Start-End Dates

start_date = default_end_date - datetime.timedelta(days=183)

sdtype = widgets.DatePicker(
    description='Start Date',
    disabled=False,
    value = start_date
)
edtype = widgets.DatePicker(
    description='End Date',
    disabled=False,
    value = default_end_date
)

interactive(load_data,{'manual': True, 'manual_name':'Load'}, v=vtype,sd=sdtype,ed=edtype)

Data for this dataset is only avaialable from 2001-02-06 to 2001-03-06. Please select appropriately!


interactive(children=(Dropdown(description='Variable:', options=('SeaTemperature',), value='SeaTemperature'), …

In [20]:
dataset = 'IWBNetwork'
dataset_id = dataset
metadata = ['time', 'station_id']
columns = ['SeaTemperature']
variable_name = columns[0]
default_start_date = datetime.datetime.strptime('2001-02-06', "%Y-%m-%d").date() 
default_end_date = datetime.datetime.strptime('2001-02-06', "%Y-%m-%d").date() 


sd = default_start_date
ed = default_end_date
start_date = str(sd)
end_date = str(ed)
data_full,url = fetchDataFromERDDAP(dataset_id, metadata, variable_name, start_date, end_date)
data_full.head()

Unnamed: 0_level_0,station_id,SeaTemperature
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-02-06 13:00:00+00:00,M1,9.0
2001-02-06 14:00:00+00:00,M1,9.0
2001-02-06 15:00:00+00:00,M1,9.0
2001-02-06 16:00:00+00:00,M1,9.0
2001-02-06 18:00:00+00:00,M1,9.0


In [37]:
repeatN = 10
def serialFetch(repeatN):
    results = []
    return [fetchDataFromERDDAP(dataset_id, metadata, variable_name, start_date, end_date) for _ in range(repeatN)]

serial = serialFetch(repeatN=repeatN)
assert isinstance(serial,list)
assert all([isinstance(el,tuple) for el in serial])
assert all([isinstance(el[0],pd.DataFrame) for el in serial])
assert all([isinstance(el[1],str) for el in serial])

In [38]:
def concurrentFetch(repeatN):
    results = []
    repeatN = 2
    with concurrent.futures.ProcessPoolExecutor() as pool:
        futr_results = [pool.submit(fetchDataFromERDDAP,dataset_id, metadata, variable_name, start_date, end_date) for _ in range(repeatN)]
        done_results = concurrent.futures.as_completed(futr_results)
        for _ in futr_results: 
            results.append(next(done_results).result())
    return results

concResults = concurrentFetch(repeatN=repeatN)
assert isinstance(concResults,list)
assert all([isinstance(el,tuple) for el in concResults])
assert all([isinstance(el[0],pd.DataFrame) for el in concResults])
assert all([isinstance(el[1],str) for el in concResults])
assert all([(pd.testing.assert_frame_equal(ser[0],conc[0]))==None and (ser[1] == conc[1]) for ser,conc in zip(serial,concResults)])

AssertionError: DataFrame.iloc[:, 0] (column name="station_id") are different

DataFrame.iloc[:, 0] (column name="station_id") values are different (46.41524 %)
[index]: [2001-02-06 13:00:00+00:00, 2001-02-06 14:00:00+00:00, 2001-02-06 15:00:00+00:00, 2001-02-06 16:00:00+00:00, 2001-02-06 18:00:00+00:00, 2001-02-06 20:00:00+00:00, 2001-02-06 21:00:00+00:00, 2001-02-06 22:00:00+00:00, 2001-02-06 23:00:00+00:00, 2001-02-07 00:00:00+00:00, 2001-02-07 01:00:00+00:00, 2001-02-07 03:00:00+00:00, 2001-02-07 05:00:00+00:00, 2001-02-07 06:00:00+00:00, 2001-02-07 07:00:00+00:00, 2001-02-07 08:00:00+00:00, 2001-02-07 09:00:00+00:00, 2001-02-07 10:00:00+00:00, 2001-02-07 13:00:00+00:00, 2001-02-07 14:00:00+00:00, 2001-02-07 15:00:00+00:00, 2001-02-07 16:00:00+00:00, 2001-02-07 17:00:00+00:00, 2001-02-07 19:00:00+00:00, 2001-02-07 20:00:00+00:00, 2001-02-07 21:00:00+00:00, 2001-02-07 22:00:00+00:00, 2001-02-07 23:00:00+00:00, 2001-02-08 00:00:00+00:00, 2001-02-08 01:00:00+00:00, 2001-02-08 02:00:00+00:00, 2001-02-08 03:00:00+00:00, 2001-02-08 05:00:00+00:00, 2001-02-08 06:00:00+00:00, 2001-02-08 07:00:00+00:00, 2001-02-08 08:00:00+00:00, 2001-02-08 09:00:00+00:00, 2001-02-08 10:00:00+00:00, 2001-02-08 11:00:00+00:00, 2001-02-08 12:00:00+00:00, 2001-02-08 13:00:00+00:00, 2001-02-08 14:00:00+00:00, 2001-02-08 15:00:00+00:00, 2001-02-08 16:00:00+00:00, 2001-02-08 17:00:00+00:00, 2001-02-08 19:00:00+00:00, 2001-02-08 20:00:00+00:00, 2001-02-08 21:00:00+00:00, 2001-02-08 22:00:00+00:00, 2001-02-08 23:00:00+00:00, 2001-02-09 00:00:00+00:00, 2001-02-09 01:00:00+00:00, 2001-02-09 11:00:00+00:00, 2001-02-09 12:00:00+00:00, 2001-02-09 13:00:00+00:00, 2001-02-09 14:00:00+00:00, 2001-02-09 15:00:00+00:00, 2001-02-09 16:00:00+00:00, 2001-02-09 17:00:00+00:00, 2001-02-09 18:00:00+00:00, 2001-02-09 19:00:00+00:00, 2001-02-09 20:00:00+00:00, 2001-02-09 21:00:00+00:00, 2001-02-09 23:00:00+00:00, 2001-02-10 00:00:00+00:00, 2001-02-10 02:00:00+00:00, 2001-02-10 03:00:00+00:00, 2001-02-10 04:00:00+00:00, 2001-02-10 05:00:00+00:00, 2001-02-10 06:00:00+00:00, 2001-02-10 07:00:00+00:00, 2001-02-10 08:00:00+00:00, 2001-02-10 10:00:00+00:00, 2001-02-10 11:00:00+00:00, 2001-02-10 12:00:00+00:00, 2001-02-10 13:00:00+00:00, 2001-02-10 14:00:00+00:00, 2001-02-10 15:00:00+00:00, 2001-02-10 16:00:00+00:00, 2001-02-10 18:00:00+00:00, 2001-02-10 20:00:00+00:00, 2001-02-10 21:00:00+00:00, 2001-02-10 22:00:00+00:00, 2001-02-10 23:00:00+00:00, 2001-02-11 00:00:00+00:00, 2001-02-11 01:00:00+00:00, 2001-02-11 02:00:00+00:00, 2001-02-11 03:00:00+00:00, 2001-02-11 05:00:00+00:00, 2001-02-11 06:00:00+00:00, 2001-02-11 07:00:00+00:00, 2001-02-11 08:00:00+00:00, 2001-02-11 09:00:00+00:00, 2001-02-11 10:00:00+00:00, 2001-02-11 11:00:00+00:00, 2001-02-11 12:00:00+00:00, 2001-02-11 13:00:00+00:00, 2001-02-11 14:00:00+00:00, 2001-02-11 15:00:00+00:00, 2001-02-11 17:00:00+00:00, ...]
[left]:  [M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, ...]
[right]: [M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, M1, ...]

In [36]:
default_start_date = datetime.datetime.strptime('2001-02-06', "%Y-%m-%d").date() 
default_end_date = datetime.datetime.strptime('2011-02-06', "%Y-%m-%d").date() 


sd = default_start_date
ed = default_end_date
start_date = str(sd)
end_date = str(ed)
repeatN = 5
print("Serial")
%timeit -n 1 -r 1 serialFetch(repeatN)
print("Concurrent")
%timeit -n 1 -r 1 concurrentFetch(repeatN)

Serial
24 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Concurrent
5.27 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


Unnamed: 0_level_0,station_id,SeaTemperature
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-02-06 13:00:00+00:00,M1,9.0
2001-02-06 14:00:00+00:00,M1,9.0
2001-02-06 15:00:00+00:00,M1,9.0
2001-02-06 16:00:00+00:00,M1,9.0
2001-02-06 18:00:00+00:00,M1,9.0
2001-02-06 20:00:00+00:00,M1,9.0
2001-02-06 21:00:00+00:00,M1,9.0
2001-02-06 22:00:00+00:00,M1,9.0
2001-02-06 23:00:00+00:00,M1,9.0


Data for this dataset is only avaialable from 2001-02-06 to 2021-09-23. Please select appropriately!

In [None]:
# station_list = data_full.station_id.unique()
# scaption = widgets.Label(value='Select station to use:')
# stype = widgets.Dropdown(
#     options=station_list,
#     disabled=False,
# )

# def select_station(Station):
#     global station
#     station = Station
#     print("Selected Station: "+station)

# s = interactive(select_station,{'manual': True, 'manual_name':'Select'}, Station=stype)
# display(scaption,s)