# Gathers wave and weather buoy data from the Marine Institute ERDDAP server
## Based on https://github.com/IrishMarineInstitute/OGI_MIPilot

In [1]:
# Import necessary libraries

import datetime

import numpy as np
import os
import pandas as pd

In [2]:
# Data type handled by this cell
typed = 'wave_spectral'
print(typed)

# Set ERDDAP server details: base URL, protocol and response file type
s = 'https://erddap.marine.ie/erddap'
p = 'tabledap'
r = 'csv'

# Today's date, as a date object and as a YYYY-MM-DD string
now = datetime.date.today()
now_string = now.strftime('%Y-%m-%d')

# Columns requested ahead of the measured parameters
# ('buoy_id', 'latitude' and 'longitude' are not included in the list)
metadata = ['station_id',
            'time']

# Dataset, first year to request and parameter columns for this data type
dataset_id = 'IWaveBNetwork_spectral'
syear = 2008
master_params = ['PeakPeriod',
                 'PeakDirection',
                 'PeakSpread',
                 'SignificantWaveHeight',
                 'EnergyPeriod',
                 'MeanWavePeriod_Tm01',
                 'MeanWavePeriod_Tm02',
                 'qcflag']

# Generate the parameter component of the URL: the column names joined by a
# URL-encoded comma (%2C), with no trailing separator
plist = '%2C'.join(metadata + master_params)

# Iterate by year to reduce risk of time out on large time-series.
# Each request window is (label, first day inclusive, end day exclusive).
years = range(syear, now.year)
windows = [(year, str(year) + "-01-01", str(year + 1) + "-01-01") for year in years]

# Final window: from the start of the current year up to midnight of the current day.
# On 1 January that window is empty, so it is skipped rather than requested
# (NOTE(review): ERDDAP is expected to answer a no-match query with an error - confirm).
if now > datetime.date(now.year, 1, 1):
    windows.append((str(now.year), str(now.year) + "-01-01", now_string))

# Download every window, collecting the pieces so they are concatenated only once
frames = []
for label, start, end in windows:
    url = (s + "/" + p + "/" + dataset_id + "." + r + "?" + plist
           + "&time%3E=" + start + "T00:00:00Z"
           + "&time%3C" + end + "T00:00:00Z")
    # index_col=1 is the second requested column ('time'), parsed as dates.
    # skiprows=[1] drops the second line of the file
    # (presumably the ERDDAP units row - confirm against a sample response).
    # infer_datetime_format is not passed: it is deprecated since pandas 2.0.
    dfbyyear = pd.read_csv(url, index_col=1, header=[0], skiprows=[1], parse_dates=True)
    frames.append(dfbyyear)
    print("Downloaded %s" % (label))

# Create the populated dataframe (empty if nothing was requested)
df = pd.concat(frames) if frames else pd.DataFrame()

# Make a copy of the unaltered data download
data_full_wave_spectral = df.copy()
print("Full resolution data downloaded. Available as 'data_full_wave_spectral'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
df.to_csv('full_data/wave_spectral.csv', encoding='utf-8')

# Utilise quality control flags to clean data set
# Code to be added...



# Take a working copy of the downloaded data
data = df.copy()

# Add a column holding the calendar date of each record
data['Date'] = data.index.date

# Get a count of data points grouped by station and date
data_summ = data.groupby(['station_id','Date']).count().reset_index(level=['station_id','Date'])

# Per-station availability frames (counts converted to percentage of expected
# data points per day); concatenated once after the loop
avail_frames = []

# Loop through each station due to different data resolutions and calculate percentages
for stn in data_summ.station_id.unique().tolist():
    data_stn = data_summ[data_summ['station_id']==stn].copy()

    # Set the expected number of data points per day for a buoy type or station
    res = 24 if (typed == 'weather' or stn == 'Westwave MK4') else 48

    # Convert counts to percentage.
    # Whole columns are assigned (not written through .loc): the counts are
    # integers and the percentages are floats, and setting float values into
    # integer columns in place is deprecated in recent pandas.
    data_stn[master_params] = data_stn[master_params] / res * 100

    # Expand date range back to the first day of the month of the first record.
    # Enables accurate calculation of monthly percentage return from the daily data when plotting
    first_day = data_stn.Date.min()
    last_day = data_stn.Date.max()
    data_fulldates = pd.DataFrame(index=pd.date_range(first_day.replace(day=1), last_day))

    # Add date factors to facilitate plotting
    data_fulldates['Date'] = data_fulldates.index.date
    data_fulldates['Year'] = data_fulldates.index.year
    data_fulldates['Month'] = data_fulldates.index.month
    data_fulldates['DOY'] = data_fulldates.index.dayofyear

    # Merge the station counts onto the full date range and fill blank dates with zero
    data_fulldates = data_fulldates.merge(data_stn, how='outer', on='Date').fillna(0)
    # Set station (also overwrites the zeros that fillna put on the blank dates)
    data_fulldates['station_id'] = stn
    # Keep the data for this station for the master availability data frame
    avail_frames.append(data_fulldates)

# Create master availability dataframe from the per-station frames
data_avail = pd.concat(avail_frames) if avail_frames else pd.DataFrame()

# Set indices and tidy up dataframe
data_avail = data_avail.set_index(['station_id', 'Date','Year','Month','DOY'])

# Name of the quality-control flag column for this data type
qc = 'qcflag' if typed != 'weather' else 'QC_Flag'

# Drop the quality-control flag column so only the measured parameters remain
data_avail = data_avail.drop([qc], axis=1)
data_avail.columns = pd.MultiIndex.from_product([data_avail.columns, ['avail']])

print("Daily availability generated. Available as 'data_avail'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
data_avail.to_csv('full_data/wave_spectral_availability.csv', encoding='utf-8')

wave_spectral
Downloaded 2008
Downloaded 2009
Downloaded 2010
Downloaded 2011
Downloaded 2012
Downloaded 2013
Downloaded 2014
Downloaded 2015
Downloaded 2016
Downloaded 2017
Downloaded 2018
Downloaded 2019
Downloaded 2020
Downloaded 2021
Full resolution data downloaded. Available as 'data_full_wave_spectral'.
Daily availability generated. Available as 'data_avail'.


In [3]:
# Data type handled by this cell
typed = 'wave_zero'
print(typed)

# Set ERDDAP server details: base URL, protocol and response file type
s = 'https://erddap.marine.ie/erddap'
p = 'tabledap'
r = 'csv'

# Today's date, as a date object and as a YYYY-MM-DD string
now = datetime.date.today()
now_string = now.strftime('%Y-%m-%d')

# Columns requested ahead of the measured parameters
# ('buoy_id', 'latitude' and 'longitude' are not included in the list)
metadata = ['station_id',
            'time']

# Dataset, first year to request and parameter columns for this data type
dataset_id = 'IWaveBNetwork_zerocrossing'
syear = 2008
master_params = ['Hmax',
                 'HmaxPeriod',
                 'Havg',
                 'Tavg',
                 'qcflag']

# Generate the parameter component of the URL: the column names joined by a
# URL-encoded comma (%2C), with no trailing separator
plist = '%2C'.join(metadata + master_params)

# Iterate by year to reduce risk of time out on large time-series.
# Each request window is (label, first day inclusive, end day exclusive).
years = range(syear, now.year)
windows = [(year, str(year) + "-01-01", str(year + 1) + "-01-01") for year in years]

# Final window: from the start of the current year up to midnight of the current day.
# On 1 January that window is empty, so it is skipped rather than requested
# (NOTE(review): ERDDAP is expected to answer a no-match query with an error - confirm).
if now > datetime.date(now.year, 1, 1):
    windows.append((str(now.year), str(now.year) + "-01-01", now_string))

# Download every window, collecting the pieces so they are concatenated only once
frames = []
for label, start, end in windows:
    url = (s + "/" + p + "/" + dataset_id + "." + r + "?" + plist
           + "&time%3E=" + start + "T00:00:00Z"
           + "&time%3C" + end + "T00:00:00Z")
    # index_col=1 is the second requested column ('time'), parsed as dates.
    # skiprows=[1] drops the second line of the file
    # (presumably the ERDDAP units row - confirm against a sample response).
    # infer_datetime_format is not passed: it is deprecated since pandas 2.0.
    dfbyyear = pd.read_csv(url, index_col=1, header=[0], skiprows=[1], parse_dates=True)
    frames.append(dfbyyear)
    print("Downloaded %s" % (label))

# Create the populated dataframe (empty if nothing was requested)
df = pd.concat(frames) if frames else pd.DataFrame()

# Make a copy of the unaltered data download
data_full_wave_zero = df.copy()
print("Full resolution data downloaded. Available as 'data_full_wave_zero'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
df.to_csv('full_data/wave_zero.csv', encoding='utf-8')


# Utilise quality control flags to clean data set
# Code to be added...



# Take a working copy of the downloaded data
data = df.copy()

# Add a column holding the calendar date of each record
data['Date'] = data.index.date

# Get a count of data points grouped by station and date
data_summ = data.groupby(['station_id','Date']).count().reset_index(level=['station_id','Date'])

# Per-station availability frames (counts converted to percentage of expected
# data points per day); concatenated once after the loop
avail_frames = []

# Loop through each station due to different data resolutions and calculate percentages
for stn in data_summ.station_id.unique().tolist():
    data_stn = data_summ[data_summ['station_id']==stn].copy()

    # Set the expected number of data points per day for a buoy type or station
    res = 24 if (typed == 'weather' or stn == 'Westwave MK4') else 48

    # Convert counts to percentage.
    # Whole columns are assigned (not written through .loc): the counts are
    # integers and the percentages are floats, and setting float values into
    # integer columns in place is deprecated in recent pandas.
    data_stn[master_params] = data_stn[master_params] / res * 100

    # Expand date range back to the first day of the month of the first record.
    # Enables accurate calculation of monthly percentage return from the daily data when plotting
    first_day = data_stn.Date.min()
    last_day = data_stn.Date.max()
    data_fulldates = pd.DataFrame(index=pd.date_range(first_day.replace(day=1), last_day))

    # Add date factors to facilitate plotting
    data_fulldates['Date'] = data_fulldates.index.date
    data_fulldates['Year'] = data_fulldates.index.year
    data_fulldates['Month'] = data_fulldates.index.month
    data_fulldates['DOY'] = data_fulldates.index.dayofyear

    # Merge the station counts onto the full date range and fill blank dates with zero
    data_fulldates = data_fulldates.merge(data_stn, how='outer', on='Date').fillna(0)
    # Set station (also overwrites the zeros that fillna put on the blank dates)
    data_fulldates['station_id'] = stn
    # Keep the data for this station for the master availability data frame
    avail_frames.append(data_fulldates)

# Create master availability dataframe from the per-station frames
data_avail = pd.concat(avail_frames) if avail_frames else pd.DataFrame()

# Set indices and tidy up dataframe
data_avail = data_avail.set_index(['station_id', 'Date','Year','Month','DOY'])

# Name of the quality-control flag column for this data type
qc = 'qcflag'

# Drop the quality-control flag column so only the measured parameters remain
data_avail = data_avail.drop([qc], axis=1)
data_avail.columns = pd.MultiIndex.from_product([data_avail.columns, ['avail']])

print("Daily availability generated. Available as 'data_avail'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
data_avail.to_csv('full_data/wave_zero_availability.csv', encoding='utf-8')

wave_zero
Downloaded 2008
Downloaded 2009
Downloaded 2010
Downloaded 2011
Downloaded 2012
Downloaded 2013
Downloaded 2014
Downloaded 2015
Downloaded 2016
Downloaded 2017
Downloaded 2018
Downloaded 2019
Downloaded 2020
Downloaded 2021
Full resolution data downloaded. Available as 'data_full_wave_zero'.
Daily availability generated. Available as 'data_avail'.


In [4]:
# Data type handled by this cell
typed = 'weather'
print(typed)

# Set ERDDAP server details: base URL, protocol and response file type
s = 'https://erddap.marine.ie/erddap'
p = 'tabledap'
r = 'csv'

# Today's date, as a date object and as a YYYY-MM-DD string
now = datetime.date.today()
now_string = now.strftime('%Y-%m-%d')

# Columns requested ahead of the measured parameters
# ('buoy_id', 'latitude' and 'longitude' are not included in the list)
metadata = ['station_id',
            'time']

# Dataset, first year to request and parameter columns for this data type
dataset_id = 'IWBNetwork'
syear = 2001
master_params = ['AtmosphericPressure',
                 'WindDirection',
                 'WindSpeed',
                 'Gust',
                 'WaveHeight',
                 'WavePeriod',
                 'MeanWaveDirection',
                 'Hmax',
                 'AirTemperature',
                 'DewPoint',
                 'SeaTemperature',
                 'RelativeHumidity',
                 'QC_Flag']

# Generate the parameter component of the URL: the column names joined by a
# URL-encoded comma (%2C), with no trailing separator
plist = '%2C'.join(metadata + master_params)

# Iterate by year to reduce risk of time out on large time-series.
# Each request window is (label, first day inclusive, end day exclusive).
years = range(syear, now.year)
windows = [(year, str(year) + "-01-01", str(year + 1) + "-01-01") for year in years]

# Final window: from the start of the current year up to midnight of the current day.
# On 1 January that window is empty, so it is skipped rather than requested
# (NOTE(review): ERDDAP is expected to answer a no-match query with an error - confirm).
if now > datetime.date(now.year, 1, 1):
    windows.append((str(now.year), str(now.year) + "-01-01", now_string))

# Download every window, collecting the pieces so they are concatenated only once
frames = []
for label, start, end in windows:
    url = (s + "/" + p + "/" + dataset_id + "." + r + "?" + plist
           + "&time%3E=" + start + "T00:00:00Z"
           + "&time%3C" + end + "T00:00:00Z")
    # index_col=1 is the second requested column ('time'), parsed as dates.
    # skiprows=[1] drops the second line of the file
    # (presumably the ERDDAP units row - confirm against a sample response).
    # infer_datetime_format is not passed: it is deprecated since pandas 2.0.
    dfbyyear = pd.read_csv(url, index_col=1, header=[0], skiprows=[1], parse_dates=True)
    frames.append(dfbyyear)
    print("Downloaded %s" % (label))

# Create the populated dataframe (empty if nothing was requested)
df = pd.concat(frames) if frames else pd.DataFrame()

# Make a copy of the unaltered data download.
# Stored under the name announced in the message below; the copy was previously
# assigned to 'data_full_wave_zero', overwriting the zero-crossing wave data.
data_full_weather = df.copy()
print("Full resolution data downloaded. Available as 'data_full_weather'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
df.to_csv('full_data/weather.csv', encoding='utf-8')

# Utilise quality control flags to clean data set
# Code to be added...



# Take a working copy of the downloaded data
data = df.copy()

# Add a column holding the calendar date of each record
data['Date'] = data.index.date

# Get a count of data points grouped by station and date
data_summ = data.groupby(['station_id','Date']).count().reset_index(level=['station_id','Date'])

# Per-station availability frames (counts converted to percentage of expected
# data points per day); concatenated once after the loop
avail_frames = []

# Loop through each station due to different data resolutions and calculate percentages
for stn in data_summ.station_id.unique().tolist():
    data_stn = data_summ[data_summ['station_id']==stn].copy()

    # Set the expected number of data points per day for a buoy type or station
    res = 24 if (typed == 'weather' or stn == 'Westwave MK4') else 48

    # Convert counts to percentage.
    # Whole columns are assigned (not written through .loc): the counts are
    # integers and the percentages are floats, and setting float values into
    # integer columns in place is deprecated in recent pandas.
    data_stn[master_params] = data_stn[master_params] / res * 100

    # Expand date range back to the first day of the month of the first record.
    # Enables accurate calculation of monthly percentage return from the daily data when plotting
    first_day = data_stn.Date.min()
    last_day = data_stn.Date.max()
    data_fulldates = pd.DataFrame(index=pd.date_range(first_day.replace(day=1), last_day))

    # Add date factors to facilitate plotting
    data_fulldates['Date'] = data_fulldates.index.date
    data_fulldates['Year'] = data_fulldates.index.year
    data_fulldates['Month'] = data_fulldates.index.month
    data_fulldates['DOY'] = data_fulldates.index.dayofyear

    # Merge the station counts onto the full date range and fill blank dates with zero
    data_fulldates = data_fulldates.merge(data_stn, how='outer', on='Date').fillna(0)
    # Set station (also overwrites the zeros that fillna put on the blank dates)
    data_fulldates['station_id'] = stn
    # Keep the data for this station for the master availability data frame
    avail_frames.append(data_fulldates)

# Create master availability dataframe from the per-station frames
data_avail = pd.concat(avail_frames) if avail_frames else pd.DataFrame()

# Set indices and tidy up dataframe
data_avail = data_avail.set_index(['station_id', 'Date','Year','Month','DOY'])

# Name of the quality-control flag column for this data type
qc = 'QC_Flag'

# Drop the quality-control flag column so only the measured parameters remain
data_avail = data_avail.drop([qc], axis=1)
data_avail.columns = pd.MultiIndex.from_product([data_avail.columns, ['avail']])

print("Daily availability generated. Available as 'data_avail'.")

# Make sure the output folder exists before writing the CSV
os.makedirs('full_data', exist_ok=True)
data_avail.to_csv('full_data/weather_availability.csv', encoding='utf-8')


weather
Downloaded 2001
Downloaded 2002
Downloaded 2003
Downloaded 2004
Downloaded 2005
Downloaded 2006
Downloaded 2007
Downloaded 2008
Downloaded 2009
Downloaded 2010
Downloaded 2011
Downloaded 2012
Downloaded 2013
Downloaded 2014
Downloaded 2015
Downloaded 2016
Downloaded 2017
Downloaded 2018
Downloaded 2019
Downloaded 2020
Downloaded 2021
Full resolution data downloaded. Available as 'data_full_weather'.
Daily availability generated. Available as 'data_avail'.
