In [None]:
# import the h5pyd package as h5py
## resource for code (https://github.com/NREL/hsds-examples/tree/master)
import h5pyd as h5py

## other helpful packages
import numpy as np
import pandas as pd

In [5]:
# inspect the contents of the directory
folder = h5py.Folder("/nrel/US_wave/", bucket="nrel-pds-hsds")
list(folder)

['Alaska', 'Atlantic', 'Hawaii', 'West_Coast', 'maine', 'virtual_buoy']

In [6]:
atlantic = h5py.Folder("/nrel/US_wave/Atlantic/", bucket="nrel-pds-hsds")
list(atlantic)

['Atlantic_wave_1979.h5',
 'Atlantic_wave_1980.h5',
 'Atlantic_wave_1981.h5',
 'Atlantic_wave_1982.h5',
 'Atlantic_wave_1983.h5',
 'Atlantic_wave_1984.h5',
 'Atlantic_wave_1985.h5',
 'Atlantic_wave_1986.h5',
 'Atlantic_wave_1987.h5',
 'Atlantic_wave_1988.h5',
 'Atlantic_wave_1989.h5',
 'Atlantic_wave_1990.h5',
 'Atlantic_wave_1991.h5',
 'Atlantic_wave_1992.h5',
 'Atlantic_wave_1993.h5',
 'Atlantic_wave_1994.h5',
 'Atlantic_wave_1995.h5',
 'Atlantic_wave_1996.h5',
 'Atlantic_wave_1997.h5',
 'Atlantic_wave_1998.h5',
 'Atlantic_wave_1999.h5',
 'Atlantic_wave_2000.h5',
 'Atlantic_wave_2001.h5',
 'Atlantic_wave_2002.h5',
 'Atlantic_wave_2003.h5',
 'Atlantic_wave_2004.h5',
 'Atlantic_wave_2005.h5',
 'Atlantic_wave_2006.h5',
 'Atlantic_wave_2007.h5',
 'Atlantic_wave_2008.h5',
 'Atlantic_wave_2009.h5',
 'Atlantic_wave_2010.h5']

In [None]:
# ***note: code does not work on computer, but hsls should list contents within the Puerto Rico folder
! hsls -H -v --bucket nrel-pds-hsds /nrel/nsrdb/puerto_rico/

In [7]:
# datasets from WindToolkit (https://github.com/NREL/hsds-examples/blob/master/datasets/wtk-us.md)
## data exist between 2001 and 2020
# f01 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2001.h5")
# f02 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2002.h5")
# f03 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2003.h5")
# f04 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2004.h5")
# f05 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2005.h5")
# f06 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2006.h5")
f07 = h5py.File("/nrel/US_wave/Atlantic/Atlantic_wave_2007.h5")
# f08 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2008.h5")
# f09 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2009.h5")
# f10 = h5py.File("/nrel/wtk/pr100/5min/puerto_rico_wind_5min_2010.h5")

In [8]:
# inspect the datasets within each year's file
list(f07)

['coordinates',
 'directionality_coefficient',
 'energy_period',
 'maximum_energy_direction',
 'mean_absolute_period',
 'mean_wave_direction',
 'mean_zero-crossing_period',
 'meta',
 'omni-directional_wave_power',
 'peak_period',
 'significant_wave_height',
 'spectral_width',
 'time_index',
 'water_depth']

The data important for this work are: coordinates (for each site) and wind speed at 160m

In [9]:
# inspect the metadata
meta07 = pd.DataFrame(f07['meta'][...])
meta07.head()

Unnamed: 0,latitude,longitude,water_depth,timezone,distance,jurisdiction
0,39.564098,-75.976997,1.3426,-5,647.230042,b'Maryland'
1,39.565899,-75.977997,1.3118,-5,432.485596,b'Maryland'
2,39.566101,-75.980003,1.2325,-5,320.27359,b'Maryland'
3,39.562302,-75.976997,1.3392,-5,685.832764,b'Maryland'
4,39.5639,-75.978996,1.302,-5,574.726929,b'Maryland'


In [11]:
unique_values_set = set(meta07['jurisdiction'])
list(unique_values_set)

[b'Federal',
 b'Massachusetts',
 b'Connecticut',
 b'Maine',
 b'New York',
 b'New Hampshire',
 b'Delaware',
 b'Canada',
 b'Rhode Island',
 b'Virginia',
 b'Florida',
 b'Bahamas',
 b'Georgia',
 b'South Carolina',
 b'Maryland',
 b'New Jersey',
 b'North Carolina']

In [None]:
# list the attributes for the datasets and determine the scale factor and units
list(f01['windspeed_160m'].attrs)

# inspect shape, type, etc.
print(f01['windspeed_160m'].shape)

## display the scale factor (100) and units (meters per second)
print(f01['windspeed_160m'].attrs['scale_factor'])
print(f01['windspeed_160m'].attrs['units'])

# set the scale factor value
sf = f01['windspeed_160m'].attrs['scale_factor']
print(sf)

In [None]:
# Retrieve the wind speed data for each year

%time

wind01 = pd.DataFrame(f01['windspeed_160m'][...]) / sf
wind02 = pd.DataFrame(f02['windspeed_160m'][...]) / sf
wind03 = pd.DataFrame(f03['windspeed_160m'][...]) / sf
wind04 = pd.DataFrame(f04['windspeed_160m'][...]) / sf
wind05 = pd.DataFrame(f05['windspeed_160m'][...]) / sf
wind06 = pd.DataFrame(f06['windspeed_160m'][...]) / sf
wind07 = pd.DataFrame(f07['windspeed_160m'][...]) / sf
wind08 = pd.DataFrame(f08['windspeed_160m'][...]) / sf
wind09 = pd.DataFrame(f09['windspeed_160m'][...]) / sf
wind10 = pd.DataFrame(f10['windspeed_160m'][...]) / sf
wind11 = pd.DataFrame(f11['windspeed_160m'][...]) / sf
wind12 = pd.DataFrame(f12['windspeed_160m'][...]) / sf
wind13 = pd.DataFrame(f13['windspeed_160m'][...]) / sf
wind14 = pd.DataFrame(f14['windspeed_160m'][...]) / sf
wind15 = pd.DataFrame(f15['windspeed_160m'][...]) / sf
wind16 = pd.DataFrame(f16['windspeed_160m'][...]) / sf
wind17 = pd.DataFrame(f17['windspeed_160m'][...]) / sf
wind18 = pd.DataFrame(f18['windspeed_160m'][...]) / sf
wind19 = pd.DataFrame(f19['windspeed_160m'][...]) / sf
wind20 = pd.DataFrame(f20['windspeed_160m'][...]) / sf

In [None]:
# # save wind data to csv
wind01.to_csv('pr_wind01.csv')
wind02.to_csv('pr_wind02.csv')
wind03.to_csv('pr_wind03.csv')
wind04.to_csv('pr_wind04.csv')
wind05.to_csv('pr_wind05.csv')
wind06.to_csv('pr_wind06.csv')
wind07.to_csv('pr_wind07.csv')
wind08.to_csv('pr_wind08.csv')
wind09.to_csv('pr_wind09.csv')
wind10.to_csv('pr_wind10.csv')
wind11.to_csv('pr_wind11.csv')
wind12.to_csv('pr_wind12.csv')
wind13.to_csv('pr_wind13.csv')
wind14.to_csv('pr_wind14.csv')
wind15.to_csv('pr_wind15.csv')
wind16.to_csv('pr_wind16.csv')
wind17.to_csv('pr_wind17.csv')
wind18.to_csv('pr_wind18.csv')
wind19.to_csv('pr_wind19.csv')
wind20.to_csv('pr_wind20.csv')

In [None]:
# Retrieve the coordinates data for each year

%time

coords01 = pd.DataFrame(f01['coordinates'][...])
coords02 = pd.DataFrame(f02['coordinates'][...])
coords03 = pd.DataFrame(f03['coordinates'][...])
coords04 = pd.DataFrame(f04['coordinates'][...])
coords05 = pd.DataFrame(f05['coordinates'][...])
coords06 = pd.DataFrame(f06['coordinates'][...])
coords07 = pd.DataFrame(f07['coordinates'][...])
coords08 = pd.DataFrame(f08['coordinates'][...])
coords09 = pd.DataFrame(f09['coordinates'][...])
coords10 = pd.DataFrame(f10['coordinates'][...])
coords11 = pd.DataFrame(f11['coordinates'][...])
coords12 = pd.DataFrame(f12['coordinates'][...])
coords13 = pd.DataFrame(f13['coordinates'][...])
coords14 = pd.DataFrame(f14['coordinates'][...])
coords15 = pd.DataFrame(f15['coordinates'][...])
coords16 = pd.DataFrame(f16['coordinates'][...])
coords17 = pd.DataFrame(f17['coordinates'][...])
coords18 = pd.DataFrame(f18['coordinates'][...])
coords19 = pd.DataFrame(f19['coordinates'][...])
coords20 = pd.DataFrame(f20['coordinates'][...])

# # inspect dataset shapes to verify they are the same dimensions
# print(coords01)
# coords01, coords02, coords03, coords04, coords05, coords06, coords07, coords08, coords09, coords10, \
#     coords11, coords12, coords13, coords14, coords15, coords16, coords17, coords18, coords19, coords20 

coords01 = coords01.rename(columns={0: "lat", 1: "long"})
coords02 = coords02.rename(columns={0: "lat", 1: "long"})
coords03 = coords03.rename(columns={0: "lat", 1: "long"})
coords04 = coords04.rename(columns={0: "lat", 1: "long"})
coords05 = coords05.rename(columns={0: "lat", 1: "long"})
coords06 = coords06.rename(columns={0: "lat", 1: "long"})
coords07 = coords07.rename(columns={0: "lat", 1: "long"})
coords08 = coords08.rename(columns={0: "lat", 1: "long"})
coords09 = coords09.rename(columns={0: "lat", 1: "long"})
coords10 = coords10.rename(columns={0: "lat", 1: "long"})
coords11 = coords11.rename(columns={0: "lat", 1: "long"})
coords12 = coords12.rename(columns={0: "lat", 1: "long"})
coords13 = coords13.rename(columns={0: "lat", 1: "long"})
coords14 = coords14.rename(columns={0: "lat", 1: "long"})
coords15 = coords15.rename(columns={0: "lat", 1: "long"})
coords16 = coords16.rename(columns={0: "lat", 1: "long"})
coords17 = coords17.rename(columns={0: "lat", 1: "long"})
coords18 = coords18.rename(columns={0: "lat", 1: "long"})
coords19 = coords19.rename(columns={0: "lat", 1: "long"})
coords20 = coords20.rename(columns={0: "lat", 1: "long"})

print(coords01.head(), coords02.head())


In [None]:
# export the coordinate data as csv files

coords01.to_csv("wind01_coords.csv")
coords02.to_csv("wind02_coords.csv")
coords03.to_csv("wind03_coords.csv")
coords04.to_csv("wind04_coords.csv")
coords05.to_csv("wind05_coords.csv")
coords06.to_csv("wind06_coords.csv")
coords07.to_csv("wind07_coords.csv")
coords08.to_csv("wind08_coords.csv")
coords09.to_csv("wind09_coords.csv")
coords10.to_csv("wind10_coords.csv")
coords11.to_csv("wind11_coords.csv")
coords12.to_csv("wind12_coords.csv")
coords13.to_csv("wind13_coords.csv")
coords14.to_csv("wind14_coords.csv")
coords15.to_csv("wind15_coords.csv")
coords16.to_csv("wind16_coords.csv")
coords17.to_csv("wind17_coords.csv")
coords18.to_csv("wind18_coords.csv")
coords19.to_csv("wind19_coords.csv")
coords20.to_csv("wind20_coords.csv")


In [None]:
# annual averages of each site
avg01 = np.transpose(pd.DataFrame(wind01.mean(axis = 0)))
avg02 = np.transpose(pd.DataFrame(wind02.mean(axis = 0)))
avg03 = np.transpose(pd.DataFrame(wind03.mean(axis = 0)))
avg04 = np.transpose(pd.DataFrame(wind04.mean(axis = 0)))
avg05 = np.transpose(pd.DataFrame(wind05.mean(axis = 0)))
avg06 = np.transpose(pd.DataFrame(wind06.mean(axis = 0)))
avg07 = np.transpose(pd.DataFrame(wind07.mean(axis = 0)))
avg08 = np.transpose(pd.DataFrame(wind08.mean(axis = 0)))
avg09 = np.transpose(pd.DataFrame(wind09.mean(axis = 0)))
avg10 = np.transpose(pd.DataFrame(wind10.mean(axis = 0)))
avg11 = np.transpose(pd.DataFrame(wind11.mean(axis = 0)))
avg12 = np.transpose(pd.DataFrame(wind12.mean(axis = 0)))
avg13 = np.transpose(pd.DataFrame(wind13.mean(axis = 0)))
avg14 = np.transpose(pd.DataFrame(wind14.mean(axis = 0)))
avg15 = np.transpose(pd.DataFrame(wind15.mean(axis = 0)))
avg16 = np.transpose(pd.DataFrame(wind16.mean(axis = 0)))
avg17 = np.transpose(pd.DataFrame(wind17.mean(axis = 0)))
avg18 = np.transpose(pd.DataFrame(wind18.mean(axis = 0)))
avg19 = np.transpose(pd.DataFrame(wind19.mean(axis = 0)))
avg20 = np.transpose(pd.DataFrame(wind20.mean(axis = 0)))

print(avg01.head())
print(avg01.shape)

In [None]:
# export the averaged annual data as CSV files

avg01.to_csv("wind01_avg.csv")
avg02.to_csv("wind02_avg.csv")
avg03.to_csv("wind03_avg.csv")
avg04.to_csv("wind04_avg.csv")
avg05.to_csv("wind05_avg.csv")
avg06.to_csv("wind06_avg.csv")
avg07.to_csv("wind07_avg.csv")
avg08.to_csv("wind08_avg.csv")
avg09.to_csv("wind09_avg.csv")
avg10.to_csv("wind10_avg.csv")
avg11.to_csv("wind11_avg.csv")
avg12.to_csv("wind12_avg.csv")
avg13.to_csv("wind13_avg.csv")
avg14.to_csv("wind14_avg.csv")
avg15.to_csv("wind15_avg.csv")
avg16.to_csv("wind16_avg.csv")
avg17.to_csv("wind17_avg.csv")
avg18.to_csv("wind18_avg.csv")
avg19.to_csv("wind19_avg.csv")
avg20.to_csv("wind20_avg.csv")

# # export the average 20-year datasets as CSVs
wind_avg.to_csv("wind_avg.csv")
twenty_wind_avg.to_csv("wind_01_20_avg.csv")

In [None]:
coords01_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/b_coordinates/wind01_coords.csv', index_col=0)

avg01_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind01_avg.csv', index_col=0)
avg02_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind02_avg.csv', index_col=0)
avg03_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind03_avg.csv', index_col=0)
avg04_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind04_avg.csv', index_col=0)
avg05_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind05_avg.csv', index_col=0)
avg06_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind06_avg.csv', index_col=0)
avg07_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind07_avg.csv', index_col=0)
avg08_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind08_avg.csv', index_col=0)
avg09_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind09_avg.csv', index_col=0)
avg10_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind10_avg.csv', index_col=0)
avg11_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind11_avg.csv', index_col=0)
avg12_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind12_avg.csv', index_col=0)
avg13_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind13_avg.csv', index_col=0)
avg14_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind14_avg.csv', index_col=0)
avg15_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind15_avg.csv', index_col=0)
avg16_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind16_avg.csv', index_col=0)
avg17_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind17_avg.csv', index_col=0)
avg18_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind18_avg.csv', index_col=0)
avg19_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind19_avg.csv', index_col=0)
avg20_csv = pd.read_csv('/Users/Brian/Documents/Personal/Jobs/Past/CSS/NOAA/pr_wind_analysis/data/c_annual_average/wind20_avg.csv', index_col=0)


In [None]:
print(coords01_csv, avg01_csv)

In [None]:
wind_avg = pd.concat([avg01_csv,avg02_csv,avg03_csv,avg04_csv,avg05_csv,avg06_csv,avg07_csv,avg08_csv,avg09_csv,avg10_csv,avg11_csv,avg12_csv,avg13_csv,avg14_csv,avg15_csv,avg16_csv,avg17_csv,avg18_csv,avg19_csv,avg20_csv])
twenty_wind_avg = pd.DataFrame(wind_avg.mean(axis=0))

# wind_avg = pd.concat([wind_avg, year_wind], axis=1)
print(twenty_wind_avg, coords01_csv)

In [None]:
print(twenty_wind_avg.dtypes, coords01_csv.dtypes)

In [None]:
# clean up the averaged 20-year dataset

# add the coordinates to the 20-year dataset
twenty_wind_coords = pd.concat([twenty_wind_avg, coords01_csv])

# # rename the first column to have it be mean wind speed
twenty_wind_coords = twenty_wind_coords.rename(columns = {0: "mean_wind"})

# # rename the index name to indicate it is for the sites
twenty_wind_coords.index.names = ['site']

# # inspect the top of the dataset
print(twenty_wind_coords)

# # export the data as a CSV file
twenty_wind_coords.to_csv("wind_01_20_avg_coords.csv")

In [None]:
# arr01 = arr01.mean(axis = 0)
# arr02 = arr02.mean(axis = 0)
# arr03 = arr03.mean(axis = 0)
# arr04 = arr04.mean(axis = 0)
# arr05 = arr05.mean(axis = 0)
# arr06 = arr06.mean(axis = 0)
# arr07 = arr07.mean(axis = 0)
# arr08 = arr08.mean(axis = 0)
# arr09 = arr09.mean(axis = 0)
# arr10 = arr10.mean(axis = 0)
# arr11 = arr11.mean(axis = 0)
# arr12 = arr12.mean(axis = 0)
# arr13 = arr13.mean(axis = 0)
# arr14 = arr14.mean(axis = 0)
# arr15 = arr15.mean(axis = 0)
# arr16 = arr16.mean(axis = 0)
# arr17 = arr17.mean(axis = 0)
# arr18 = arr18.mean(axis = 0)
# arr19 = arr19.mean(axis = 0)
# arr20 = arr20.mean(axis = 0)

In [None]:
# avg01 = np.transpose(pd.DataFrame(np.array(arr01)))
# avg02 = np.transpose(pd.DataFrame(np.array(arr02)))
# avg03 = np.transpose(pd.DataFrame(np.array(arr03)))
# avg04 = np.transpose(pd.DataFrame(np.array(arr04)))
# avg05 = np.transpose(pd.DataFrame(np.array(arr05)))
# avg06 = np.transpose(pd.DataFrame(np.array(arr06)))
# avg07 = np.transpose(pd.DataFrame(np.array(arr07)))
# avg08 = np.transpose(pd.DataFrame(np.array(arr08)))
# avg09 = np.transpose(pd.DataFrame(np.array(arr09)))
# avg10 = np.transpose(pd.DataFrame(np.array(arr10)))
# avg11 = np.transpose(pd.DataFrame(np.array(arr11)))
# avg12 = np.transpose(pd.DataFrame(np.array(arr12)))
# avg13 = np.transpose(pd.DataFrame(np.array(arr13)))
# avg14 = np.transpose(pd.DataFrame(np.array(arr14)))
# avg15 = np.transpose(pd.DataFrame(np.array(arr15)))
# avg16 = np.transpose(pd.DataFrame(np.array(arr16)))
# avg17 = np.transpose(pd.DataFrame(np.array(arr17)))
# avg18 = np.transpose(pd.DataFrame(np.array(arr18)))
# avg19 = np.transpose(pd.DataFrame(np.array(arr19)))
# avg20 = np.transpose(pd.DataFrame(np.array(arr20)))

In [None]:
# years = list(range(2001, 2020))
# years
# year_wind = pd.DataFrame({'year': years})
# print(year_wind)

In [None]:
# wind_avg = pd.concat([avg01,avg02,avg03,avg04,avg05,avg06,avg07,avg08,avg09,avg10,avg11,avg12,avg13,avg14,avg15,avg16,avg17,avg18,avg19,avg20])
# twenty_wind_avg = pd.DataFrame(wind_avg.mean(axis=0))
# # wind_avg = pd.concat([wind_avg, year_wind], axis=1)
# print(wind_avg)
# print(twenty_wind_avg)

In [None]:
# for i in range(10):
#     arr = dset[i,:]
#     print(i,arr.mean())

In [None]:
# time_index = f["time_index"]
# time_index

In [None]:
# time_index[0:4]