## Preprocess Monthly 90th Percentile Hs Data
Derives the monthly 90th-percentile Hs time series used for the trend analysis in Figure 4.

### Libraries

In [1]:
import xarray as xr
import numpy as np
import os
import geopandas as gpd
import shapely.vectorized
import pandas as pd
from itertools import product
import pandas as pd
import pymannkendall as mk
import statsmodels.api as sm
from scipy.optimize import curve_fit

### Define function to find, read, and concatenate yearly ww3 data

In [2]:
def find_ww3_file(root_directory,year):
    """Locate the file under ``root_directory`` whose name contains ``year``.

    The directory tree is walked recursively and every file name is tested
    with a plain substring match against ``str(year)``. If several files
    match, the last one encountered in ``os.walk`` order is returned.

    Parameters
    ----------
    root_directory : str
        Top of the directory tree to search.
    year : int or str
        Year tag to look for inside file names.

    Returns
    -------
    str
        Path (directory joined with file name) of the matching file.

    Raises
    ------
    FileNotFoundError
        If no file name under ``root_directory`` contains ``str(year)``.
    """
    year_tag = str(year)
    matching_file = None
    for root, _dirs, files in os.walk(root_directory):
        for file in files:
            if year_tag in file:
                # No early exit: later matches overwrite earlier ones so the
                # last match in walk order wins.
                matching_file = os.path.join(root, file)
    if matching_file is None:
        # Fail with a descriptive error instead of an unbound-variable crash.
        raise FileNotFoundError(
            f"No file containing '{year_tag}' found under '{root_directory}'"
        )
    return matching_file

def extract_ww3_data(ncfile,var,year,month):
    """Read one calendar month of a variable from a NetCDF file.

    Parameters
    ----------
    ncfile : str
        Path of the NetCDF file, opened with ``xr.open_dataset``.
    var : str
        Name of the dataset variable to extract.
    year : int
        Not used by this function; kept for the callers' signature.
    month : int
        Month number; only time steps whose ``time.dt.month`` equals this
        value are kept.

    Returns
    -------
    tuple of numpy.ndarray
        ``(var_field, longitude, latitude, time)`` for the selected time
        steps, each converted to a NumPy array.
    """
    with xr.open_dataset(ncfile) as dataset:
        # Boolean mask over the time axis selecting the requested month.
        in_month = dataset.time.dt.month.isin([month])
        monthly = dataset.sel(time=in_month)

        # Materialise everything as NumPy arrays before the file is closed.
        times = np.array(monthly.coords['time'][:])
        lon = np.array(monthly.variables['longitude'][:])
        lat = np.array(monthly.variables['latitude'][:])
        field = np.array(monthly.variables[var][:])
    return field, lon, lat, times

def concatenate_ww3(rootdir,var,year0,yearn,month):
    """Concatenate one calendar month of yearly WW3 files into one series.

    For every year in ``np.arange(year0, yearn, 1)`` (``yearn`` itself is
    excluded) the matching file is located with ``find_ww3_file``, the
    requested month is read with ``extract_ww3_data``, and the yearly pieces
    are stacked along the first axis. NaN values in the stacked field are
    replaced with 0.

    Parameters
    ----------
    rootdir : str
        Model root directory; the variable name is appended directly to it
        (plain string concatenation) to form the search directory.
    var : str
        Variable name. Only ``'hs'`` is supported.
    year0 : int
        First year (inclusive).
    yearn : int
        Last year (exclusive).
    month : int
        Month number to extract from each yearly file.

    Returns
    -------
    series : numpy.ndarray
        Stacked variable field with NaN replaced by 0.
    dt : numpy.ndarray
        Concatenated time stamps of the selected time steps.

    Raises
    ------
    ValueError
        If ``var`` is not ``'hs'`` or if the year range is empty.
    """
    if var != 'hs':
        # Only the 'hs' directory layout is defined; fail clearly instead of
        # hitting an unbound local further down.
        raise ValueError(
            f"Unsupported variable {var!r}: only 'hs' is supported"
        )
    rootvardir = rootdir + var

    years = np.arange(year0,yearn,1)
    if len(years) == 0:
        raise ValueError(
            f"Empty year range: year0={year0}, yearn={yearn} (yearn is exclusive)"
        )

    # Collect the yearly pieces and stack once at the end rather than
    # re-copying the growing array on every iteration.
    fields = []
    times = []
    for year in years:
        file = find_ww3_file(rootvardir,year)
        temp,_x,_y,temp_dt = extract_ww3_data(file,var,year,month)
        fields.append(temp)
        times.append(temp_dt)

    if len(fields) == 1:
        # A single year is returned as read, without any stacking.
        series, dt = fields[0], times[0]
    else:
        series = np.vstack(fields)
        dt = np.concatenate([np.ravel(t) for t in times])

    series = np.where(np.isnan(series),0,series)
    return series,dt

def wavefield_stats(hs,percentile):
    """Compute the median and a percentile of ``hs`` along its first axis.

    Parameters
    ----------
    hs : numpy.ndarray
        Input field; statistics are taken over axis 0.
    percentile : float
        Percentile (0-100) passed to ``np.percentile``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(median, percentile_value)``. The median ignores NaN
        (``np.nanmedian``); the percentile does not (``np.percentile``).
    """
    return np.nanmedian(hs, axis=0), np.percentile(hs, percentile, axis=0)

def wavefield_stats_3d(hs,percentile,year0,yearn):
    """Compute per-year median and percentile of a concatenated series.

    The first axis of ``hs`` is cut into ``len(np.arange(year0, yearn, 1))``
    consecutive chunks of equal length (``hs.shape[0] // n_years``); any
    leftover rows at the end are ignored. Statistics are taken over axis 0
    of each chunk and the yearly results are stacked with ``np.vstack``.

    Parameters
    ----------
    hs : numpy.ndarray
        Series with time on axis 0 (at least 2-D).
    percentile : float
        Percentile (0-100) passed to ``np.percentile``.
    year0 : int
        First year (inclusive).
    yearn : int
        Last year (exclusive).

    Returns
    -------
    tuple of numpy.ndarray
        ``(hs_med, hs_per)`` with one row per year. When the range holds a
        single year, the un-stacked statistics of that year are returned.
    """
    n_years = len(np.arange(year0, yearn, 1))
    steps_per_year = hs.shape[0] // n_years

    hs_med = None
    hs_per = None
    for idx in range(n_years):
        start = idx * steps_per_year
        chunk = hs[start:start + steps_per_year, :]
        year_med = np.nanmedian(chunk, axis=0)
        year_per = np.percentile(chunk, percentile, axis=0)
        if idx == 0:
            # First year seeds the accumulators.
            hs_med, hs_per = year_med, year_per
            continue
        hs_med = np.vstack((hs_med, year_med))
        hs_per = np.vstack((hs_per, year_per))
    return hs_med, hs_per

### Define function to save the data

In [15]:
def savedata(hs_per,output_filename):
    """Write a 2-D percentile array to a NetCDF file.

    The array is wrapped in an ``xr.DataArray`` named ``'hs_per'`` with
    dimensions ``('time', 'nodes')`` and integer index coordinates
    (``0..shape[0]-1`` and ``0..shape[1]-1``), then saved via
    ``Dataset.to_netcdf``. A confirmation message is printed afterwards.

    Parameters
    ----------
    hs_per : numpy.ndarray
        Array indexed as ``[time, node]``.
    output_filename : str
        Destination path of the NetCDF file.
    """
    n_time = hs_per.shape[0]
    n_nodes = hs_per.shape[1]

    # Plain integer indices serve as coordinates on both axes.
    index_coords = {'time': np.arange(n_time), 'nodes': np.arange(n_nodes)}
    data_array = xr.DataArray(
        hs_per,
        dims=('time', 'nodes'),
        coords=index_coords,
        name='hs_per',
    )

    xr.Dataset({'hs_per': data_array}).to_netcdf(output_filename)
    print(f"NetCDF file '{output_filename}' saved successfully.")

## Derive monthly 90th percentile Hs for each model, 2020-2069 (the end year 2070 is exclusive)

In [7]:
# Models and calendar months to process; years run from year0 up to, but not
# including, yearn.
models = ['CNRM','ECEARTH','MPI','MRI']
months = [7,9,11]
year0 = 2020
yearn = 2070

# One output file per (model, month) pair, iterating months within each model.
for model, month in product(models, months):
    rootdir = f'/WW3/{model}/'
    output_filename = f'{model}_{year0}_{yearn}_{month}.nc'
    # Build the multi-year series for this month, reduce it to yearly 90th
    # percentiles, and write the result to disk.
    hs,dt = concatenate_ww3(rootdir,'hs',year0,yearn,month)
    hs_med,hs_per = wavefield_stats_3d(hs,90,year0,yearn)
    savedata(hs_per,output_filename)

NetCDF file 'CNRM_2020_2070_7.nc' saved successfully.
NetCDF file 'CNRM_2020_2070_9.nc' saved successfully.
NetCDF file 'CNRM_2020_2070_11.nc' saved successfully.
NetCDF file 'ECEARTH_2020_2070_7.nc' saved successfully.
NetCDF file 'ECEARTH_2020_2070_9.nc' saved successfully.
NetCDF file 'ECEARTH_2020_2070_11.nc' saved successfully.
NetCDF file 'MPI_2020_2070_7.nc' saved successfully.
NetCDF file 'MPI_2020_2070_9.nc' saved successfully.
NetCDF file 'MPI_2020_2070_11.nc' saved successfully.
NetCDF file 'MRI_2020_2070_7.nc' saved successfully.
NetCDF file 'MRI_2020_2070_9.nc' saved successfully.
NetCDF file 'MRI_2020_2070_11.nc' saved successfully.
