# Preprocess WW3 Series


In [1]:
import xarray as xr
import numpy as np
import os
import geopandas as gpd
import shapely.vectorized
import pandas as pd
from itertools import product


### Function to find, read, and concatenate data

In [2]:
def find_ww3_file(root_directory,year):
    """Locate the WW3 file for ``year`` somewhere beneath ``root_directory``.

    The directory tree is walked recursively; a file matches when the string
    form of ``year`` occurs anywhere in its file name (directory names are
    not considered).  When several files match, the last one encountered in
    ``os.walk`` order is returned.

    Parameters
    ----------
    root_directory : str
        Directory to search recursively.
    year : int or str
        Year whose string form must appear in the file name.

    Returns
    -------
    str
        Path of the matching file.

    Raises
    ------
    FileNotFoundError
        If no file name beneath ``root_directory`` contains ``year``
        (this also covers a missing ``root_directory``, which ``os.walk``
        silently treats as empty).
    """
    token = str(year)
    matching_file = None
    for root, _dirs, files in os.walk(root_directory):
        for file in files:
            if token in file:
                matching_file = os.path.join(root, file)
    if matching_file is None:
        # Fail with a clear message rather than an unbound-variable error.
        raise FileNotFoundError(
            f"No file containing '{token}' found under '{root_directory}'"
        )
    return matching_file

def extract_ww3_data(ncfile,var,year,month):
    """Read one calendar month of a variable from a WW3 NetCDF file.

    Parameters
    ----------
    ncfile : str
        Path of the NetCDF file, opened with ``xarray.open_dataset``.
    var : str
        Name of the dataset variable to extract.
    year : int
        Accepted for call-site symmetry; not used by this function.
    month : int
        Calendar month number; only time steps whose month equals this
        value are kept.

    Returns
    -------
    tuple
        ``(var_field, x, y, dt)`` as NumPy arrays: the selected variable,
        the ``longitude`` and ``latitude`` variables, and the ``time``
        coordinate of the retained steps.
    """
    with xr.open_dataset(ncfile) as dataset:
        # Keep only the time steps that fall in the requested month.
        in_month = dataset.time.dt.month.isin([month])
        monthly = dataset.sel(time=in_month)

        times = np.array(monthly.coords['time'][:])
        lon = np.array(monthly.variables['longitude'][:])
        lat = np.array(monthly.variables['latitude'][:])
        field = np.array(monthly.variables[var][:])
    return field,lon,lat,times

def concatenate_ww3(rootdir,var,year0,yearn,month):
    """Stack one calendar month of a WW3 variable over a range of years.

    For every year in ``[year0, yearn)`` the matching file is located under
    ``rootdir + var`` and the requested month is extracted; the per-year
    fields are stacked along the first axis and the time stamps are joined
    into one flat array.  NaN values in the stacked field are replaced by 0.

    Parameters
    ----------
    rootdir : str
        Directory prefix; the variable name is appended directly to it, so
        it is expected to end with a path separator.
    var : {'hs', 'tp'}
        Variable to read; also the name of the sub-directory searched.
    year0, yearn : int
        First year (inclusive) and last year (exclusive).
    month : int
        Calendar month number to extract from each file.

    Returns
    -------
    tuple
        ``(series, dt)``: the stacked field with NaN replaced by 0, and the
        concatenated time stamps.

    Raises
    ------
    ValueError
        If ``var`` is not ``'hs'`` or ``'tp'``, or if the year range is empty.
    """
    if var not in ('hs', 'tp'):
        raise ValueError(f"Unsupported variable {var!r}; expected 'hs' or 'tp'")
    # Files for each variable live in a sub-directory named after it.
    rootvardir = rootdir + var

    years = np.arange(year0,yearn,1)
    if years.size == 0:
        raise ValueError(f"Empty year range: year0={year0}, yearn={yearn}")

    # Collect the per-year pieces and join them once, instead of re-copying
    # the growing array on every iteration.
    fields, times = [], []
    for year in years:
        file = find_ww3_file(rootvardir,year)
        temp,_x,_y,temp_dt = extract_ww3_data(file,var,year,month)
        fields.append(temp)
        times.append(temp_dt)

    if len(fields) == 1:
        # A single year is returned as read, without adding an axis.
        series, dt = fields[0], times[0]
    else:
        series = np.vstack(fields)
        dt = np.concatenate([np.ravel(t) for t in times])

    series = np.where(np.isnan(series),0,series)
    return series,dt

def wavefield_stats(hs,percentile):
    """Return the median and a percentile of ``hs`` along its first axis.

    Both statistics ignore NaN values, so a NaN entry does not invalidate
    the whole column for the percentile while being skipped by the median.

    Parameters
    ----------
    hs : numpy.ndarray
        Input field; statistics are reduced over axis 0.
    percentile : float
        Percentile to compute, in the range 0-100.

    Returns
    -------
    tuple
        ``(hs_med, hs_per)``: NaN-ignoring median and percentile.
    """
    hs_med = np.nanmedian(hs,axis=0)
    # nanpercentile keeps NaN handling consistent with nanmedian above.
    hs_per = np.nanpercentile(hs,percentile,axis=0)
    return hs_med, hs_per

def wavefield_stats_3d(hs,percentile,year0,yearn):
    """Compute per-year median and percentile fields from a stacked series.

    The first axis of ``hs`` is split into ``yearn - year0`` consecutive
    chunks of equal length (``hs.shape[0] // n_years`` steps each) and the
    NaN-ignoring median and percentile are taken over each chunk.  If the
    number of steps is not an exact multiple of the number of years, the
    trailing remainder is ignored.

    Parameters
    ----------
    hs : numpy.ndarray
        Stacked series with at least two dimensions; axis 0 is split by year.
    percentile : float
        Percentile to compute, in the range 0-100.
    year0, yearn : int
        First year (inclusive) and last year (exclusive); only the number of
        years is used.

    Returns
    -------
    tuple
        ``(hs_med, hs_per)``: per-year statistics stacked along the first
        axis.  For a single year the statistics are returned without the
        extra stacking axis.

    Raises
    ------
    ValueError
        If the year range is empty.
    """
    years = np.arange(year0,yearn,1)
    if years.size == 0:
        raise ValueError(f"Empty year range: year0={year0}, yearn={yearn}")
    tsteps = hs.shape[0]//len(years)

    medians, percentiles = [], []
    for count in range(len(years)):
        month_arr = hs[(count*tsteps):((count+1)*tsteps),:]
        medians.append(np.nanmedian(month_arr,axis=0))
        # nanpercentile keeps NaN handling consistent with nanmedian.
        percentiles.append(np.nanpercentile(month_arr,percentile,axis=0))

    if len(medians) == 1:
        return medians[0],percentiles[0]
    return np.vstack(medians),np.vstack(percentiles)

### Load the depth file and mask

In [3]:
# Path of the NetCDF file holding the depth field and the mesh description.
depthdir = 'ww3.2020_dpt.nc'
with xr.open_dataset(depthdir) as ds:
    # Only the first record along the leading axis of 'dpt' is kept
    # (presumably the first time step -- confirm against the file).
    depth = np.array(ds.variables['dpt'][0,:])
    # Node coordinates, reused later for the point-in-polygon tests.
    x = np.array(ds.variables['longitude'][:])
    y = np.array(ds.variables['latitude'][:])
    # 'tri' variable (presumably the mesh connectivity -- confirm).
    tris = np.array(ds.variables['tri'][:])
    
def depth_mask(depth,hs,min_depth=15):
    """Blank out the columns of ``hs`` that lie in shallow water.

    Every position where ``depth < min_depth`` is set to NaN across the
    whole first axis of ``hs``.  The array is modified in place and also
    returned, so ``hs`` must have a floating-point dtype.

    Parameters
    ----------
    depth : numpy.ndarray
        Depth values, matching the trailing dimension(s) of ``hs``.
    hs : numpy.ndarray
        Field to mask; its first axis is left untouched by the selection.
    min_depth : float, optional
        Depth threshold, in the same units as ``depth``; positions strictly
        shallower than this are masked.  Defaults to 15.

    Returns
    -------
    numpy.ndarray
        The same ``hs`` object, with shallow positions set to NaN.
    """
    # A boolean mask indexes the trailing axes directly, avoiding the
    # tuple-of-indices returned by np.where.
    shallow = np.asarray(depth) < min_depth
    hs[:,shallow] = np.nan
    return hs
    

### Define the function to take regional averages

In [5]:
def sea_aves(hs,seas,x,y):
    """Reduce a nodal field to one series per region polygon.

    For each row of ``seas`` the nodes whose ``(x, y)`` position lies inside
    the row's geometry are selected, and the NaN-ignoring median over those
    nodes is taken for every entry along the first axis of ``hs``.  Note
    that, despite the function name, the statistic is a median, not a mean.

    Parameters
    ----------
    hs : numpy.ndarray
        Field whose second axis indexes the nodes.
    seas : geopandas.GeoDataFrame
        Regions, with a ``name`` column and a ``geometry`` column.
    x, y : numpy.ndarray
        Node coordinates tested against each region polygon.

    Returns
    -------
    tuple
        ``(sea_ave_hs, sea_names)``: the regional series stacked with one
        row per region (a single region is returned without the extra
        stacking axis), and the region names in the same order.  A region
        containing no nodes yields NaN values.

    Raises
    ------
    ValueError
        If ``seas`` has no rows.
    """
    sea_names = []
    region_series = []
    # The index label is deliberately ignored: it need not start at 0 or be
    # consecutive, so it cannot be used to detect the first region.
    for _, row in seas.iterrows():
        sea_names.append(row['name'])
        poly = row['geometry']
        nodes = np.where(shapely.vectorized.contains(poly, x, y))[0]
        region_series.append(np.nanmedian(hs[:,nodes],axis=1))

    if not region_series:
        raise ValueError("'seas' contains no regions")
    if len(region_series) == 1:
        sea_ave_hs = region_series[0]
    else:
        sea_ave_hs = np.vstack(region_series)
    return sea_ave_hs,sea_names



# Derive the timeseries iterating over models

In [9]:
# Calendar months to process and their display names, in the same order.
# The names live under their own variable: rebinding ``months`` to the names
# inside the model loop would make every model after the first select on
# strings instead of month numbers.
months = [7,9,11]
month_names = ['July','September','November']
year0 = 2020
yearn = 2070

# The region polygons do not depend on the model or month, so read them once.
fn = 'Regions.shp'
seas = gpd.GeoDataFrame.from_file(fn)

for model_name in ['CNRM','ECEARTH','MPI','MRI']:
    rootdir = f'/WW3/{model_name}/'

    for i,month in enumerate(months):

        #Read the WW3 data and reduce it to per-year median / 90th percentile
        hs,dt = concatenate_ww3(rootdir,'hs',year0,yearn,month)
        hs_med,hs_per = wavefield_stats_3d(hs,90,year0,yearn)
        # hs_med is masked as well but is not used further in this cell.
        hs_med,hs_per = depth_mask(depth,hs_med),depth_mask(depth,hs_per)

        #Regional median of the 90th-percentile field for this month
        seas_temp,regions = sea_aves(hs_per,seas,x,y)
        if i == 0:
            seas_per = seas_temp
        else:
            # Rows accumulate month by month, regions varying fastest.
            seas_per = np.vstack((seas_per,seas_temp))

    #Create monthly regional column names (same month-major order as the rows)
    cols = [month_name+str(' ')+region
            for (month_name,region) in product(month_names,regions)]

    #Create the pandas dataframe and save it.
    df = pd.DataFrame(seas_per.T, columns=cols)
    years = np.arange(year0,yearn,1)
    # NOTE(review): the column label is the tuple ('year', ''), which is
    # written to the CSV header as "('year', '')"; kept unchanged in case
    # downstream readers rely on it -- confirm whether plain 'year' is meant.
    df.insert(0, ('year', ''), years)
    df.to_csv(f'{model_name}_Hs90_series.csv', index=False)
    print(f'{model_name}_Hs90_series.csv file saved successfully.')


CSV file saved successfully.
