- reprocessing Nov 2021 DAS results using empirical correction coefficients
- up to date for CODAS data as of August 2023

In [1]:
# Imports and notebook-wide setup for the DAS wave reprocessing cells.
import glob
import xarray as xr
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.colors as colors
from scipy import signal

from datetime import date, timedelta, datetime
import matplotlib.dates as mdates
# Default font size for every matplotlib figure created after this point.
plt.rcParams.update({'font.size': 12})

from scipy.signal import argrelextrema

# Presumably the directory figures are saved to -- it is not referenced in the
# cells visible here; confirm before removing.
pltdir = '/Users/msmith/Documents/DAS/2021Test/'

import scipy.io
import warnings
# Show each distinct warning once ...
warnings.filterwarnings('once')
# ... except DeprecationWarning, which is silenced. filterwarnings() inserts at
# the front of the filter list by default, so this later, more specific rule
# takes precedence over the blanket 'once' rule above.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Functions to reprocess using frequency-dependent conversion factor for each channel

In [182]:
# Rewritten wave conversion that uses strain_factor as a function of frequency.
# April 5, 2023: now includes depth attenuation, as this is not accounted for in the empirical correction factor.

def DAS_wave_conversion_f(das_data, f_cutoff, E_corr_factor, freq, depth,
                          fs=2, nperseg=128, nfft=256):
    """Estimate a corrected wave spectrum and bulk wave parameters for one DAS record.

    The record's power spectral density is computed with Welch's method,
    interpolated onto ``freq``, multiplied by the empirical correction factor
    and divided by the squared depth-attenuation term ``exp(k * depth)``,
    with ``k = (2*pi*freq)**2 / 9.8``. Peak period, energy-weighted period and
    significant wave height are then derived from the corrected spectrum.

    Parameters
    ----------
    das_data : object exposing the time series as ``.data``
        ``das_data.data`` is handed to ``scipy.signal.welch`` unchanged.
    f_cutoff : float
        Currently unused. It belonged to a high-frequency f^-4 roll-off that
        is disabled; the parameter is kept so existing callers keep working.
    E_corr_factor : array-like
        Correction factor applied to the spectrum, one value per entry of
        ``freq``. pandas, xarray and NumPy inputs are all accepted and are
        combined positionally.
    freq : array-like
        Target frequencies (the plotting code labelled this axis 'Hz'). They
        must lie inside the Welch frequency range ``[0, fs/2]`` because
        ``interp1d`` does not extrapolate and raises ``ValueError`` otherwise.
    depth : float or array-like
        Depth used in ``exp(k * depth)``; a scalar or a length-1 array.
        The caller in this file passes the negated water depth.
    fs, nperseg, nfft : int, optional
        Sampling frequency, segment length and FFT length forwarded to
        ``scipy.signal.welch``. The defaults reproduce the previously
        hard-coded values (2, 128, 256).

    Returns
    -------
    f_psd : ndarray
        Frequencies of the raw Welch estimate (NOT the grid of ``ds_psd``).
    ds_psd : ndarray
        Uncorrected PSD interpolated onto ``freq``.
    ds_psd_corr : ndarray
        Corrected PSD on ``freq``.
    Tp : float
        Peak period, ``1 / freq`` at the maximum of ``ds_psd_corr``.
    Te : float
        Energy-weighted period over ``0.03 < freq < 0.5``.
    Hs : float
        ``4 * sqrt(sum(E) * mean frequency spacing)`` over the same band.
    """
    from scipy.interpolate import interp1d

    # Work on plain float arrays so the arithmetic is positional and does not
    # depend on which container type (Series / DataArray / ndarray) came in.
    freq_hz = np.asarray(freq, dtype=float)
    corr = np.asarray(E_corr_factor, dtype=float)
    depth_arr = np.asarray(depth, dtype=float)

    # Welch PSD; scipy's default overlap is nperseg // 2, i.e. 50%.
    f_psd, raw_psd = signal.welch(das_data.data, fs=fs, nfft=nfft, nperseg=nperseg)

    # Depth attenuation correction, squared because it is applied to energy.
    k = (2 * np.pi * freq_hz) ** 2 / 9.8
    attenuation = np.exp(k * depth_arr) ** 2

    # Interpolate onto the target frequencies, then apply both corrections.
    ds_psd = interp1d(f_psd, raw_psd)(freq_hz)
    ds_psd_corr = ds_psd * corr / attenuation

    # NOTE: earlier revisions replaced the tail of the spectrum with an f^-4
    # roll-off (starting at f_cutoff, or at a noise-floor crossing). That step
    # is disabled, which is why f_cutoff is not used here.

    # Peak period. ds_psd_corr lives on the target grid, so the peak index
    # must be looked up in freq_hz rather than in the Welch grid f_psd.
    peak_i = np.argmax(ds_psd_corr)
    Tp = 1 / freq_hz[peak_i]

    # Energy-weighted mean frequency over the wave band: sum(f*E) / sum(E).
    wave_band = (freq_hz > 0.03) & (freq_hz < 0.5)
    E_band = ds_psd_corr[wave_band]
    fe = (E_band * freq_hz[wave_band]).sum() / E_band.sum()
    Te = 1 / fe

    # Significant wave height from the band-integrated energy, using the mean
    # spacing of the target grid as the bandwidth.
    bandwidth = np.diff(freq_hz).mean()
    Hs = 4 * np.sqrt(E_band.sum() * bandwidth)

    return f_psd, ds_psd, ds_psd_corr, Tp, Te, Hs


In [184]:
def CODAS_waves_to_netcdf_f(das_directory, output_directory, f_cutoff_file,
                            channel_slice=slice(393, 394)):
    """Convert CODAS DAS records to wave parameters and write them to netCDF.

    Uses the frequency- and channel-dependent correction factors from the
    August 2022 calibration. For every selected channel and every record time,
    ``DAS_wave_conversion_f`` is called and its Hs, Tp, Te and spectra are
    stacked into one ``xarray.Dataset`` with dimensions
    ``(channels, time[, frequency])``.

    Parameters
    ----------
    das_directory : str
        Directory containing one sub-folder per channel; folder names must be
        numeric. Each folder holds files named like
        ``CODAS.D*__<YYYYmmdd_HHMMSS>.*__chn*<channel>.ncdf``.
    output_directory : str
        Directory the netCDF file is written to.
    f_cutoff_file : indexable
        Cutoff frequencies, read as ``f_cutoff_file[ti][0]`` for time index
        ``ti`` (for a header-less DataFrame that is column ``ti``, row 0) and
        forwarded to ``DAS_wave_conversion_f``.
    channel_slice : slice, optional
        Which entries of the sorted channel list to process. The default is
        the previously hard-coded ``[393:394]``.
        NOTE(review): the original author flagged this range with
        "CHECK THIS IS CORRECT" (393:495 was the alternative) -- confirm.

    Returns
    -------
    xarray.Dataset
        The dataset that was written to disk.

    Raises
    ------
    FileNotFoundError
        If no channel folders, no parsable record files, or no file for a
        given channel/time are found.

    Notes
    -----
    Relies on a module-level ``SWIFT`` object whose ``freq`` attribute supplies
    the target frequency grid, on ``CODAS_info.csv`` in the working directory,
    and on the per-channel correction-factor CSVs under ``coeffdir``.
    """
    import os
    import re

    # All channel folders for this period.
    das_chanfolders = glob.glob(os.path.join(das_directory, '*'))
    if not das_chanfolders:
        raise FileNotFoundError('No channel folders found in ' + str(das_directory))
    das_channels = sorted(round(float(os.path.basename(x))) for x in das_chanfolders)

    # One slice object drives both the processing loop and the 'channels'
    # coordinate, so the two can no longer drift apart.
    selected_channels = das_channels[channel_slice]

    # Record times, parsed from the file names of the first channel folder.
    # The timestamp is located by its pattern ("__YYYYmmdd_HHMMSS.") instead
    # of by absolute character offsets, which depended on the path length.
    stamp_pattern = re.compile(r'__(\d{8}_\d{6})\.')
    das_times = []
    for path in glob.glob(os.path.join(das_chanfolders[0], '*.ncdf')):
        found = stamp_pattern.search(os.path.basename(path))
        if found:
            das_times.append(datetime.strptime(found.group(1), '%Y%m%d_%H%M%S'))
    das_times.sort()
    if not das_times:
        raise FileNotFoundError('No timestamped .ncdf files found in ' + das_chanfolders[0])

    # Channel-independent inputs, loaded once.
    das_info = pd.read_csv('CODAS_info.csv')
    # An earlier revision derived the factor from DASspecta_channel*.mat files
    # (median E_ratio per frequency); it was superseded by these CSV files.
    coeffdir = '/Users/msmith/Documents/DAS/CODAS/202208_reprocessing/'
    freq = SWIFT.freq

    # Stacked outputs, indexed [channel][time].
    das_Hs = []
    das_Tp = []
    das_Te = []
    das_psd = []
    das_psd_corr = []
    for das_channel in selected_channels:
        # Negated water depth for this channel, as expected by the conversion.
        depth = -das_info.loc[das_info['Channel'] == das_channel, 'Water Depth'].values

        correction_channel = pd.read_csv(
            coeffdir + 'SpectralCorrectionFactor_channel' + str(das_channel) + '_1Hz.csv')
        E_ratio_f = correction_channel['corr_factor']

        Hs_inner = []
        Tp_inner = []
        Te_inner = []
        psd_inner = []
        psd_corr_inner = []
        for ti, das_time in enumerate(das_times):
            das_timestr = das_time.strftime('%Y%m%d_%H%M%S')
            file_pattern = os.path.join(
                das_directory, str(das_channel),
                'CODAS.D*__' + das_timestr + '.*__chn*' + str(das_channel) + '.ncdf')
            matches = glob.glob(file_pattern)
            if not matches:
                raise FileNotFoundError('No DAS file matching ' + file_pattern)

            f_cutoff = f_cutoff_file[ti][0]
            # Close each file as soon as its record has been converted.
            with xr.open_dataset(matches[0]) as ds_disk:
                _, ds_psd, ds_psd_corr, Tp_psd, Te_psd, Hs_psd = DAS_wave_conversion_f(
                    ds_disk, f_cutoff, E_ratio_f, freq, depth)

            Hs_inner.append(Hs_psd)
            Tp_inner.append(Tp_psd)
            Te_inner.append(Te_psd)
            psd_inner.append(ds_psd)
            psd_corr_inner.append(ds_psd_corr)

        das_Hs.append(Hs_inner)
        das_Tp.append(Tp_inner)
        das_Te.append(Te_inner)
        das_psd.append(psd_inner)
        das_psd_corr.append(psd_corr_inner)

    # Consolidate into a single xarray Dataset.
    # NOTE(review): units 'm' on the two spectra are kept from the original;
    # a spectral density would normally not be in metres -- confirm.
    data_vars = {
        'Hs': (['channels', 'time'], das_Hs,
               {'units': 'm',
                'long_name': 'significant wave height'}),
        'Tp': (['channels', 'time'], das_Tp,
               {'units': 's',
                'long_name': 'peak wave period'}),
        'Te': (['channels', 'time'], das_Te,
               {'units': 's',
                'long_name': 'energy-weighted wave period'}),
        'E': (['channels', 'time', 'frequency'], das_psd,
              {'units': 'm',
               'long_name': 'energy spectrum'}),
        'E_corr': (['channels', 'time', 'frequency'], das_psd_corr,
                   {'units': 'm',
                    'long_name': 'corrected energy spectrum'}),
    }

    coords = {'time': (['time'], das_times),
              'channels': (['channels'], selected_channels),
              'frequency': (['frequency'], freq)}

    # netCDF attributes must be strings/numbers/arrays, so the creation time
    # is stored as an ISO-8601 string rather than a datetime object.
    attrs = {'creation_date': datetime.now().isoformat(),
             'author': 'M Smith',
             'email': 'madisonmsmith@whoi.edu'}

    ds_DAS = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)

    ds_DAS.to_netcdf(os.path.join(
        output_directory,
        'uw_' + das_times[0].strftime('%Y-%m') + '_waveoutputs_23AprCorrFactors_1Hz_v4.nc'))

    return ds_DAS

In [185]:
# INPUTS
# Driver cell: runs the reprocessing for the August 2022 CODAS deployment and
# keeps the resulting dataset in the notebook namespace as ds_DAS.

# Directory passed as the DAS input location (written with a trailing slash).
das_directory = '/Users/msmith/Documents/DAS/CODAS/uw_2022_08/'
# Directory passed as the destination for the output netCDF file.
output_directory = '/Users/msmith/Documents/DAS/CODAS'

# Manually chosen cutoff frequencies, read without a header row so the columns
# are labelled 0, 1, 2, ...  CODAS_waves_to_netcdf_f indexes this table as
# f_cutoff_file[ti][0], i.e. column ti, row 0, for each time index ti.
f_cutoff_file = pd.read_csv('202208_freqcutoff_manualCopy2.txt',header=None)
ds_DAS = CODAS_waves_to_netcdf_f(das_directory,output_directory,f_cutoff_file)

