In [9]:
# Reads a NetCDF4 file, preprocesses the data, and writes an HDF5 file.
# This is much simpler than aggregating multiple files and then
# performing the preprocessing.
# Author: Peter Sadowski, Dec 2020
import os, sys, h5py
import numpy as np
from netCDF4 import Dataset
sys.path = ['../'] + sys.path
from sarhs import preprocess

# Source and destination filenames.
file_src  = "/mnt/lts/nfs_fs02/sadow_lab/preserve/stopa/sar_hs/data/S1B_201905_test01S/S1B_201905_test01S.nc"  # Example file containing single observation.
file_dest = "/mnt/lts/nfs_fs02/sadow_lab/preserve/stopa/sar_hs/data/S1B_201905_test01S/S1B_201905_test01S_processed.h5"
satellite = 0 # 1=S1A, 0=S1B

# These variables are expected in the source file.
keys = ['timeSAR', 'lonSAR',  'latSAR', 'incidenceAngle', 'sigma0', 'normalizedVariance', 'S', 'cspcRe', 'cspcIm'] # Needed for predictions.

# Output datasets are staged in memory as name -> (data, shape) and written
# only after the source file has been validated and every preprocessing step
# has succeeded. Opening the destination with mode 'w' truncates an existing
# file, so opening it up front would leave an empty or half-written file
# behind (and destroy a previous result) whenever a later step raised.
# A shape of None means "use the shape of the data".
staged = {}

with Dataset(file_src) as fs:
    # Check input file.
    src = fs.variables
    for k in keys:
        if k not in src:
            raise IOError(f'Variable {k} not found in input file.')
    num_examples = src[keys[0]].shape[0]
    print(f'Found {num_examples} events.')

    # Get 22 CWAVE features. Concatenate 20 parameters with sigma0 and normVar.
    # Automatic scaling is switched off and scale_factor is applied by hand.
    src['S'].set_auto_scale(False) # Some of the NetCDF4 files had some weird scaling.
    S = np.array(src['S'][:] * float(src['S'].scale_factor))
    cwave = np.hstack([S, src['sigma0'][:].reshape(-1,1), src['normalizedVariance'][:].reshape(-1,1)])
    cwave = preprocess.conv_cwave(cwave) # Remove extrema, then standardize with hardcoded mean, vars.
    staged['cwave'] = (cwave, None)

    # Observation meta data.
    latSAR, lonSAR = src['latSAR'][:], src['lonSAR'][:]
    latSARcossin = preprocess.conv_position(latSAR) # Computes cos and sin used by NN.
    lonSARcossin = preprocess.conv_position(lonSAR)
    staged['latlonSAR'] = (np.column_stack([latSAR, lonSAR]), None)
    staged['latlonSARcossin'] = (np.hstack([latSARcossin, lonSARcossin]), None)

    # Time stamps and the derived time feature are stored as single-column datasets.
    timeSAR = src['timeSAR'][:]
    todSAR = preprocess.conv_time(timeSAR)
    staged['timeSAR'] = (timeSAR, (timeSAR.shape[0], 1))
    staged['todSAR'] = (todSAR, (todSAR.shape[0], 1))

    incidence = preprocess.conv_incidence(src['incidenceAngle'][:]) # Separates into 2 var.
    staged['incidence'] = (incidence, None)

    # One row per observation, every entry equal to the satellite flag above.
    satellite_indicator = np.full((timeSAR.shape[0], 1), satellite, dtype=float)
    staged['satellite'] = (satellite_indicator, None)

    # Spectral data. Real and imaginary parts are stacked along a new axis at
    # index 3, which requires both preprocessed arrays to be 3-dimensional.
    spec_re = preprocess.conv_real(src['cspcRe'][:])
    spec_im = preprocess.conv_imaginary(src['cspcIm'][:])
    spectrum = np.stack((spec_re, spec_im), axis=3)
    staged['spectrum'] = (spectrum, None)

    # Altimeter features (disabled). These read 'hsALT', 'dx' and 'dt', which
    # are not in `keys`; presumably only some source files provide them -- TODO confirm.
    #hsALT = src['hsALT'][:]
    #staged['hsALT'] = (hsALT, (hsALT.shape[0], 1))
    #dx = preprocess.conv_dx(src['dx'][:])
    #dt = preprocess.conv_dt(src['dt'][:])
    #staged['dxdt'] = (np.column_stack([dx, dt]), None)

# Everything above succeeded: now create/overwrite the destination file.
with h5py.File(file_dest, 'w') as fd:
    for name, (data, shape) in staged.items():
        fd.create_dataset(name, data=data, shape=shape)


Found 1 events.
