In [1]:
from netCDF4 import Dataset
import numpy as np
import glob
import h5py
import pandas as pd
from tqdm import tqdm
import re
import os

In [37]:
# Inputs for aggregating the netCDF4 files into one large h5 file.
# Only paths containing 'ALT' are kept from the source directory listing.
files_src = [
    path
    for path in glob.glob("/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/*.nc")
    if 'ALT' in path
]

# Alternative destination (disabled):
#file_dest =  "/mnt/tmp/psadow/sar/aggregated_ALT.h5"
file_dest = "/mnt/lts/nfs_fs02/sadow_lab/preserve/stopa/sar_hs/data/alt/aggregated_ALT.h5"

# Names of the netCDF variables copied into the aggregated file.
keys = [
    'timeSAR', 'timeALT',
    'lonSAR', 'lonALT',
    'latSAR', 'latALT',
    'hsALT', 'dx', 'dt', 'nk', 'hsSM',
    'incidenceAngle', 'sigma0', 'normalizedVariance',
    'S',
    'cspcRe', 'cspcIm',
]

def parse_filename(filename):
    """
    Grab some meta data from filename.

    The base name is split on '_' and '.' and must yield exactly four parts:
    platform, the literal 'ALT', a date token and the extension 'nc'
    (e.g. 'S1A_ALT_coloc201505S.nc').
    Args:
    filename: path or base name of a netcdf file.
    Returns:
    dict with integer entries 'satellite' (1 if the third character of the
    platform part is 'A', otherwise 0), 'year' (characters 5-8 of the date
    token) and 'month' (characters 9-10 of the date token).
    Raises:
    ValueError: if the name does not split into the four expected parts, or
    the year/month characters are not integers.
    """
    basename = os.path.basename(filename)
    # Raw string: "\." must reach the regex engine as an escaped dot instead of
    # being interpreted (and warned about) as a string escape sequence.
    parts = re.split(r'_|\.', basename)
    if len(parts) != 4:
        raise ValueError(
            "expected <platform>_ALT_<date>.nc, got {!r}".format(basename))
    platform, _alt, date, _ext = parts
    # Explicit checks rather than assert, so they still run under `python -O`.
    if _alt != 'ALT':
        raise ValueError(
            "expected 'ALT' as second field of {!r}, got {!r}".format(basename, _alt))
    if _ext != 'nc':
        raise ValueError(
            "expected extension 'nc' for {!r}, got {!r}".format(basename, _ext))
    satellite = int(platform[2] == 'A')  # Encodes type A as 1, anything else as 0
    year = int(date[5:9])
    month = int(date[9:11])
    return {'satellite': satellite, 'year': year, 'month': month}

def process(x, key):
    """
    Prepare one netcdf variable (data.variables[key]) for storage.

    Every variable except 'S' is handed back untouched. For 'S' automatic
    scaling is switched off on the variable, the stored values are read and
    multiplied by the variable's scale_factor attribute, and the result is
    returned as a numpy array.
    """
    if key != 'S':
        return x

    x.set_auto_scale(False)
    stored = x[:]
    factor = float(x.scale_factor)
    return np.array(stored * factor)

def _append_rows(dset, rows):
    """
    Extend h5py dataset `dset` along axis 0 and write `rows` into the new tail.
    """
    num_prev = dset.shape[0]
    num_add = rows.shape[0]
    if num_add == 0:
        # Nothing to add. (Slicing with [-0:] would address the whole dataset.)
        return
    dset.resize(num_prev + num_add, axis=0)
    # Index from the old length instead of [-num_add:], which is only correct
    # for num_add > 0.
    dset[num_prev:] = rows


def aggregate(files_src, file_dest, keys=None):
    """
    Aggregate list of netcdf files into single hdf5.

    The first source file creates (overwrites) file_dest with one resizable
    dataset per key; every later file is appended along axis 0. In addition,
    each entry returned by parse_filename for a file is stored as an integer
    dataset with one value per row of that file, where the row count is taken
    from the first key.
    Args:
    files_src: list of netcdf filenames
    file_dest: filename of h5
    keys: If specified, only extract these fields. Otherwise all variables of
    the first file are used.
    Raises:
    ValueError: if there are no keys to extract.
    """
    if keys is not None:
        keys = list(keys)

    # Wrap the file list itself so tqdm can see its length and show a total.
    for i, filename in enumerate(tqdm(files_src)):
        # Add file of data to large hdf5.
        meta = parse_filename(filename)

        # Context manager closes the netcdf file even if something below fails.
        with Dataset(filename) as data:
            if keys is None:
                # Grab keys from first file. Materialise as a list because the
                # keys view returned in Python 3 cannot be indexed.
                keys = list(data.variables.keys())
            if not keys:
                raise ValueError("no keys to aggregate")

            # Row count of this file, used for the per-row meta columns.
            num_rows = data.variables[keys[0]].shape[0]
            is_first = (i == 0)

            with h5py.File(file_dest, 'w' if is_first else 'a') as fdest:
                for key in keys:
                    x = process(data.variables[key], key)
                    if is_first:
                        print(key)
                        # Unlimited along axis 0, fixed along all other axes.
                        maxshape = (None,) + tuple(x.shape[1:])
                        fdest.create_dataset(key, data=x, maxshape=maxshape)
                    else:
                        _append_rows(fdest[key], x)
                for key, value in meta.items():
                    column = np.full((num_rows,), value, dtype=int)
                    if is_first:
                        fdest.create_dataset(key, data=column, maxshape=(None,))
                    else:
                        _append_rows(fdest[key], column)

# Run the aggregation with the module-level file list, destination and keys,
# then print a completion marker.
aggregate(files_src, file_dest, keys=keys)
print("Done")

0it [00:00, ?it/s]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201505S.nc
timeSAR
timeALT
lonSAR
lonALT
latSAR
latALT
hsALT
dx
dt
nk
hsSM
incidenceAngle
sigma0
normalizedVariance
S
cspcRe
cspcIm


1it [00:00,  1.57it/s]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201612S.nc


2it [00:06,  2.13s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201502S.nc


3it [00:06,  1.60s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201702S.nc


4it [00:11,  2.72s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201703S.nc


5it [00:17,  3.51s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201712S.nc


6it [00:21,  3.70s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201802S.nc


7it [00:26,  4.24s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201804S.nc


8it [00:33,  4.98s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201803S.nc


9it [00:42,  6.08s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201805S.nc


10it [00:48,  6.20s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201604S.nc


11it [00:55,  6.27s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201603S.nc


12it [00:59,  5.69s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201702S.nc


13it [01:04,  5.51s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201703S.nc


15it [01:07,  3.34s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201705S.nc
/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201503S.nc


16it [01:07,  2.42s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201612S.nc


18it [01:16,  2.95s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201504S.nc
/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201602S.nc


19it [01:23,  4.15s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201605S.nc


20it [01:27,  4.35s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201512S.nc


21it [01:32,  4.38s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201802S.nc


22it [01:37,  4.66s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201804S.nc


23it [01:42,  4.72s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201803S.nc


24it [01:50,  5.55s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201805S.nc


25it [01:56,  5.86s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201712S.nc


26it [02:00,  5.37s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201606S.nc


27it [02:04,  4.72s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201511S.nc


28it [02:07,  4.23s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201607S.nc


29it [02:10,  4.00s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201601S.nc


30it [02:14,  3.86s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201609S.nc


31it [02:18,  4.04s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201608S.nc


32it [02:24,  4.60s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201806S.nc


33it [02:27,  4.07s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201801S.nc


34it [02:33,  4.69s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201807S.nc


35it [02:35,  4.00s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201710S.nc


36it [02:40,  4.04s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201711S.nc


37it [02:45,  4.52s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201706S.nc


38it [02:49,  4.27s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201701S.nc


39it [02:55,  4.72s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201707S.nc


40it [03:02,  5.56s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201709S.nc


41it [03:08,  5.73s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201708S.nc


42it [03:14,  5.69s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201509S.nc


43it [03:18,  5.09s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201507S.nc


44it [03:20,  4.19s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201610S.nc


45it [03:28,  5.42s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201611S.nc


46it [03:35,  5.85s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201710S.nc


47it [03:39,  5.28s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201711S.nc


48it [03:44,  5.27s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201806S.nc


49it [03:47,  4.47s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201801S.nc


50it [03:52,  4.69s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201807S.nc


51it [03:55,  4.11s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201609S.nc


52it [04:00,  4.46s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201608S.nc


53it [04:06,  4.86s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201606S.nc


54it [04:06,  3.54s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201607S.nc


55it [04:11,  3.94s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201510S.nc


57it [04:15,  2.76s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201501S.nc
/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201610S.nc


58it [04:22,  4.11s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201506S.nc


59it [04:24,  3.33s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201611S.nc


60it [04:32,  4.88s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201508S.nc


61it [04:36,  4.44s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201709S.nc


62it [04:40,  4.52s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201708S.nc


63it [04:46,  5.01s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1B_ALT_coloc201701S.nc


64it [04:53,  5.36s/it]

/mnt/lts/nfs_fs02/sadow_lab/personal/quachb/sar_hs/S1A_ALT_coloc201707S.nc


65it [05:00,  4.63s/it]

Done



