In [1]:
import h5py
import numpy as np
import pandas as pd
from astropy.time import Time
from tqdm import tqdm
import os
import vaex

In [2]:
def gps2dyr(time):
    """Convert GPS seconds to Python ``datetime`` value(s).

    The input is wrapped in an astropy ``Time`` object with ``format='gps'``
    and its ``.datetime`` representation is returned. (Reading
    ``.decimalyear`` from the same object instead would give decimal years.)
    """
    gps_time = Time(time, format='gps')
    return gps_time.datetime

In [3]:
def read_atl06(fname, cycle=3):
    """Read one ATL06 file into one pandas DataFrame per ground track.

    Extracts the variables of interest from the ``land_ice_segments`` group
    of every beam group (``gt*``) present in the file. Nothing is written to
    disk; the frames are returned to the caller.

    Parameters
    ----------
    fname : str
        Path to an ATL06 HDF5 file.
    cycle : int, optional
        Value stored in the constant int8 ``cycle`` column. Defaults to 3,
        the value that used to be hard-coded here.

    Returns
    -------
    list of pandas.DataFrame
        One frame per beam found in the file, ordered by sorted beam name.
        The list has fewer than six entries when a beam group is absent, so
        list positions only map to fixed beams when all six are present.
    """
    # (output column, dataset path relative to <beam>/land_ice_segments).
    # The order below is the column order of the returned frames.
    columns = (
        ('id', 'segment_id'),  # unique key (per beam)
        ('lat', 'latitude'),
        ('lon', 'longitude'),
        ('slope_y', 'fit_statistics/dh_fit_dy'),
        ('slope_x', 'fit_statistics/dh_fit_dx'),
        ('slope_x_sigma', 'fit_statistics/dh_fit_dx_sigma'),
        ('h_li', 'h_li'),
        ('s_li', 'h_li_sigma'),
        ('q_flag', 'atl06_quality_summary'),
        ('s_fg', 'fit_statistics/signal_selection_source'),
        ('snr', 'fit_statistics/snr_significance'),
        ('h_rb', 'fit_statistics/h_robust_sprd'),
        ('bsnow_conf', 'geophysical/bsnow_conf'),
        ('cloud_flg_asr', 'geophysical/cloud_flg_asr'),
        ('cloud_flg_atm', 'geophysical/cloud_flg_atm'),
        ('msw_flag', 'geophysical/msw_flag'),
        # Column name kept as 'fbsnow_h': the existing per-beam tables
        # already carry this spelling, so renaming would break concatenation.
        ('fbsnow_h', 'geophysical/bsnow_h'),
        ('bsnow_od', 'geophysical/bsnow_od'),
        ('layer_flag', 'geophysical/layer_flag'),
        ('bckgrd', 'geophysical/bckgrd'),
        ('e_bckgrd', 'geophysical/e_bckgrd'),
        ('n_fit_photons', 'fit_statistics/n_fit_photons'),
        ('w_surface_window_final', 'fit_statistics/w_surface_window_final'),
    )

    dataframes = []  # one dataframe per track

    with h5py.File(fname, 'r') as fi:
        # Reference epoch for delta_time (single value); it is file-level,
        # so read it once instead of once per beam.
        t_ref = fi['/ancillary_data/atlas_sdp_gps_epoch'][:]

        # Only the ground tracks actually present in this file.
        gtracks = sorted('/' + k for k in fi.keys() if k.startswith('gt'))

        for g in gtracks:
            segments = fi[g + '/land_ice_segments']

            # Load every requested variable for this beam into memory.
            data = {name: segments[path][:] for name, path in columns}
            npts = len(data['id'])

            # Time in GPS seconds (GPS epoch is 1980-01-06).
            t_gps = t_ref + segments['delta_time'][:]

            # Despite its name, 't_year' holds datetime objects.
            data['t_year'] = gps2dyr(t_gps)
            data['cycle'] = np.ones(npts, dtype=np.int8) * cycle

            dataframes.append(pd.DataFrame.from_dict(data))

    return dataframes

In [4]:
# Disabled one-off step: read a single ATL06 granule into per-beam frames.
# NOTE(review): presumably this seeded the initial ./gt*.arrow tables - confirm.
#seed = read_atl06('./ATL06_20190329071316_13870211_004_01.h5')

In [5]:
# Disabled: keep only the first two rows of each frame in `seed`.
# List positions 0-5 are assumed to be gt1l ... gt3r, which only holds when
# the seed file contains all six beams.
#Pgt1l = seed[0].iloc[:2]
#Pgt1r = seed[1].iloc[:2]
#Pgt2l = seed[2].iloc[:2]
#Pgt2r = seed[3].iloc[:2]
#Pgt3l = seed[4].iloc[:2]
#Pgt3r = seed[5].iloc[:2]

In [7]:
# Disabled bootstrap path: wrap the two-row pandas seed frames as vaex tables.
#gt1l = vaex.from_pandas(Pgt1l, copy_index=False)
#gt1r = vaex.from_pandas(Pgt1r, copy_index=False)
#gt2l = vaex.from_pandas(Pgt2l, copy_index=False)
#gt2r = vaex.from_pandas(Pgt2r, copy_index=False)
#gt3l = vaex.from_pandas(Pgt3l, copy_index=False)
#gt3r = vaex.from_pandas(Pgt3r, copy_index=False)

# Open the six per-beam Arrow tables from disk.
# gt1l must be opened as well: the cells below (cycle column, concatenation,
# export) all use it and raise NameError on a fresh kernel otherwise.
gt1l = vaex.open('./gt1l.arrow')
gt1r = vaex.open('./gt1r.arrow')
gt2l = vaex.open('./gt2l.arrow')
gt2r = vaex.open('./gt2r.arrow')
gt3l = vaex.open('./gt3l.arrow')
gt3r = vaex.open('./gt3r.arrow')

In [None]:
# NOTE(review): this cell held an unfinished `import` statement, which is a
# SyntaxError when the notebook is run top to bottom. The modules used by the
# notebook are imported in the first cell.

In [10]:
# Display the gt1r table (bare last expression -> rendered as the cell output).
gt1r

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year
0,1443576,-79.00247122325653,152.62909329494573,-0.0034930305555462837,-0.0038709579966962337,0.00046775228111073375,2112.461181640625,0.010477527044713497,0,0,0.0,0.12101969867944717,-1,1,0,0,98.97747802734375,0.1588064730167389,0,3330481.5,1531305.625,521,3.0,2018-12-28 13:08:17.870353
1,1443577,-79.00264603693901,152.6289116414406,-0.0033072757069021463,-0.008206631988286972,0.0005228850059211254,2112.3388671875,0.009917333722114563,0,0,0.0,0.11104678362607956,-1,1,0,0,96.86956024169922,0.15470755100250244,0,3344116.5,1531321.625,504,3.0,2018-12-28 13:08:17.873166
2,1443581,-79.00752245258117,-70.6943691405194,0.014864666387438774,0.008721026591956615,0.0013260508421808481,183.06307983398438,0.027073273435235023,0,0,0.0,0.16084477305412292,4,2,1,4,202.50680541992188,0.2390720546245575,0,739329.125,384736.78125,112,3.0,2019-02-23 01:20:25.464276
3,1443582,-79.00769747403395,-70.69454532910781,0.014720231294631958,0.00877853762358427,0.0010059431660920382,183.2300262451172,0.020507626235485077,0,0,0.0,0.1285490095615387,4,2,1,4,200.3974609375,0.2356618493795395,0,725386.6875,384747.40625,119,3.0,2019-02-23 01:20:25.467113
4,1443583,-79.00787249736605,-70.69472147124445,0.014575871638953686,0.007440008223056793,0.0013597721699625254,183.38632202148438,0.03146978095173836,0,0,0.0,0.15692085027694702,4,2,1,4,198.28993225097656,0.23225460946559906,0,720453.75,384761.53125,103,3.0,2019-02-23 01:20:25.469949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133682447,1565346,-79.00705393268645,-132.21556773731749,-0.006477783899754286,0.0013406918151304126,0.0035417911130934954,1102.985107421875,0.0550558902323246,0,0,0.0,0.14228999614715576,-1,4,1,2,209.85472106933594,0.691652774810791,1,451009.6875,354604.03125,20,3.0,2019-03-28 17:10:59.942291
133682448,1565347,-79.00687892642271,-132.2157441847106,-0.005692235194146633,-0.008524035103619099,0.004340614657849073,1102.9365234375,0.04776083678007126,0,0,0.0,0.20151501893997192,-1,4,1,2,209.85472106933594,0.6932626962661743,1,433255.3125,354612.0,18,3.0,2019-03-28 17:10:59.945103
133682449,1565348,-79.00670402584493,-132.21592350411126,-0.005990581586956978,0.0020828170236200094,0.007239909842610359,1102.9647216796875,0.0763402134180069,0,0,0.0,0.34467920660972595,-1,4,1,2,209.85472106933594,0.6948735117912292,1,420491.84375,354619.09375,21,3.0,2019-03-28 17:10:59.947921
133682450,1565349,-79.00652927174954,-132.21610675971414,-0.006612603086978197,0.0049421414732933044,0.003202562453225255,1103.0465087890625,0.04750864952802658,0,0,0.0,0.19493642449378967,-1,4,1,2,209.85472106933594,0.6964864730834961,1,410111.875,354627.96875,25,3.0,2019-03-28 17:10:59.950749


In [9]:
# Scratch check: an int8 vector of ones with one entry per gt1l row.
np.ones((len(gt1l),), dtype='int8')

array([1., 1., 1., ..., 1., 1., 1.])

In [10]:
# Tag every row currently held in the six beam tables with cycle number 2
# by attaching a constant int8 `cycle` column to each table.
for beam_df in (gt1l, gt1r, gt2l, gt2r, gt3l, gt3r):
    cycle_column = np.ones(len(beam_df), dtype=np.int8) * 2
    beam_df['cycle'] = cycle_column

In [13]:
# os.listdir returns every directory entry in arbitrary order. Keep only the
# HDF5 granules (so stray entries are not handed to h5py) and sort them so
# the row order of the concatenated tables is reproducible between runs.
files = sorted(name for name in os.listdir('./data/') if name.endswith('.h5'))

In [14]:
# Collect the new per-beam pieces first and concatenate once per beam at the
# end, instead of re-concatenating every table for every file.
new_parts = [[] for _ in range(6)]  # positions: gt1l, gt1r, gt2l, gt2r, gt3l, gt3r
skipped_files = []

for file in tqdm(files):
    dfs = read_atl06('./data/' + file)

    # read_atl06 returns only the beams present in the file, without labels.
    # With fewer than six frames the positions no longer identify the beam,
    # so indexing dfs[0]..dfs[5] would either append rows to the wrong table
    # or fail with IndexError part-way through. Skip such files and report.
    if len(dfs) != 6:
        skipped_files.append(file)
        continue

    for parts, beam_df in zip(new_parts, dfs):
        parts.append(vaex.from_pandas(beam_df, copy_index=False))

gt1l, gt1r, gt2l, gt2r, gt3l, gt3r = (
    vaex.concat([existing, *parts]) if parts else existing
    for existing, parts in zip((gt1l, gt1r, gt2l, gt2r, gt3l, gt3r), new_parts)
)

if skipped_files:
    print('Skipped %d file(s) without all six beams: %s'
          % (len(skipped_files), skipped_files))

100%|██████████| 1335/1335 [2:06:32<00:00,  5.69s/it] 


In [15]:
# Display the gt1l table (bare last expression -> rendered as the cell output).
gt1l

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle
0,1443576,-79.00265175898602,152.6338938942904,-0.0034930305555462837,-0.00199329387396574,0.0012799798278138041,2112.09716796875,0.02197273075580597,0,0,0.0,0.13674001395702362,-1,1,0,0,98.97747802734375,0.1588064730167389,0,3543741.5,1567040.375,138,3.0,2018-12-28 13:08:17.510551,2
1,1443577,-79.00282660535785,152.6337131144976,-0.0033072757069021463,-0.005460350774228573,0.0012954295380041003,2111.994140625,0.015203893184661865,0,0,0.0,0.12070798128843307,-1,1,0,0,96.86956024169922,0.15470755100250244,0,3428445.25,1567056.875,144,3.0,2018-12-28 13:08:17.513373,2
2,1443581,-79.00770226316052,-70.68951907383446,0.014864666387438774,0.008418873883783817,0.000533556507434696,184.6269989013672,0.01060151495039463,0,0,0.0,0.12233275175094604,4,2,1,4,202.50680541992188,0.2390720546245575,0,821368.3125,456252.125,397,3.0,2019-02-23 01:20:25.102900,2
3,1443582,-79.00787704800452,-70.68970163845202,0.014720231294631958,0.005704349372535944,0.0005324581870809197,184.77682495117188,0.01133840810507536,0,0,0.0,0.12369103729724884,4,2,1,4,200.3974609375,0.2356618493795395,0,794943.6875,456265.75,381,3.0,2019-02-23 01:20:25.105714,2
4,1443583,-79.00805186741138,-70.68988326285287,0.014575871638953686,0.009426143020391464,0.0006639459170401096,184.91607666015625,0.01203470304608345,0,0,0.0,0.14692498743534088,4,2,1,4,198.28993225097656,0.23225460946559906,0,780100.625,456282.53125,388,3.0,2019-02-23 01:20:25.108526,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278383015,1565333,-79.00691642820257,157.7220973285058,0.020282894372940063,-0.0026600060518831015,0.0019890449475497007,1352.0867919921875,0.042511556297540665,0,0,0.0,0.19723959267139435,-1,5,1,1,3.4028234663852886e+38,3.4028234663852886e+38,1,5876.80126953125,0.0,78,3.0,2019-06-23 17:41:48.858320,3
278383016,1565334,-79.00674149912395,157.7219186494466,0.022569799795746803,0.0054223197512328625,0.0014901709510013461,1352.1932373046875,0.03254437819123268,0,0,0.0,0.1583598554134369,-1,5,1,1,3.4028234663852886e+38,3.4028234663852886e+38,1,5149.26953125,0.0,81,3.0,2019-06-23 17:41:48.861145,3
278383017,1565335,-79.00656656409018,157.72174013784968,0.022776860743761063,0.0020033831242471933,0.0012969662202522159,1352.2843017578125,0.029721293598413467,0,0,0.0,0.12487281858921051,-1,5,1,1,3.4028234663852886e+38,3.4028234663852886e+38,1,6185.1708984375,0.0,70,3.0,2019-06-23 17:41:48.863969,3
278383018,1565336,-79.00639162651767,157.72156170763117,0.02158433012664318,0.0016407748917117715,0.0014232911635190248,1352.3492431640625,0.034284960478544235,0,0,0.0,0.14026929438114166,-1,5,1,1,3.4028234663852886e+38,3.4028234663852886e+38,1,6597.64794921875,0.0,75,3.0,2019-06-23 17:41:48.866792,3


In [16]:
# Write each table to a temporary file and then move it into place.
# The tables were opened from these same ./gt*.arrow paths, so exporting
# straight onto them overwrites a file the table may still be reading from;
# that is the likely cause of the "OSError: [Errno 14] Bad address" recorded
# for this cell. os.replace swaps the finished file in after the export
# has completed.
for final_path, beam_table in (
    ('./gt1l.arrow', gt1l),
    ('./gt1r.arrow', gt1r),
    ('./gt2l.arrow', gt2l),
    ('./gt2r.arrow', gt2r),
    ('./gt3l.arrow', gt3l),
    ('./gt3r.arrow', gt3r),
):
    tmp_path = final_path + '.tmp'
    beam_table.export_arrow(tmp_path)
    os.replace(tmp_path, final_path)


OSError: [Errno 14] Bad address

In [None]:
# Display the gt1l table (bare last expression -> rendered as the cell output).
gt1l