In [1]:
import h5py
import numpy as np
import pandas as pd
from astropy.time import Time
from tqdm import tqdm
import os
import vaex

In [2]:
def gps2dyr(time):
    """Converts GPS time to datetime (can also do decimal years)."""
    return Time(time, format='gps').datetime

In [3]:
def read_atl06(fname, cycle):
    """Read one ATL06 file and output 6 reduced files. 
    
    Extract variables of interest and separate the ATL06 file 
    into each beam (ground track) and ascending/descending orbits.
    """

    # Each beam is a group
    group = ['/gt1l', '/gt1r', '/gt2l', '/gt2r', '/gt3l', '/gt3r']
    
    # Loop trough beams
    dataframes = []  # one dataframe per track
    
    with h5py.File(fname, 'r') as fi:
        # Check which ground tracks are present in this file
        gtracks = sorted(['/'+k for k in fi.keys() if k.startswith('gt')])
    
        for k, g in enumerate(gtracks): 
            # Read in data for a single beam
            data = {}
            # this is our unique key (per beam)
            data['id'] = fi[g+'/land_ice_segments/segment_id'][:]
            npts = len(data['id'])
            # Load vars into memory (include as many as you want)
            data['lat'] = fi[g+'/land_ice_segments/latitude'][:]
            data['lon'] = fi[g+'/land_ice_segments/longitude'][:]
            
            data['slope_y'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dy'][:]
            data['slope_x'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dx'][:]
            data['slope_x_sigma'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dx_sigma'][:]

            data['h_li'] = fi[g+'/land_ice_segments/h_li'][:]
            data['s_li'] = fi[g+'/land_ice_segments/h_li_sigma'][:]
            data['q_flag'] = fi[g+'/land_ice_segments/atl06_quality_summary'][:]
            data['s_fg'] = fi[g+'/land_ice_segments/fit_statistics/signal_selection_source'][:]
            data['snr'] = fi[g+'/land_ice_segments/fit_statistics/snr_significance'][:]
            data['h_rb'] = fi[g+'/land_ice_segments/fit_statistics/h_robust_sprd'][:]
            data['bsnow_conf'] = fi[g+'/land_ice_segments/geophysical/bsnow_conf'][:]
            
            data['cloud_flg_asr'] = fi[g+'/land_ice_segments/geophysical/cloud_flg_asr'][:]
            data['cloud_flg_atm'] = fi[g+'/land_ice_segments/geophysical/cloud_flg_atm'][:]
            data['msw_flag'] = fi[g+'/land_ice_segments/geophysical/msw_flag'][:]
            data['fbsnow_h'] = fi[g+'/land_ice_segments/geophysical/bsnow_h'][:]
            data['bsnow_od'] = fi[g+'/land_ice_segments/geophysical/bsnow_od'][:]
            data['layer_flag'] = fi[g+'/land_ice_segments/geophysical/layer_flag'][:]
            data['bckgrd'] = fi[g+'/land_ice_segments/geophysical/bckgrd'][:]
            data['e_bckgrd'] = fi[g+'/land_ice_segments/geophysical/e_bckgrd'][:]
            data['n_fit_photons'] = fi[g+'/land_ice_segments/fit_statistics/n_fit_photons'][:]
            data['w_surface_window_final'] = fi[g+'/land_ice_segments/fit_statistics/w_surface_window_final'][:]
            
            delta_t = fi[g+'/land_ice_segments/delta_time'][:]     # for time conversion
            t_ref = fi['/ancillary_data/atlas_sdp_gps_epoch'][:]     # single value
            
            # Time in GPS seconds (secs since Jan 5, 1980)
            t_gps = t_ref + delta_t

            # GPS sec to datetime
            data['t_year'] = gps2dyr(t_gps)
            data['cycle'] = np.ones(npts, dtype=np.int8)*cycle
            data['track'] = np.repeat(g[1:], npts)
                
            # Make a dataframe out of our data dict and store it.
            dataframes.append(pd.DataFrame.from_dict(data))
        
    return dataframes

In [4]:
pwd

'/home/jupyter/iceDivides'

In [5]:
seed = read_atl06('./ATL06_20190329071316_13870211_004_01.h5',cycle=int(2))

In [10]:
test = h5py.File('./ATL06_20190329071316_13870211_004_01.h5')

In [13]:
test.keys()

<KeysViewHDF5 ['METADATA', 'ancillary_data', 'gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r', 'orbit_info', 'quality_assessment']>

In [8]:
!du -csh ./seed.h5

6.5M	./seed.h5
6.5M	total


In [9]:
seed[1]

Unnamed: 0,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,...,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track
0,1443587,-79.005890,176.340839,0.003497,0.047339,0.001724,-19.894785,0.024615,0,0,...,29.979246,0.268619,1,9655.979492,0.00000,127,3.0,2019-03-29 07:13:53.331979,2,gt1r
1,1443588,-79.006065,176.340659,0.004264,0.046678,0.001681,-18.983292,0.029364,0,0,...,29.979246,0.268275,1,10321.526367,0.00000,135,3.0,2019-03-29 07:13:53.334803,2,gt1r
2,1443589,-79.006240,176.340477,0.004327,0.053177,0.001822,-17.971397,0.020860,0,0,...,29.979246,0.267931,1,10114.483398,0.00000,141,3.0,2019-03-29 07:13:53.337636,2,gt1r
3,1443590,-79.006415,176.340294,0.003481,0.057083,0.002062,-16.854248,0.028489,0,0,...,29.979246,0.267589,1,7262.786133,0.00000,135,3.0,2019-03-29 07:13:53.340472,2,gt1r
4,1443591,-79.006590,176.340111,0.003538,0.052252,0.001790,-15.775147,0.030507,0,0,...,29.979246,0.267246,1,7116.794434,0.00000,126,3.0,2019-03-29 07:13:53.343306,2,gt1r
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117375,1565332,-79.006675,15.232945,-0.002697,-0.003359,0.000917,3086.250732,0.019657,0,0,...,29.979246,0.008732,0,486649.375000,322994.62500,126,3.0,2019-03-29 07:19:36.446224,2,gt1r
117376,1565333,-79.006500,15.232763,-0.002456,0.001718,0.000857,3086.243652,0.016813,0,0,...,29.979246,0.008707,0,497868.093750,323002.59375,135,3.0,2019-03-29 07:19:36.449031,2,gt1r
117377,1565334,-79.006325,15.232582,-0.003366,0.003669,0.000857,3086.329102,0.014059,0,0,...,29.979246,0.008682,0,508122.000000,323010.56250,135,3.0,2019-03-29 07:19:36.451840,2,gt1r
117378,1565335,-79.006150,15.232403,-0.003827,0.010012,0.001056,3086.454346,0.019259,0,0,...,29.979246,0.008658,0,490499.656250,323017.65625,143,3.0,2019-03-29 07:19:36.454650,2,gt1r


In [6]:
Pgt1l = seed[0].iloc[:2]
Pgt1r = seed[1].iloc[:2]
Pgt2l = seed[2].iloc[:2]
Pgt2r = seed[3].iloc[:2]
Pgt3l = seed[4].iloc[:2]
Pgt3r = seed[5].iloc[:2]

In [7]:
Pgt1l

Unnamed: 0,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,...,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track
0,1443587,-79.006042,176.344895,0.003497,0.045053,0.00083,-19.586994,0.012193,0,0,...,29.979246,0.268619,1,28285.419922,0.0,522,3.0,2019-03-29 07:13:52.971019,2,gt1l
1,1443588,-79.006216,176.344717,0.004264,0.050562,0.000883,-18.607821,0.012879,0,0,...,29.979246,0.268275,1,25933.798828,0.0,541,3.0,2019-03-29 07:13:52.973831,2,gt1l


In [8]:
gt1l = vaex.from_pandas(Pgt1l, copy_index=False)
gt1r = vaex.from_pandas(Pgt1r, copy_index=False)
gt2l = vaex.from_pandas(Pgt2l, copy_index=False)
gt2r = vaex.from_pandas(Pgt2r, copy_index=False)
gt3l = vaex.from_pandas(Pgt3l, copy_index=False)
gt3r = vaex.from_pandas(Pgt3r, copy_index=False)

In [9]:
files = os.listdir('./data02/')

In [10]:
for file in tqdm(files):
    dfs = read_atl06('./data02/' + file, cycle=int(2))
    gt1l = gt1l.concat(vaex.from_pandas(dfs[0], copy_index=False))
    gt1r = gt1r.concat(vaex.from_pandas(dfs[1], copy_index=False))
    gt2l = gt2l.concat(vaex.from_pandas(dfs[2], copy_index=False))
    gt2r = gt2r.concat(vaex.from_pandas(dfs[3], copy_index=False))
    gt3l = gt3l.concat(vaex.from_pandas(dfs[4], copy_index=False))
    gt3r = gt3r.concat(vaex.from_pandas(dfs[5], copy_index=False))

100%|██████████| 1327/1327 [1:37:34<00:00,  4.41s/it]


In [11]:
df = vaex.concat([gt1l, gt1r, gt2l, gt2r, gt3l, gt3r])

In [12]:
df

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track
0,1443587,-79.00604150693235,176.34489505559952,0.0034970236,0.045052722,0.0008298112,-19.586994,0.012193246,0,0,0.0,0.17003135,-1,2,1,3,29.979246,0.26861945,1,28285.42,0.0,522,3.0,2019-03-29 07:13:52.971019000,2,gt1l
1,1443588,-79.00621643747844,176.34471657932238,0.0042643934,0.050562415,0.00088303734,-18.60782,0.01287923,0,0,0.0,0.18802492,-1,2,1,3,29.979246,0.2682749,1,25933.799,0.0,541,3.0,2019-03-29 07:13:52.973831000,2,gt1l
2,1443581,-79.00770226316052,-70.68951907383446,0.014864666,0.008418874,0.0005335565,184.627,0.010601515,0,0,0.0,0.12233275,4,2,1,4,202.5068,0.23907205,0,821368.3,456252.12,397,3.0,2019-02-23 01:20:25.102900000,2,gt1l
3,1443582,-79.00787704800452,-70.68970163845202,0.014720231,0.0057043494,0.0005324582,184.77682,0.011338408,0,0,0.0,0.12369104,4,2,1,4,200.39746,0.23566185,0,794943.7,456265.75,381,3.0,2019-02-23 01:20:25.105714000,2,gt1l
4,1443583,-79.00805186741138,-70.68988326285287,0.014575872,0.009426143,0.0006639459,184.91608,0.012034703,0,0,0.0,0.14692499,4,2,1,4,198.28993,0.23225461,0,780100.6,456282.53,388,3.0,2019-02-23 01:20:25.108526000,2,gt1l
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
800941655,1565346,-78.99534064981206,-131.90544493760905,-0.0029634854,0.0021240213,0.009571512,1133.6165,0.15513632,0,0,0.0,0.61787885,6,5,3,4,198.37785,0.4589239,1,428876.75,401975.88,32,3.7072732,2019-03-28 17:10:59.907981000,2,gt3r
800941656,1565347,-78.99516588714636,-131.90562836605906,-0.0045786337,0.015404461,0.004680639,1133.849,0.084664375,0,0,0.0,0.34098354,6,5,3,4,200.48244,0.45847726,1,433235.34,401984.78,38,3.0,2019-03-28 17:10:59.910800000,2,gt3r
800941657,1565348,-78.99499111883044,-131.90581166617216,-0.005145013,-0.009226994,0.009038533,1133.9199,0.13839921,0,0,0.0,0.6033738,6,5,3,4,202.58574,0.45803085,1,441646.28,401993.66,35,3.6202428,2019-03-28 17:10:59.913623000,2,gt3r
800941658,1565349,-78.99481635547642,-131.9059950895626,-0.0058715898,0.010735066,0.012288599,1134.0288,0.31806636,0,0,0.0,0.77089083,6,5,3,4,204.68959,0.45758432,1,461248.1,402002.56,32,4.625345,2019-03-28 17:10:59.916447000,2,gt3r


In [13]:
import pyproj

In [14]:
# lon / lat
psgs = pyproj.Proj(3976)


In [23]:
%time
df['x'] , df['y'] = df.apply(psgs, arguments=[df.lon, df.lat], vectorize=True)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 12.6 µs


AttributeError: 'Proj' object has no attribute '__name__'

In [24]:
lat = df.lat.to_numpy()

In [25]:
lon = df.lon.to_numpy()

In [26]:
x, y = psgs(lon,lat)

In [32]:
psgs(176.345, -79.006 )

(76145.73032234018, -1192040.8616217363)

In [33]:
psgs(lon[0],lat[0])

(76147.62450344954, -1192036.1950727522)

In [28]:
x[:10], y[:10]

(array([   76147.62450345,    76150.11884254, -1127094.5101001 ,
        -1127077.74092842, -1127060.96166475, -1127044.1691401 ,
        -1127027.36171066, -1127010.54039467, -1126993.69593272,
        -1126976.82405109]),
 array([-1192036.19507275, -1192016.87855612,   394934.35917266,
          394924.45104703,   394914.56027988,   394904.69221576,
          394894.8496659 ,   394885.03085838,   394875.25162114,
          394865.51934456]))

In [34]:
df['x'], df['y'] = x, y

In [35]:
df

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track,x,y
0,1443587,-79.00604150693235,176.34489505559952,0.0034970236,0.045052722,0.0008298112,-19.586994,0.012193246,0,0,0.0,0.17003135,-1,2,1,3,29.979246,0.26861945,1,28285.42,0.0,522,3.0,2019-03-29 07:13:52.971019000,2,gt1l,76147.62450344954,-1192036.1950727522
1,1443588,-79.00621643747844,176.34471657932238,0.0042643934,0.050562415,0.00088303734,-18.60782,0.01287923,0,0,0.0,0.18802492,-1,2,1,3,29.979246,0.2682749,1,25933.799,0.0,541,3.0,2019-03-29 07:13:52.973831000,2,gt1l,76150.11884253839,-1192016.878556122
2,1443581,-79.00770226316052,-70.68951907383446,0.014864666,0.008418874,0.0005335565,184.627,0.010601515,0,0,0.0,0.12233275,4,2,1,4,202.5068,0.23907205,0,821368.3,456252.12,397,3.0,2019-02-23 01:20:25.102900000,2,gt1l,-1127094.5101001023,394934.35917266377
3,1443582,-79.00787704800452,-70.68970163845202,0.014720231,0.0057043494,0.0005324582,184.77682,0.011338408,0,0,0.0,0.12369104,4,2,1,4,200.39746,0.23566185,0,794943.7,456265.75,381,3.0,2019-02-23 01:20:25.105714000,2,gt1l,-1127077.7409284168,394924.45104703243
4,1443583,-79.00805186741138,-70.68988326285287,0.014575872,0.009426143,0.0006639459,184.91608,0.012034703,0,0,0.0,0.14692499,4,2,1,4,198.28993,0.23225461,0,780100.6,456282.53,388,3.0,2019-02-23 01:20:25.108526000,2,gt1l,-1127060.9616647481,394914.5602798831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
800941655,1565346,-78.99534064981206,-131.90544493760905,-0.0029634854,0.0021240213,0.009571512,1133.6165,0.15513632,0,0,0.0,0.61787885,6,5,3,4,198.37785,0.4589239,1,428876.75,401975.88,32,3.7072732,2019-03-28 17:10:59.907981000,2,gt3r,-889849.3438772612,-798568.80904827
800941656,1565347,-78.99516588714636,-131.90562836605906,-0.0045786337,0.015404461,0.004680639,1133.849,0.084664375,0,0,0.0,0.34098354,6,5,3,4,200.48244,0.45847726,1,433235.34,401984.78,38,3.0,2019-03-28 17:10:59.910800000,2,gt3r,-889861.0025500112,-798584.4149609152
800941657,1565348,-78.99499111883044,-131.90581166617216,-0.005145013,-0.009226994,0.009038533,1133.9199,0.13839921,0,0,0.0,0.6033738,6,5,3,4,202.58574,0.45803085,1,441646.28,401993.66,35,3.6202428,2019-03-28 17:10:59.913623000,2,gt3r,-889872.6633843429,-798600.0193792139
800941658,1565349,-78.99481635547642,-131.9059950895626,-0.0058715898,0.010735066,0.012288599,1134.0288,0.31806636,0,0,0.0,0.77089083,6,5,3,4,204.68959,0.45758432,1,461248.1,402002.56,32,4.625345,2019-03-28 17:10:59.916447000,2,gt3r,-889884.322010054,-798615.6254363636


In [37]:
df.export_arrow('./atl06_02_11.arrow')

In [38]:
!df -h

Filesystem                                   Size  Used Avail Use% Mounted on
udev                                         252G     0  252G   0% /dev
tmpfs                                         51G  2.4M   51G   1% /run
/dev/mapper/ubuntu--vg-root                  2.4T  2.2T  151G  94% /
tmpfs                                        252G   80K  252G   1% /dev/shm
tmpfs                                        5.0M     0  5.0M   0% /run/lock
tmpfs                                        252G     0  252G   0% /sys/fs/cgroup
/dev/nvme0n1p2                               473M  157M  292M  35% /boot
/dev/nvme0n1p1                               511M  6.7M  505M   2% /boot/efi
tmpfs                                         51G   24K   51G   1% /run/user/120
198.59.51.52:/volume2/Bhaltos2                44T   31T   13T  71% /home/lihu9680/Bhaltos2
tmpfs                                         51G     0   51G   0% /run/user/30039
tmpfs                                         51G   24K   51G   1% /r

In [11]:
gt1l.export_arrow('./gt1l.arrow')
gt1r.export_arrow('./gt1r.arrow')
gt2l.export_arrow('./gt2l.arrow')
gt2r.export_arrow('./gt2r.arrow')
gt3l.export_arrow('./gt3l.arrow')
gt3r.export_arrow('./gt3r.arrow')


In [10]:
gt1l

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year
0,1443576,-79.00265175898602,152.6338938942904,-0.0034930306,-0.0019932939,0.0012799798,2112.0972,0.02197273,0,0,0.0,0.13674001,-1,1,0,0,98.97748,0.15880647,0,3543741.5,1567040.4,138,3.0,2018-12-28 13:08:17.510551000
1,1443577,-79.00282660535785,152.6337131144976,-0.0033072757,-0.005460351,0.0012954295,2111.9941,0.015203893,0,0,0.0,0.12070798,-1,1,0,0,96.86956,0.15470755,0,3428445.2,1567056.9,144,3.0,2018-12-28 13:08:17.513373000
2,1443581,-79.00770226316052,-70.68951907383446,0.014864666,0.008418874,0.0005335565,184.627,0.010601515,0,0,0.0,0.12233275,4,2,1,4,202.5068,0.23907205,0,821368.3,456252.12,397,3.0,2019-02-23 01:20:25.102900000
3,1443582,-79.00787704800452,-70.68970163845202,0.014720231,0.0057043494,0.0005324582,184.77682,0.011338408,0,0,0.0,0.12369104,4,2,1,4,200.39746,0.23566185,0,794943.7,456265.75,381,3.0,2019-02-23 01:20:25.105714000
4,1443583,-79.00805186741138,-70.68988326285287,0.014575872,0.009426143,0.0006639459,184.91608,0.012034703,0,0,0.0,0.14692499,4,2,1,4,198.28993,0.23225461,0,780100.6,456282.53,388,3.0,2019-02-23 01:20:25.108526000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133682447,1565346,-79.00721045006628,-132.21977068251854,-0.006477784,0.004741233,0.0016565407,1102.3906,0.03348249,0,0,0.0,0.21513973,-1,4,1,2,209.85472,0.6916528,1,494481.06,420435.2,111,3.0,2019-03-28 17:10:59.581486000
133682448,1565347,-79.00703565872891,-132.2199529490756,-0.005692235,0.002207704,0.0018826145,1102.4156,0.029799085,0,0,0.0,0.21348575,-1,4,1,2,209.85472,0.6932627,1,514356.84,420444.66,102,3.0,2019-03-28 17:10:59.584297000
133682449,1565348,-79.00686084266192,-132.22013452710036,-0.0059905816,-0.003254145,0.0019431423,1102.4171,0.04439576,0,0,0.0,0.21254022,-1,4,1,2,209.85472,0.6948735,1,513579.88,420454.1,90,3.0,2019-03-28 17:10:59.587110000
133682450,1565349,-79.00668598596165,-132.22031501663284,-0.006612603,0.0012488412,0.0021047168,1102.4423,0.037854582,0,0,0.0,0.21960956,-1,4,1,2,209.85472,0.6964865,1,496849.22,420463.56,92,3.0,2019-03-28 17:10:59.589927000
