In [1]:
import h5py
import numpy as np
import pandas as pd
from astropy.time import Time
from tqdm import tqdm
import os
import vaex

In [2]:
def gps2dyr(time):
    """Converts GPS time to datetime (can also do decimal years)."""
    return Time(time, format='gps').datetime

In [3]:
def read_atl06(fname, cycle):
    """Read one ATL06 file and output 6 reduced files. 
    
    Extract variables of interest and separate the ATL06 file 
    into each beam (ground track) and ascending/descending orbits.
    """

    # Each beam is a group
    group = ['/gt1l', '/gt1r', '/gt2l', '/gt2r', '/gt3l', '/gt3r']
    
    # Loop trough beams
    dataframes = []  # one dataframe per track
    
    with h5py.File(fname, 'r') as fi:
        # Check which ground tracks are present in this file
        gtracks = sorted(['/'+k for k in fi.keys() if k.startswith('gt')])
    
        for k, g in enumerate(gtracks): 
            # Read in data for a single beam
            data = {}
            # this is our unique key (per beam)
            data['id'] = fi[g+'/land_ice_segments/segment_id'][:]
            npts = len(data['id'])
            # Load vars into memory (include as many as you want)
            data['lat'] = fi[g+'/land_ice_segments/latitude'][:]
            data['lon'] = fi[g+'/land_ice_segments/longitude'][:]
            
            data['slope_y'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dy'][:]
            data['slope_x'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dx'][:]
            data['slope_x_sigma'] = fi[g+'/land_ice_segments/fit_statistics/dh_fit_dx_sigma'][:]

            data['h_li'] = fi[g+'/land_ice_segments/h_li'][:]
            data['s_li'] = fi[g+'/land_ice_segments/h_li_sigma'][:]
            data['q_flag'] = fi[g+'/land_ice_segments/atl06_quality_summary'][:]
            data['s_fg'] = fi[g+'/land_ice_segments/fit_statistics/signal_selection_source'][:]
            data['snr'] = fi[g+'/land_ice_segments/fit_statistics/snr_significance'][:]
            data['h_rb'] = fi[g+'/land_ice_segments/fit_statistics/h_robust_sprd'][:]
            data['bsnow_conf'] = fi[g+'/land_ice_segments/geophysical/bsnow_conf'][:]
            
            data['cloud_flg_asr'] = fi[g+'/land_ice_segments/geophysical/cloud_flg_asr'][:]
            data['cloud_flg_atm'] = fi[g+'/land_ice_segments/geophysical/cloud_flg_atm'][:]
            data['msw_flag'] = fi[g+'/land_ice_segments/geophysical/msw_flag'][:]
            data['fbsnow_h'] = fi[g+'/land_ice_segments/geophysical/bsnow_h'][:]
            data['bsnow_od'] = fi[g+'/land_ice_segments/geophysical/bsnow_od'][:]
            data['layer_flag'] = fi[g+'/land_ice_segments/geophysical/layer_flag'][:]
            data['bckgrd'] = fi[g+'/land_ice_segments/geophysical/bckgrd'][:]
            data['e_bckgrd'] = fi[g+'/land_ice_segments/geophysical/e_bckgrd'][:]
            data['n_fit_photons'] = fi[g+'/land_ice_segments/fit_statistics/n_fit_photons'][:]
            data['w_surface_window_final'] = fi[g+'/land_ice_segments/fit_statistics/w_surface_window_final'][:]
            
            delta_t = fi[g+'/land_ice_segments/delta_time'][:]     # for time conversion
            t_ref = fi['/ancillary_data/atlas_sdp_gps_epoch'][:]     # single value
            
            # Time in GPS seconds (secs since Jan 5, 1980)
            t_gps = t_ref + delta_t

            # GPS sec to datetime
            data['t_year'] = gps2dyr(t_gps)
            data['cycle'] = np.ones(npts, dtype=np.int8)*cycle
            data['track'] = np.repeat(g[1:], npts)
                
            # Make a dataframe out of our data dict and store it.
            dataframes.append(pd.DataFrame.from_dict(data))
        
    return dataframes

In [4]:
pwd

'/home/jupyter/iceDivides'

In [5]:
seed = read_atl06('./ATL06_20190329071316_13870211_004_01.h5', cycle=int(4))

In [6]:
Pgt1l = seed[0].iloc[:2]
Pgt1r = seed[1].iloc[:2]
Pgt2l = seed[2].iloc[:2]
Pgt2r = seed[3].iloc[:2]
Pgt3l = seed[4].iloc[:2]
Pgt3r = seed[5].iloc[:2]

In [7]:
Pgt1l

Unnamed: 0,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,...,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track
0,1443587,-79.006042,176.344895,0.003497,0.045053,0.00083,-19.586994,0.012193,0,0,...,29.979246,0.268619,1,28285.419922,0.0,522,3.0,2019-03-29 07:13:52.971019,4,gt1l
1,1443588,-79.006216,176.344717,0.004264,0.050562,0.000883,-18.607821,0.012879,0,0,...,29.979246,0.268275,1,25933.798828,0.0,541,3.0,2019-03-29 07:13:52.973831,4,gt1l


In [8]:
gt1l = vaex.from_pandas(Pgt1l, copy_index=False)
gt1r = vaex.from_pandas(Pgt1r, copy_index=False)
gt2l = vaex.from_pandas(Pgt2l, copy_index=False)
gt2r = vaex.from_pandas(Pgt2r, copy_index=False)
gt3l = vaex.from_pandas(Pgt3l, copy_index=False)
gt3r = vaex.from_pandas(Pgt3r, copy_index=False)

In [9]:
files = os.listdir('./data04/')

In [10]:
for file in tqdm(files):
    dfs = read_atl06('./data04/' + file, cycle=int(4))
    gt1l = gt1l.concat(vaex.from_pandas(dfs[0], copy_index=False))
    gt1r = gt1r.concat(vaex.from_pandas(dfs[1], copy_index=False))
    gt2l = gt2l.concat(vaex.from_pandas(dfs[2], copy_index=False))
    gt2r = gt2r.concat(vaex.from_pandas(dfs[3], copy_index=False))
    gt3l = gt3l.concat(vaex.from_pandas(dfs[4], copy_index=False))
    gt3r = gt3r.concat(vaex.from_pandas(dfs[5], copy_index=False))

100%|██████████| 1183/1183 [1:38:14<00:00,  4.98s/it]


In [11]:
import pyproj

psgs = pyproj.Proj(3976)

In [12]:
df = vaex.concat([gt1l, gt1r, gt2l, gt2r, gt3l, gt3r])

In [15]:
df.lon

Expression = lon
Length: 751,393,194 dtype: float64 (column)
-------------------------------------------
        0  176.345
        1  176.345
        2  131.964
        3  131.964
        4  131.964
       ...        
751393189  122.206
751393190  122.205
751393191  122.205
751393192  122.205
751393193  122.205

In [16]:
lat = df.lat.to_numpy()
lon = df.lon.to_numpy()

In [17]:
x, y = psgs(lon,lat)

In [18]:
df['x'], df['y'] = x, y

In [19]:
df

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year,cycle,track,x,y
0,1443587,-79.00604150693235,176.34489505559952,0.0034970236,0.045052722,0.0008298112,-19.586994,0.012193246,0,0,0.0,0.17003135,-1,2,1,3,29.979246,0.26861945,1,28285.42,0.0,522,3.0,2019-03-29 07:13:52.971019000,4,gt1l,76147.62450344954,-1192036.1950727522
1,1443588,-79.00621643747844,176.34471657932238,0.0042643934,0.050562415,0.00088303734,-18.60782,0.01287923,0,0,0.0,0.18802492,-1,2,1,3,29.979246,0.2682749,1,25933.799,0.0,541,3.0,2019-03-29 07:13:52.973831000,4,gt1l,76150.11884253839,-1192016.878556122
2,1443566,-79.00595912587283,131.9638885933059,-0.0078001074,-0.007624898,0.00072996994,2694.2922,0.01342969,0,0,0.0,0.15974869,-1,3,1,1,29.979246,3.5172894,1,31344.887,0.0,349,3.0,2019-08-13 03:39:08.411206000,4,gt1l,888171.4001540921,-798700.0837204859
3,1443567,-79.0061341640174,131.96371306495894,-0.009097683,0.0016801931,0.0006594451,2694.236,0.011044861,0,0,0.0,0.14315589,-1,3,1,1,29.979246,3.508708,1,24857.996,0.0,370,3.0,2019-08-13 03:39:08.413974000,4,gt1l,888159.6225666268,-798684.5713220087
4,1443568,-79.00630916165724,131.9635364478629,-0.0090947375,-0.005134519,0.000681793,2694.2112,0.01374981,0,0,0.0,0.14606725,-1,3,1,1,29.979246,3.5001132,1,18611.582,0.0,351,3.0,2019-08-13 03:39:08.416746000,4,gt1l,888147.8633645977,-798669.045089982
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751393189,1565322,-78.99498722259055,122.20563290468425,-0.0019472371,-0.0039466233,0.000513232,3011.0813,0.010244543,0,0,0.0,0.13470326,6,1,0,4,89.93774,0.13574995,1,30091.52,0.0,526,3.0,2019-09-24 15:38:58.984244000,4,gt3r,1011708.541638234,-637245.7870177798
751393190,1565323,-78.99481244460544,122.20544982005265,-0.0011542421,0.00054058724,0.00054023886,3011.042,0.010337847,0,0,0.0,0.14055006,6,1,0,4,89.93774,0.13408506,1,29101.316,0.0,505,3.0,2019-09-24 15:38:58.987084000,4,gt3r,1011726.7408161714,-637252.734645272
751393191,1565324,-78.9946376773271,122.20526646088268,-0.0013028648,0.0010834709,0.00054830353,3011.047,0.010585114,0,0,0.0,0.14434388,6,1,0,4,89.93774,0.13242358,1,29514.148,0.0,503,3.0,2019-09-24 15:38:58.989918000,4,gt3r,1011744.9421167629,-637259.6766943152
751393192,1565325,-78.99446293594019,122.20508242886729,-0.0017108024,-0.0010359868,0.00060883845,3011.0618,0.011584576,0,0,0.0,0.15835683,6,1,0,4,89.93774,0.1307686,1,29702.807,0.0,518,3.0,2019-09-24 15:38:58.992741000,4,gt3r,1011763.1485659989,-637266.6052466437


In [20]:
df.export_arrow('./atl06_04_11.arrow')

In [9]:
ls

Assimilation.ipynb  Untitled.ipynb  download.py                  utils.py
Climate_bash.ipynb  [0m[01;34m_build[0m/         download_icesat2.ipynb       utils_atl03.py
CloudMask.ipynb     [01;34mdata[0m/           [01;34mexamples[0m/                    utils_atl06.py
ExampleFetch.ipynb  default.nix     mixed_graphical_model.ipynb  utils_cloud.py
IngestATL03.ipynb   des.nix         seed.h5


In [11]:
gt1l.export_arrow('./gt1l.arrow')
gt1r.export_arrow('./gt1r.arrow')
gt2l.export_arrow('./gt2l.arrow')
gt2r.export_arrow('./gt2r.arrow')
gt3l.export_arrow('./gt3l.arrow')
gt3r.export_arrow('./gt3r.arrow')


In [10]:
gt1l

#,id,lat,lon,slope_y,slope_x,slope_x_sigma,h_li,s_li,q_flag,s_fg,snr,h_rb,bsnow_conf,cloud_flg_asr,cloud_flg_atm,msw_flag,fbsnow_h,bsnow_od,layer_flag,bckgrd,e_bckgrd,n_fit_photons,w_surface_window_final,t_year
0,1443576,-79.00265175898602,152.6338938942904,-0.0034930306,-0.0019932939,0.0012799798,2112.0972,0.02197273,0,0,0.0,0.13674001,-1,1,0,0,98.97748,0.15880647,0,3543741.5,1567040.4,138,3.0,2018-12-28 13:08:17.510551000
1,1443577,-79.00282660535785,152.6337131144976,-0.0033072757,-0.005460351,0.0012954295,2111.9941,0.015203893,0,0,0.0,0.12070798,-1,1,0,0,96.86956,0.15470755,0,3428445.2,1567056.9,144,3.0,2018-12-28 13:08:17.513373000
2,1443581,-79.00770226316052,-70.68951907383446,0.014864666,0.008418874,0.0005335565,184.627,0.010601515,0,0,0.0,0.12233275,4,2,1,4,202.5068,0.23907205,0,821368.3,456252.12,397,3.0,2019-02-23 01:20:25.102900000
3,1443582,-79.00787704800452,-70.68970163845202,0.014720231,0.0057043494,0.0005324582,184.77682,0.011338408,0,0,0.0,0.12369104,4,2,1,4,200.39746,0.23566185,0,794943.7,456265.75,381,3.0,2019-02-23 01:20:25.105714000
4,1443583,-79.00805186741138,-70.68988326285287,0.014575872,0.009426143,0.0006639459,184.91608,0.012034703,0,0,0.0,0.14692499,4,2,1,4,198.28993,0.23225461,0,780100.6,456282.53,388,3.0,2019-02-23 01:20:25.108526000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133682447,1565346,-79.00721045006628,-132.21977068251854,-0.006477784,0.004741233,0.0016565407,1102.3906,0.03348249,0,0,0.0,0.21513973,-1,4,1,2,209.85472,0.6916528,1,494481.06,420435.2,111,3.0,2019-03-28 17:10:59.581486000
133682448,1565347,-79.00703565872891,-132.2199529490756,-0.005692235,0.002207704,0.0018826145,1102.4156,0.029799085,0,0,0.0,0.21348575,-1,4,1,2,209.85472,0.6932627,1,514356.84,420444.66,102,3.0,2019-03-28 17:10:59.584297000
133682449,1565348,-79.00686084266192,-132.22013452710036,-0.0059905816,-0.003254145,0.0019431423,1102.4171,0.04439576,0,0,0.0,0.21254022,-1,4,1,2,209.85472,0.6948735,1,513579.88,420454.1,90,3.0,2019-03-28 17:10:59.587110000
133682450,1565349,-79.00668598596165,-132.22031501663284,-0.006612603,0.0012488412,0.0021047168,1102.4423,0.037854582,0,0,0.0,0.21960956,-1,4,1,2,209.85472,0.6964865,1,496849.22,420463.56,92,3.0,2019-03-28 17:10:59.589927000
