# Extraction files

## ICESat 2

In [None]:
import glob
import os
import pandas as pd
import geopandas as gpd
from pathlib import Path
import h5py
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import time
from astropy.time import Time
import datetime
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import statistics
%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
##### load files
## set the directory
data_home = Path('/home/jovyan/ICESat_water_level/extraction/download/')
## list the granules; sorted because glob returns entries in whatever order
## the filesystem yields them, which makes positional access non-reproducible
files = sorted(data_home.glob('*.h5'))
### bounds of Tonle Sap Lake: [lon_west, lon_east, lat_south, lat_north]
sp_ex = [103.643, 104.667, 12.375, 13.287]

### Raw files

In [None]:
### I made a function for stacking the information
def alt13_to_df_beams(filename):
    """Read one ATL13 HDF5 granule and stack its beams into a single DataFrame.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of the granule. The acquisition date and the track identifiers
        are parsed from the file name, which is expected to contain
        ``_<yyyymmddhhmmss>_<8 digits>_``.

    Returns
    -------
    pandas.DataFrame
        One row per segment with the columns 'Beam', 'RGT', 'Cycle', 'Date',
        'Date_num', 'Ver.', 'Latitude', 'Longitude', 'SurfaceH', 'SH_SD',
        'SH_error', 'OrthoH' and 'WaterD'. An empty DataFrame is returned
        when the file contains no beam group.
    """
    # Match the fields on the base name instead of counting "_"-separated
    # pieces of the whole path: positional indices shift as soon as a parent
    # directory gains or loses an underscore.
    name_match = re.search(r'_(\d{8})\d{6}_(\d{4})(\d{2})(\d{2})_',
                           Path(filename).name)
    if name_match:
        ymd, rgt, cycle_txt, version_txt = name_match.groups()
    else:
        # Unrecognised name: keep the historical positional layout.
        fields = str(filename).split("_")
        ymd = fields[4][0:8]
        rgt = fields[5][0:4]
        cycle_txt = fields[5][4:6]
        version_txt = fields[5][6:8]
    cycle = int(cycle_txt)
    # NOTE(review): in the published ATL13 naming scheme these two digits look
    # like a segment/region number rather than a product version - confirm.
    version = int(version_txt)
    ymd_trans = datetime.datetime(int(ymd[0:4]),int(ymd[4:6]),int(ymd[6:8]))
    date = ymd_trans.strftime("%Y-%m-%d")

    beam_frames = []
    # The context manager closes the HDF5 handle even if a dataset is missing.
    with h5py.File(filename, 'r') as f:
        # Pick the ground-track groups by name rather than by their position
        # among the root groups, which depends on what else the file contains.
        beam_lst = [key for key in f if key.startswith('gt')]
        for beam in beam_lst:
            f_beam = f[beam]
            # [:] copies each dataset into memory, so the arrays stay valid
            # after the file is closed.
            df_beam = pd.DataFrame({'Beam': beam ,'RGT':rgt,'Cycle': cycle, 'Date':date, 'Date_num':int(ymd),'Ver.':version,
                                    'Latitude':f_beam['segment_lat'][:],
                                    'Longitude':f_beam['segment_lon'][:],
                                    'SurfaceH':f_beam['ht_water_surf'][:],
                                    'SH_SD':f_beam['stdev_water_surf'][:],
                                    'SH_error':f_beam['err_ht_water_surf'][:],
                                    'OrthoH':f_beam['ht_ortho'][:],
                                    'WaterD':f_beam['water_depth'][:]})
            beam_frames.append(df_beam)

    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if not beam_frames:
        return pd.DataFrame()
    return pd.concat(beam_frames, ignore_index=True)


In [None]:
# Read every granule and stack the per-file tables in one pass.
# (DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the accumulated table for every file.)
granule_frames = [alt13_to_df_beams(ff) for ff in files]
a_tracks = pd.concat(granule_frames, ignore_index=True) if granule_frames else pd.DataFrame()
# Chronological order; skipped for an empty table, which has no 'Date_num' column.
if not a_tracks.empty:
    a_tracks.sort_values(by=['Date_num'], inplace=True)
print('done')

In [None]:
# Second name for the combined ATL13 table; this is the same object, not a copy.
icesat2 = a_tracks

In [None]:
# Save the raw combined table; the row index is written too (to_csv default).
icesat2.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat2.csv')

### Without outliers

In [None]:
### outliers
def out_rmv(df,var):
    """Return the rows of `df` whose `var` value lies inside the IQR fences.

    A value counts as an outlier when it is above q0.75 + 1.5 * IQR or below
    q0.25 - 1.5 * IQR; values exactly on a fence are kept. The returned rows
    keep their original index labels.
    """
    values = df[var]
    q25 = values.quantile(0.25)
    q75 = values.quantile(0.75)
    ### distance of each fence from its quartile
    fence = 1.5*(q75 - q25)
    inside = values.between(q25-fence, q75+fence)
    return df.loc[inside]

In [None]:
### I made a function for stacking the information
def alt13_a_out(db,var2):
    """Remove outliers of `var2` separately for every date/beam combination.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with at least the columns 'Date_num', 'Beam' and `var2`.
    var2 : str
        Name of the column that `out_rmv` filters on.

    Returns
    -------
    pandas.DataFrame
        The surviving rows of every group stacked in date order, then beam
        order, with a fresh 0..n-1 index. When nothing survives, an empty
        frame with the columns of `db` is returned.
    """
    # The beam names do not depend on the date, so compute them once.
    beam_lst = db['Beam'].unique()
    kept_groups = []
    for prd in db['Date_num'].unique():
        s_db = db.loc[db['Date_num']==prd]
        for beam_type in beam_lst:
            kept = out_rmv(s_db.loc[s_db['Beam']==beam_type],var2)
            # Empty groups add no rows; leaving them out keeps concat from
            # having to reconcile dtypes of empty frames.
            if not kept.empty:
                kept_groups.append(kept)
    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if not kept_groups:
        return db.iloc[0:0].copy()
    return pd.concat(kept_groups, ignore_index=True)

In [None]:
# Filter 'SurfaceH' outliers from the combined ATL13 table, one date/beam group at a time.
icesat2_out=alt13_a_out(icesat2,'SurfaceH')

In [None]:
# Save the outlier-filtered table; the row index is written too (to_csv default).
icesat2_out.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat2_out.csv')

### Average

In [None]:
### I made a function for averaging the height
def alt13_mean(db,var2):
    """Average `var2` for every distinct 'Date_num' in `db`.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with the columns 'Date_num', 'Date' and `var2`.
    var2 : str
        Name of the column to average.

    Returns
    -------
    pandas.DataFrame
        Columns 'Av_level', 'Date' and 'Date_num', ordered by the first
        appearance of each 'Date_num' in `db`. Empty (but with these columns)
        when `db` has no rows.
    """
    per_date = []
    for prd in db['Date_num'].unique():
        s_db = db.loc[db['Date_num']==prd]
        av_height = statistics.mean(s_db[var2])
        # The scalar mean is broadcast against the unique date label(s).
        per_date.append(pd.DataFrame({'Av_level':av_height,
                                      'Date':s_db['Date'].unique(),
                                      'Date_num':s_db['Date_num'].unique()}))
    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if not per_date:
        return pd.DataFrame(columns=['Av_level','Date','Date_num'])
    return pd.concat(per_date, ignore_index=True)

In [None]:
# Mean 'SurfaceH' per date, computed from the outlier-filtered table.
icesat2_av = alt13_mean(icesat2_out,'SurfaceH')

In [None]:
# Save the per-date averages; the row index is written too (to_csv default).
icesat2_av.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat2_av.csv')

## ICESat 1

In [None]:
##### load files
## set the directory
data_home = Path('/home/jovyan/ICESat_water_level/extraction/icesat/')
## list the granules; sorted because the cells below address them by
## position and glob returns entries in filesystem order
files = sorted(data_home.glob('*.H5'))
### Spatial Bounds: [lon_west, lon_east, lat_south, lat_north]
tsl_sp_ex = [103.643, 104.667, 12.375, 13.287]
### load the water mask
tsl_wm = gpd.read_file('/home/jovyan/ICESat_water_level/extraction/shp/tsl_sample_dis.shp') ## water mask: Tonle Sap Lake

In [None]:
def glah14_to_df(filename,bounds):
    """Read one GLAH14 HDF5 file and return its 40 Hz records inside `bounds`.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path of the HDF5 file.
    bounds : sequence of four numbers
        [Longitude_West, Longitude_East, Latitude_South, Latitude_North],
        e.g. [103.643, 104.667, 12.375, 13.287]. Both ends are inclusive.

    Returns
    -------
    pandas.DataFrame
        Columns 'Latitude', 'Longitude', 'Elevation', 's_El_Corr', 's_Corr_f',
        'in_sat', 'DEM' and 'Date'; 'Date' is one 'YYYY-MM-DD' string repeated
        on every row. Rows keep their position in the file as index labels.
    """
    # The context manager closes the file even when a dataset is missing;
    # [:] copies each dataset into memory so the arrays outlive the handle.
    with h5py.File(filename, 'r') as f:
        data_40hz = f['Data_40HZ']
        lat = data_40hz['Geolocation']['d_lat'][:]
        lon = data_40hz['Geolocation']['d_lon'][:]
        elev = data_40hz['Elevation_Surfaces']['d_elev'][:]
        sec = data_40hz['Elevation_Corrections']['d_satElevCorr'][:]
        scf = data_40hz['Quality']['sat_corr_flg'][:]
        satndx = data_40hz['Quality']['i_satNdx'][:]
        dem = data_40hz['Geophysical']['d_DEM_elv'][:]
        # The time stamp of the second record dates the whole file.
        # NOTE(review): 630763213 presumably shifts the product's time origin
        # to the GPS epoch expected by astropy's 'gps' format - confirm
        # against the GLAH14 data dictionary.
        date_gps = data_40hz['DS_UTCTime_40'][1] + 630763213
    date_ymd = Time(date_gps,format='gps').datetime.strftime("%Y-%m-%d")

    glah14_df = pd.DataFrame({'Latitude':lat,'Longitude':lon,'Elevation':elev,
                            's_El_Corr':sec, 's_Corr_f':scf,'in_sat':satndx,
                            'DEM':dem,'Date':date_ymd})
    #### Subsetting
    in_bounds = ((glah14_df['Longitude']>=bounds[0])
                 & (glah14_df['Longitude']<=bounds[1])
                 & (glah14_df['Latitude']>=bounds[2])
                 & (glah14_df['Latitude']<=bounds[3]))
    # Independent copy, so callers can modify the subset in place without
    # pandas copy-of-a-slice warnings.
    return glah14_df.loc[in_bounds].copy()

In [None]:
# Spot check: read the first listed granule, subset to the lake bounds.
test1=glah14_to_df(files[0],tsl_sp_ex)

In [None]:
# Spot check: read the tenth listed granule, subset to the lake bounds.
test2=glah14_to_df(files[9],tsl_sp_ex)

In [None]:
# Stack the two samples for display; DataFrame.append was removed in pandas 2.0.
pd.concat([test1, test2])


In [None]:
# Trial run on the first five granules only.
sample_frames = []
for kk in range(0,5):
    sample_frames.append(glah14_to_df(files[kk],tsl_sp_ex))
    print(kk)
# DataFrame.append was removed in pandas 2.0; concatenate once after the loop.
is1_pd = pd.concat(sample_frames, ignore_index = True)
    

In [None]:
# Show the table built from the trial run.
print(is1_pd)

In [None]:
#### convert to geoDF and clip it. 
clipped_frames = []
# Walk over every granule, files[0] included; starting the index at 1 would
# silently leave the first file out of the result.
for num, granule in enumerate(files):
    gdf = gpd.GeoDataFrame(glah14_to_df(granule,tsl_sp_ex))
    gdf.set_geometry(
        gpd.points_from_xy(gdf['Longitude'], gdf['Latitude']),
        inplace=True, crs='EPSG:4326')
    # Keep only the points that fall inside the water mask.
    clipped_frames.append(gpd.clip(gdf, tsl_wm))
    print(num)
# DataFrame.append was removed in pandas 2.0; concatenate once after the loop.
# Row labels are kept as they were (no ignore_index), matching the old loop.
is1_pd = pd.concat(clipped_frames) if clipped_frames else pd.DataFrame()

In [77]:
# Second name for the clipped table; this is the same object, not a copy.
icesat1 = is1_pd

# In-place sort, so is1_pd is reordered as well. 'Date' holds 'YYYY-MM-DD'
# strings, for which alphabetical order is also chronological order.
icesat1.sort_values(by=['Date'], inplace=True)

In [78]:
# Save the clipped table; the row index is written too (to_csv default).
icesat1.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat1.csv')

### remove outliers

In [69]:
### I made a function for stacking the information
def gl14_a_out(db,var2):
    """Remove outliers of `var2` separately for every date in `db`.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with at least the columns 'Date' and `var2`.
    var2 : str
        Name of the column that `out_rmv` filters on.

    Returns
    -------
    pandas.DataFrame
        The surviving rows of every date stacked in order of first
        appearance, with a fresh 0..n-1 index. When nothing survives, an
        empty frame with the columns of `db` is returned.
    """
    kept_groups = []
    for prd in db['Date'].unique():
        kept = out_rmv(db.loc[db['Date']==prd],var2)
        # Empty groups add no rows; leaving them out keeps concat from
        # having to reconcile dtypes of empty frames.
        if not kept.empty:
            kept_groups.append(kept)
    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if not kept_groups:
        return db.iloc[0:0].copy()
    return pd.concat(kept_groups, ignore_index=True)

In [79]:
# Filter 'Elevation' outliers from the clipped table, one date at a time.
icesat1_out = gl14_a_out(icesat1,'Elevation')

In [80]:
# Save the outlier-filtered table; the row index is written too (to_csv default).
icesat1_out.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat1_out.csv')

### Average values

In [74]:
### I made a function for averaging the height
def gl14_mean(db,var2):
    """Average `var2` for every distinct 'Date' in `db`.

    Parameters
    ----------
    db : pandas.DataFrame
        Table with the columns 'Date' and `var2`.
    var2 : str
        Name of the column to average.

    Returns
    -------
    pandas.DataFrame
        Columns 'Av_level' and 'Date', ordered by the first appearance of
        each date in `db`. Empty (but with these columns) when `db` has no
        rows.
    """
    per_date = []
    for prd in db['Date'].unique():
        s_db = db.loc[db['Date']==prd]
        av_height = statistics.mean(s_db[var2])
        # The scalar mean is broadcast against the one-element date array.
        per_date.append(pd.DataFrame({'Av_level':av_height,'Date':s_db['Date'].unique()}))
    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if not per_date:
        return pd.DataFrame(columns=['Av_level','Date'])
    return pd.concat(per_date, ignore_index=True)

In [81]:
# Mean 'Elevation' per date, computed from the outlier-filtered table.
icesat1_av = gl14_mean(icesat1_out,'Elevation')

In [83]:
# Save the per-date averages; the row index is written too (to_csv default).
icesat1_av.to_csv('/home/jovyan/ICESat_water_level/extraction/data/icesat1_av.csv')