# Initial visualization of GRACE mascons
## Dataset
GSFC mascons: [data](https://earth.gsfc.nasa.gov/geo/data/grace-mascons), [documentation](https://earth.gsfc.nasa.gov/sites/default/files/2022-05/gsfc_mascons_hdf5_format_rl06v2.pdf)

## Environments and packages


In [1]:
# Location of the GSFC GRACE mascon HDF5 file. The directory and the file name
# are joined by plain string concatenation when the file is opened, so the
# directory string keeps its trailing slash.
grace_filepath = '/global/scratch/users/ann_scheliga/'
grace_filename = 'gsfc.glb_.200204_202211_rl06v2.0_obp-ice6gd.h5'

In [2]:
import numpy as np
import pandas as pd
import h5py

In [3]:
# Open the mascon file read-only. The handle is not closed here because the
# following cells keep reading groups from it.
f = h5py.File(grace_filepath + grace_filename, mode='r')

In [4]:
def h5group_to_df(h5group, exclude=()):
    """
    Collect the datasets of one HDF5 group into a single DataFrame,
    one column per dataset.

    Each dataset is read with ``np.array(...)``, transposed and squeezed,
    which is the same conversion the cells below apply by hand.

    Input
    -----
    h5group: h5py._hl.group.Group
        Group whose members are read. Any mapping of name -> array-like
        works, since only iteration over names and ``h5group[name]`` are used.
    exclude: iterable of str, optional
        Dataset names to leave out (e.g. members that are not one value
        per row). Defaults to excluding nothing.

    Output
    ------
    pd.DataFrame
        One column per retained dataset, named after the dataset and kept
        in the group's iteration order. Empty if nothing is retained.

    Raises
    ------
    ValueError
        If a retained dataset is not 1-D after transposing and squeezing.
    """
    skipped = set(exclude)
    columns = {}
    for name in h5group:
        if name in skipped:
            continue
        values = np.array(h5group[name]).T.squeeze()
        # Fail early with the offending name instead of pandas' generic
        # shape error, so the caller knows what to pass in `exclude`.
        if values.ndim != 1:
            raise ValueError(
                f"dataset {name!r} has shape {values.shape} after squeezing; "
                "expected 1-D (pass its name in `exclude` to skip it)"
            )
        columns[name] = values
    # Build the frame in one step rather than inserting column by column.
    return pd.DataFrame(columns)

In [5]:
# (works) mascon metadata
# List every dataset name under f['mascon'], drop the unused
# 'location_legend' entry, then turn each remaining dataset into one column
# (read as an array, transposed, and squeezed).
mascon_cols = list(f['mascon'])
mascon_cols.remove('location_legend')
mascon_df = pd.DataFrame(
    {name: np.array(f['mascon'][name]).T.squeeze() for name in mascon_cols}
)

In [6]:
# (works) WE solutions
# Wrap the 'cmwe' dataset of the 'solution' group in a DataFrame, keeping the
# dataset's own row/column layout (no transpose or squeeze is applied here).
# NOTE(review): presumably centimetres of water equivalent, going by the
# dataset name -- confirm against the GSFC format documentation.
cmwe = pd.DataFrame(data=f['solution']['cmwe'])

In [7]:
# (works) dates
# REFERENCE DATE (aka 2002 Jan 0): the stored day counts are offsets from
# this timestamp.
start_date = pd.Timestamp('2001-12-31')

time_cols = list(f['time'])                # dataset names in the time group
time_cols.remove('list_ref_days_solution') # drop the unused dataset name

# Positional selection: skips the first two remaining names and the last one.
# NOTE(review): the last name is presumably 'yyyy_doy_yrplot_middle', which is
# read separately below -- confirm against list(f['time']). Exactly three
# names must be selected, otherwise the column renaming below raises.
time_df = pd.DataFrame(
    {name: np.array(f['time'][name]).T.squeeze() for name in time_cols[2:-1]}
)

# Convert one whole column per call (apply's default axis) instead of calling
# pd.to_datetime once per row; the resulting frame is the same.
date_df = time_df.apply(
    lambda days: pd.to_datetime(days, unit='D', origin=start_date)
)
date_df.columns = ['date_first', 'date_last', 'date_middle']

# The dataset is transposed so that each of its rows becomes one of the three
# new columns, matched to the names by position.
date_df[['year_middle', 'doy_middle', 'frac_year_middle']] = pd.DataFrame(
    f['time']['yyyy_doy_yrplot_middle']
).T

In [16]:
# Uncertainty estimates: every dataset except 'noise_2sigma' becomes one named
# column; 'noise_2sigma' is loaded as its own frame and appended column-wise,
# keeping its default integer column labels.
uncertainty_cols = list(f['uncertainty'])

# Exclude 'noise_2sigma' by name rather than by position: a positional slice
# only works while that dataset happens to be the last one listed.
uncertainty_df = pd.DataFrame(
    {
        name: np.array(f['uncertainty'][name]).T.squeeze()
        for name in uncertainty_cols
        if name != 'noise_2sigma'
    }
)
noise_df = pd.DataFrame(f['uncertainty']['noise_2sigma'])

uncertainty_df = pd.concat([uncertainty_df, noise_df], axis=1)

In [18]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the
# combined uncertainty frame, shown as the cell output.
uncertainty_df.describe()

Unnamed: 0,leakage_2sigma,leakage_trend,0,1,2,3,4,5,6,7,...,205,206,207,208,209,210,211,212,213,214
count,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,...,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0,41168.0
mean,1.004,0.000109,1.445508,1.448297,1.133806,1.501971,1.427161,1.314948,1.569741,1.78786,...,1.824345,2.007738,1.996577,1.726733,1.78265,1.617386,2.397713,2.190057,1.531518,1.123155
std,1.050979,0.768421,1.294436,1.296932,1.01531,1.344997,1.278006,1.17752,1.405684,1.601007,...,1.633679,1.797905,1.787911,1.546269,1.596341,1.44835,2.147123,1.96117,1.371456,1.005772
min,0.206016,-28.177664,0.367234,0.367942,0.288045,0.381578,0.362573,0.334065,0.398795,0.454209,...,0.463478,0.510069,0.507234,0.438679,0.452885,0.4109,0.609143,0.556388,0.389085,0.28534
25%,0.343326,-0.051017,0.612442,0.613623,0.480378,0.636364,0.604668,0.557125,0.665077,0.757491,...,0.772949,0.850651,0.845922,0.731592,0.755284,0.685264,1.015877,0.927897,0.648883,0.475865
50%,0.540978,-0.000906,0.917396,0.919165,0.719573,0.953229,0.905751,0.834535,0.99624,1.13467,...,1.157825,1.274216,1.267133,1.095875,1.131363,1.026478,1.521714,1.389925,0.971982,0.712813
75%,1.425697,0.049189,1.944288,1.948038,1.525032,2.020233,1.91961,1.768677,2.111388,2.404769,...,2.453843,2.700518,2.685506,2.32255,2.397761,2.175473,3.225055,2.945747,2.059976,1.510705
max,26.885994,28.599211,30.59905,30.658072,24.000829,31.794263,30.210668,27.835303,33.228852,37.846069,...,38.618392,42.500532,42.264277,36.552107,37.735777,34.237424,50.755657,46.359934,32.419737,23.775357
