### make monthly dataframe of mooring SITs and auxiliary data

In [1]:
from netCDF4 import Dataset
import numpy as np
import datetime
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.kdtree import KDTree
from pyproj import Proj, Transformer
from scipy.stats import linregress
import xarray as xr
import cartopy.crs as ccrs
import os
import glob
import scipy.io
import h5py
import cartopy
import tqdm
import warnings
warnings.filterwarnings("ignore")

  from scipy.spatial.kdtree import KDTree


In [2]:
def lonlat_to_xy(coords_1, coords_2, hemisphere, inverse=False):

    """Converts between longitude/latitude and EASE xy coordinates.
 
    Args:
        lon (float): WGS84 longitude
        lat (float): WGS84 latitude
        hemisphere (string): 'n' or 's'
        inverse (bool): if true, converts xy to lon/lat
 
    Returns:
        tuple: pair of xy or lon/lat values
    """

    EASE_Proj = {'n': 'EPSG:3408',
                 's': 'EPGS:3409'}
    
    WGS_Proj = 'EPSG:4326'
    
    for coords in [coords_1, coords_2]: assert isinstance(coords,(np.ndarray,list))

    if inverse == False: # lonlat to xy
        
        lon, lat = coords_1, coords_2
        
        transformer = Transformer.from_crs(WGS_Proj, EASE_Proj[hemisphere])
        
        x, y = transformer.transform(lat, lon)
        
        return (x, y)

    else: # xy to lonlat
        
        x, y = coords_1, coords_2
        
        transformer = Transformer.from_crs(EASE_Proj[hemisphere], WGS_Proj)
        
        lat, lon = transformer.transform(x, y)
        
        return (lon, lat)

### load auxiliary daily data and combine into monthly dataframe

In [3]:
### daily icetype on EASE grid
datapath = '/Users/carmennab/Dropbox/alpha_retracker/data/'
icetype = Dataset(datapath+'auxiliary/icetype_01012010-30042021.nc')['Ice Type']

### daily snow depth and density on EASE grid
era5_sden = Dataset(datapath+'snow/SMLG_ERA5_01Jan2010-31Jul2021.nc')['Snow Density']
era5_snod = Dataset(datapath+'snow/SMLG_ERA5_01Jan2010-31Jul2021.nc')['Snow Depth']
merra2_sden = Dataset(datapath+'snow/SMLG_MERRA2_01Jan2010-31Jul2021.nc')['Snow Density']
merra2_snod = Dataset(datapath+'snow/SMLG_MERRA2_01Jan2010-31Jul2021.nc')['Snow Depth']

In [4]:
rho_fyi = 916.7  
rho_myi = 882.
rho_fyi_jut = 916.7 * 1.013
rho_myi_jut = 882. * 1.023

In [5]:
### load buoy locations
datapath = '../data/sit/pysiral/'
locs = pd.read_csv(datapath+'locs.csv')
locs

Unnamed: 0,year,month,mooring,lon,lat
0,2010,10,a,-149.97907,75.00092
1,2010,10,b,-149.96996,77.98854
2,2010,10,d,-139.98340,73.99908
3,2010,11,a,-149.97907,75.00092
4,2010,11,b,-149.96996,77.98854
...,...,...,...,...,...
425,2018,12,f14,-6.50100,78.81500
426,2019,1,f14,-6.50100,78.81500
427,2019,2,f14,-6.50100,78.81500
428,2019,3,f14,-6.50100,78.81500


In [6]:
# load ease lons,lats:
ease_lats = np.load('/Users/carmennab/Dropbox/alpha_retracker/data/auxiliary/lat_25km_cent.npy')
ease_lons = np.load('/Users/carmennab/Dropbox/alpha_retracker/data/auxiliary/lon_25km_cent.npy')

# convert into x,y:
ease_x, ease_y = lonlat_to_xy(ease_lons,ease_lats,hemisphere='n')

# create KDTree to find EASE grid cell closest to buoy:
tree = KDTree(list(zip(ease_x.ravel(),ease_y.ravel())))

In [7]:
### combine data into big monthly dataframe
days = np.arange(0,4138)
dates = [datetime.date(2010,1,1)+datetime.timedelta(days=int(days)) for days in days]

years = np.arange(2010,2022)
months = [1,2,3,4,10,11,12]

big_df = {}

for buoy in ['a','b','d','f11','f12','f13','f14']: ### loop through buoy dataframes
    buoy_locs = locs[locs['mooring']==buoy]

    buoy_df = []
    
    for year,month in zip(buoy_locs['year'],buoy_locs['month']):

        monthyear_locs = buoy_locs[(buoy_locs['month']==month) & (buoy_locs['year']==year)]

        ### find nearest EASE-2 grid cell to buoys
        buoy_lon = monthyear_locs['lon'].iloc[0]
        buoy_lat = monthyear_locs['lat'].iloc[0]

        buoy_x,buoy_y = lonlat_to_xy(np.array(buoy_lon),np.array(buoy_lat),hemisphere='n')
        dist, ind = tree.query([buoy_x,buoy_y])

        ind2d = np.unravel_index(ind, (ease_lats.shape[0],ease_lats.shape[1]))

        ease_x = ind2d[0] ; ease_y = ind2d[1]

        ### average daily data over month
        era5_sden_vals = [] ; era5_snod_vals = [] ; merra2_sden_vals = [] ; merra2_snod_vals = [] ; icetype_vals = [] 
        for day in days: 
            date = dates[day]
            if (date.month==month) & (date.year==year):
                era5_sden_vals.append(era5_sden[day])
                era5_snod_vals.append(era5_snod[day])
                merra2_sden_vals.append(merra2_sden[day])
                merra2_snod_vals.append(merra2_snod[day])
                icetype_vals.append(icetype[day])

        ### calculate monthly averages, on ease grid
        mean_era5_snod = np.nanmean(era5_snod_vals,axis=0)
        mean_era5_sden = np.nanmean(era5_sden_vals,axis=0)
        mean_merra2_snod = np.nanmean(merra2_snod_vals,axis=0)
        mean_merra2_sden = np.nanmean(merra2_sden_vals,axis=0)
        mean_icetype = np.round(np.nanmean(icetype_vals,axis=0),decimals=0)

        if mean_icetype[ease_x,ease_y] == 2:
            rho_i = rho_fyi
            rho_i_jut = rho_fyi_jut
        elif mean_icetype[ease_x,ease_y] == 3:
            rho_i = rho_myi
            rho_i_jut = rho_myi_jut
        elif np.isnan(mean_icetype[ease_x,ease_y]):
            rho_i = np.nan
            rho_i_jut = np.nan
        else:
            raise
            
        mean_era5_snod_90 = mean_era5_snod*0.9
        mean_era5_snod_110 = mean_era5_snod*1.1

        ### add monthly means to dataframe
        buoy_df.append({"year":year,"month":month,"mean_era5_sden":mean_era5_sden[ease_x,ease_y],
                        "mean_era5_snod":mean_era5_snod[ease_x,ease_y],"mean_era5_snod_90":mean_era5_snod_90[ease_x,ease_y],
                        "mean_era5_snod_110":mean_era5_snod_110[ease_x,ease_y],"mean_merra2_sden":mean_merra2_sden[ease_x,ease_y],
                        "mean_merra2_snod":mean_merra2_snod[ease_x,ease_y],"mean_ice_density":rho_i,"mean_ice_density_jut":rho_i_jut})

    df = pd.DataFrame(buoy_df)

    big_df[buoy] = df

In [8]:
big_df['a']

Unnamed: 0,year,month,mean_era5_sden,mean_era5_snod,mean_era5_snod_90,mean_era5_snod_110,mean_merra2_sden,mean_merra2_snod,mean_ice_density,mean_ice_density_jut
0,2010,10,192.442490,0.027158,0.024442,0.029874,179.003555,0.027081,916.7,928.6171
1,2010,11,266.800446,0.066308,0.059677,0.072939,264.380249,0.066523,916.7,928.6171
2,2010,12,260.804382,0.110834,0.099750,0.121917,263.062378,0.120450,916.7,928.6171
3,2011,1,256.055206,0.099190,0.089271,0.109109,282.026672,0.107682,916.7,928.6171
4,2011,2,249.614227,0.112708,0.101437,0.123979,294.701080,0.122424,916.7,928.6171
...,...,...,...,...,...,...,...,...,...,...
72,2020,12,298.634949,0.096556,0.086900,0.106211,309.483734,0.137395,916.7,928.6171
73,2021,1,315.006317,0.102995,0.092695,0.113294,343.688416,0.133642,,
74,2021,2,277.479034,0.183921,0.165529,0.202313,338.958588,0.228458,,
75,2021,3,292.335663,0.132713,0.119442,0.145985,369.107483,0.173123,,


### load BGEP daily drafts and add monthly mean to big_df

In [9]:
draft_df = {}

for buoy in ['a','b','d']:
    years = np.arange(10,19)
    bgep_dfs = []
    datapath = '../data/validation/BGEP/ULS_'+buoy.upper()+'/'
    
    for year in years:
        file = datapath + 'uls'+str(year)+buoy+'_dailyn.mat'
      ### older files require scipy.io, newer ones h5py
        if year in [10,11]:
            data = scipy.io.loadmat(file)
            drafts = data['IDS'][:,1] # load sea ice draft

            dates = np.array([datetime.datetime.strptime(date,'%Y-%m-%d').date() for date in data['dates']])

            months = np.array([d.month for d in dates])
            years = np.array([d.year for d in dates])
            bgep_df = pd.DataFrame({'mean_draft':drafts,
            'date':dates,
            'year':years,
            'month':months})

            bgep_dfs.append(bgep_df)

        else:
            with h5py.File(file, 'r') as f:
                drafts = np.array(f['IDS'][1]) # load sea ice draft  
                dt = np.array(f['yday'])[0]
                j1 = datetime.date(2000+year,1,1)
                dates = np.array([j1+datetime.timedelta(days=int(d)) for d in dt])
                months = np.array([d.month for d in dates])
                years = np.array([d.year for d in dates])

                bgep_df = pd.DataFrame({'mean_draft':drafts,
                'date':dates,
                'year':years,
                'month':months})

                bgep_dfs.append(bgep_df)
    draft_df[buoy] = pd.DataFrame(np.concatenate(bgep_dfs), columns = ['mean_draft','date','year','month'])

In [10]:
for buoy in ['a','b','d']:
    
    buoy_df = draft_df[buoy]
    
    mean_drafts=[]
    
    for month, year in zip(big_df[buoy]['month'],big_df[buoy]['year']):

        monthyear_data = buoy_df[(buoy_df['month']==month) & (buoy_df['year']==year)]

        mean_drafts.append(np.nanmean(monthyear_data['mean_draft']))

    ### add mean monthly draft to big_df
    big_df[buoy]['mean_draft'] = mean_drafts

In [11]:
big_df[buoy]

Unnamed: 0,year,month,mean_era5_sden,mean_era5_snod,mean_era5_snod_90,mean_era5_snod_110,mean_merra2_sden,mean_merra2_snod,mean_ice_density,mean_ice_density_jut,mean_draft
0,2010,10,173.356171,0.034783,0.031304,0.038261,188.321335,0.039978,916.7,928.6171,0.450911
1,2010,11,291.953522,0.092709,0.083438,0.101979,300.074829,0.101658,916.7,928.6171,0.677667
2,2010,12,270.324280,0.087654,0.078889,0.096420,253.036652,0.114278,882.0,902.2860,0.900402
3,2011,1,238.103745,0.124065,0.111658,0.136472,274.636292,0.134621,882.0,902.2860,1.725921
4,2011,2,265.261169,0.146331,0.131698,0.160964,330.062012,0.151559,916.7,928.6171,1.299694
...,...,...,...,...,...,...,...,...,...,...,...
72,2020,12,316.147552,0.093438,0.084094,0.102781,333.702209,0.116445,916.7,928.6171,0.733917
73,2021,1,327.560669,0.114402,0.102961,0.125842,365.840118,0.147755,,,1.249953
74,2021,2,325.728546,0.189659,0.170693,0.208625,361.593597,0.208959,,,1.593629
75,2021,3,318.451050,0.236274,0.212647,0.259902,355.476349,0.263666,,,1.692710


### load Sumata monthly drafts and to big_df

In [12]:
draft_df = {}

years = np.arange(2010,2020)
months = np.arange(1,13)

datapath = '/Users/carmennab/Dropbox/alpha_retracker/data/validation/Sumata_2023/raw/netcdfs/'

for buoy in ['f11','f12','f13','f14']:
    buoy_drafts = []
    for month, year in zip(big_df[buoy]['month'],big_df[buoy]['year']):

            filepath = f'{datapath}{year}{month:02d}_{buoy.upper()}.nc'

            if os.path.exists(filepath) == True:
                file = Dataset(filepath)
                pdf = file['y_pdf'][:]
                bins = file['x_bin'][:]

                mean_draft = np.average(bins,weights=pdf)

                buoy_drafts.append(mean_draft)
    
    drafts = pd.DataFrame(buoy_drafts)

    big_df[buoy]['mean_draft'] = drafts

In [13]:
big_df[buoy]

Unnamed: 0,year,month,mean_era5_sden,mean_era5_snod,mean_era5_snod_90,mean_era5_snod_110,mean_merra2_sden,mean_merra2_snod,mean_ice_density,mean_ice_density_jut,mean_draft
0,2012,10,315.237305,0.145578,0.13102,0.160135,354.785919,0.167223,882.0,902.286,1.648971
1,2012,11,330.493835,0.144895,0.130406,0.159385,331.112915,0.116225,882.0,902.286,1.860519
2,2012,12,314.382996,0.184811,0.16633,0.203292,305.934692,0.180511,882.0,902.286,1.788335
3,2014,10,421.252594,0.129573,0.116616,0.142531,422.585052,0.167628,882.0,902.286,1.549472
4,2014,11,421.033691,0.321681,0.289513,0.35385,420.980133,0.305018,882.0,902.286,2.058176
5,2014,12,450.855469,0.316936,0.285243,0.34863,429.616547,0.311756,882.0,902.286,1.886259
6,2015,1,408.842224,0.19899,0.179091,0.218889,422.284424,0.210451,882.0,902.286,1.787383
7,2015,2,381.1026,0.277355,0.24962,0.305091,377.162903,0.294981,882.0,902.286,1.930802
8,2015,3,354.64151,0.227589,0.20483,0.250348,404.154022,0.227322,882.0,902.286,2.214197
9,2015,4,381.259216,0.229751,0.206776,0.252727,417.156067,0.220582,916.7,928.6171,1.80377


### calculate mean monthly buoy thickness and add to big_df

In [14]:
rho_w = 1023.9
c = 3.
cs = 2.4

In [16]:
for name in ['era5','merra2','jut','90','110']:
    for buoy in ['a','b','d','f11','f12','f13','f14']:

        data = big_df[buoy]

        if name in ['era5','jut']:
            df_hs = data['mean_era5_snod']
            df_rho_s = data['mean_era5_sden']
        else:
            df_hs = data['mean_merra2_snod']
            df_rho_s = data['mean_merra2_sden']
        if name == 'jut':
            df_rho_i = data['mean_ice_density_jut']
        else:
            df_rho_i = data['mean_ice_density']
            
        if name == '90':
            df_hs = data['mean_era5_snod_90']
        if name == '110':
            df_hs = data['mean_era5_snod_110']
        df_draft = data['mean_draft']

        rows = np.arange(0,len(df_hs))

        Fi = [((df_draft[row]*(rho_w-df_rho_i[row]))-(df_hs[row]*df_rho_s[row])) / df_rho_i[row] for row in rows]

        sit = np.array([((Fi[row]*rho_w) + (df_hs[row]*df_rho_s[row])) / (rho_w - df_rho_i[row])  for row in rows])

        big_df[buoy]['mean_buoy_thickness_'+name] = sit

        filepath = '../data/sit/pysiral/aux_'+buoy+'.csv'
        big_df[buoy].to_csv(filepath, na_rep=np.nan, index=False)

In [17]:
big_df[buoy]

Unnamed: 0,year,month,mean_era5_sden,mean_era5_snod,mean_era5_snod_90,mean_era5_snod_110,mean_merra2_sden,mean_merra2_snod,mean_ice_density,mean_ice_density_jut,mean_draft,mean_buoy_thickness_era5,mean_buoy_thickness_merra2,mean_buoy_thickness_jut,mean_buoy_thickness_90,mean_buoy_thickness_110
0,2012,10,315.237305,0.145578,0.13102,0.160135,354.785919,0.167223,882.0,902.286,1.648971,1.862234,1.846999,1.820365,1.861562,1.84985
1,2012,11,330.493835,0.144895,0.130406,0.159385,331.112915,0.116225,882.0,902.286,1.860519,2.105554,2.116215,2.058215,2.110891,2.100012
2,2012,12,314.382996,0.184811,0.16633,0.203292,305.934692,0.180511,882.0,902.286,1.788335,2.010176,2.013437,1.964981,2.018356,2.005535
3,2014,10,421.252594,0.129573,0.116616,0.142531,422.585052,0.167628,882.0,902.286,1.549472,1.736872,1.718444,1.697822,1.742885,1.730468
4,2014,11,421.033691,0.321681,0.289513,0.35385,420.980133,0.305018,882.0,902.286,2.058176,2.235746,2.243719,2.18548,2.25112,2.220412
5,2014,12,450.855469,0.316936,0.285243,0.34863,429.616547,0.311756,882.0,902.286,1.886259,2.027718,2.037874,1.98213,2.050788,2.019913
6,2015,1,408.842224,0.19899,0.179091,0.218889,422.284424,0.210451,882.0,902.286,1.787383,1.982706,1.974186,1.938129,1.9892,1.970146
7,2015,2,381.1026,0.277355,0.24962,0.305091,377.162903,0.294981,882.0,902.286,1.930802,2.121595,2.115297,2.073896,2.134695,2.110974
8,2015,3,354.64151,0.227589,0.20483,0.250348,404.154022,0.227322,882.0,902.286,2.214197,2.478915,2.466261,2.423182,2.476568,2.45571
9,2015,4,381.259216,0.229751,0.206776,0.252727,417.156067,0.220582,916.7,928.6171,1.80377,1.919151,1.914327,1.894522,1.920609,1.899699


### combine sumata buoys

In [None]:
moorings = ['f11','f12','f13','f14']
### monthly mean data at each mooring

datapath = '/Users/carmennab/Dropbox/alpha_retracker/data/sit/pysiral/'

aux_data = {}

for mooring in moorings:

    aux_data[mooring] =  pd.read_csv('../data/sit/pysiral/aux_'+mooring+'.csv')

In [None]:
unique_months = set(aux_data['f11']['month'])
unique_years = set(aux_data['f11']['year'])

sits = []
for month, year in itertools.product(unique_months,unique_years):
    f11_data = aux_data['f11'][(aux_data['f11']['month']==month) & (aux_data['f11']['year']==year)]
    f12_data = aux_data['f12'][(aux_data['f12']['month']==month) & (aux_data['f12']['year']==year)]
    f13_data = aux_data['f13'][(aux_data['f13']['month']==month) & (aux_data['f13']['year']==year)]
    f14_data = aux_data['f14'][(aux_data['f14']['month']==month) & (aux_data['f14']['year']==year)]

    d = {'year':year,'month':month}
    
    for key in aux_data['f11']:
        if key in ['year','month']: pass
        else:
            values = []
            for data in [f11_data[key],f12_data[key],f13_data[key],f14_data[key]]:
                if len(data)>0:
                    values.append(data.iloc[0])
            value = np.nanmean(values)
            
            d[key] = value
    sits.append(d)
sit_df = pd.DataFrame(sits)

In [None]:
fyi = 916.7
myi = 882.0
lim = (fyi + myi)/2

densities = np.array(sit_df['mean_ice_density'])
densities[densities>=lim] = fyi
densities[densities<lim] = myi

sit_df['mean_ice_density'] = densities
set(sit_df['mean_ice_density'])

In [None]:
fyi_jut = 916.7 * 1.013
myi_jut = 882.0 * 1.023
lim = (fyi_jut + myi_jut)/2

densities = np.array(sit_df['mean_ice_density_jut'])
densities[densities>=lim] = fyi_jut
densities[densities<lim] = myi_jut

sit_df['mean_ice_density_jut'] = densities
set(sit_df['mean_ice_density_jut'])

In [None]:
filepath = '../data/sit/pysiral/aux_sumata.csv'
sit_df.to_csv(filepath, na_rep=np.nan, index=False)