# Argovis: exploring the vertical structure of ocean fields

This notebook contains some examples and is a work in progress: code will transition from here to other notebooks. Public consumers should focus on our educational notebooks, starting with [Intro to Argovis](https://github.com/argovis/demo_notebooks/blob/main/Intro_to_Argovis.ipynb).

In [None]:
from argovisHelpers import helpers as avh
import datetime #, pandas, matplotlib, scipy, numpy
import numpy as np
import matplotlib.pyplot as plt
#from matplotlib import cm
#import cartopy.crs as ccrs

# for the function
import xarray as xr
import dateutil
from Argovis_tasks_helpers import map_lons_lats, interpolate_profiles
import pandas
# 
from Argovis_tasks_helpers import get_route #, map_lons_lats

# Read the Argovis API key from the environment so that a personal key is never
# saved with the notebook. When the variable is unset this falls back to the
# empty string, i.e. exactly the value that used to be hard-coded here.
# NOTE(review): the variable name ARGOVIS_API_KEY is a choice made in this cell;
# rename it if the project already has a convention.
import os
API_KEY = os.environ.get('ARGOVIS_API_KEY', '')

In [None]:
# API call showing collections available for each route

#### parameters the user needs to select
# every choice made in this cell is stored in the selection_params dict,
# which the later cells read

#### for bgc variables we have gridded time mean fields
selection_params = {}
# list of collections to use for the plot:
# we compare argo profiles with a gridded product
selection_params['collections'] = ['argo', 'grids/glodap']

# alternative:
# selection_params['collections'] = ['cchdo', 'grids/glodap']

#### for temperature and salinity only we have a monthly product
# selection_params['collections'] = ['argo', 'grids/rg09']

######## show list of variables available
for icollection in selection_params['collections']:
    vocabulary_route = icollection + '/vocabulary'
    # first attempt: ask the vocabulary route for the 'data' parameter explicitly
    try:
        print(avh.query(vocabulary_route, options={'parameter': 'data'}, verbose='true', apikey=API_KEY, apiroot=get_route(icollection)))
    except Exception as err:
        # best effort: keep going, but show what actually went wrong
        print('No data parameter for vocabulary query (' + repr(err) + ')')
    # second attempt: query without options and read the 'data' entry of the first item
    try:
        bfr = avh.query(vocabulary_route, verbose='true', apikey=API_KEY, apiroot=get_route(icollection))
        print(*bfr[0]['data'], sep=',')
    except Exception as err:
        print('Needs data parameter for vocabulary query (' + repr(err) + ')')

#### params varying with collection (one entry per item of selection_params['collections'])
# define name of variable of interest for each collection (first show list of all variables available for each collection)
selection_params['varnames']       = ['doxy', 'oxygen']
selection_params['varnames_qc']    = [',1', '']  # argoqc = 1 is best quality
selection_params['vartitle']       = 'Oxygen, umol/kg'
# define name of the variable that includes levels for each collection:
# for the gridded product, the level info is in the metadata, i.e. there is no
# variable in 'data' (for argo, 'pressure' is within the 'data' instead)
selection_params['varname_levels'] = ['pressure', '']

#### alternative settings (kept for reference)
# with 'cchdo' as the first collection:
# selection_params['varnames']    = ['doxy', 'oxygen']
# selection_params['varnames_qc'] = [',2', ''] # woceqc = 2 is best quality, it corresponds to 'no problem noted' (see https://dmoserv3.whoi.edu/data_docs/GEOTRACES/EPZT/WOCE_QualityFlags.pdf)
# selection_params['vartitle']    = 'Oxygen, umol/kg'

# selection_params['varnames']    = ['salinity', 'rg09_salinity']
# selection_params['varnames_qc'] = [',1', '']
# selection_params['vartitle']    = 'Salinity, psu'

# selection_params['varnames']    = ['temperature', 'rg09_temperature']
# selection_params['varnames_qc'] = [',1', '']
# selection_params['vartitle']    = 'Temperature, degC'

#### params varying with the region
# in this example, we will use the 'box' selection (in the next cell),
# hence we indicate here the bottom/left and top/right vertices...
# the other option is to search in a 'polygon' and indicate the polygon
# vertices in a list (first and last vertex should be the same)
selection_params['regions'] = [
    [[-145.5, 45.5], [-135.5, 50.5]],
    [[-50, 45], [-40, 50]],
]
selection_params['regions_type'] = ['box', 'box']
selection_params['regions_tags'] = ['Pacific', 'Atlantic']

# list of startDate and endDates of interest (paired element by element)
selection_params['startDate'] = ['2021-01-01T00:00:00Z']
selection_params['endDate']   = ['2021-12-31T00:00:00Z']

#### other params for the api query
# platform?

#### other params
# levels onto which the profiles are vertically interpolated: 10, 30, ..., 1990
selection_params['interp_levels'] = list(range(10, 2001, 20))

#### sanity checks: the lists above are indexed in parallel by later cells
for per_collection_key in ('varnames', 'varnames_qc', 'varname_levels'):
    assert len(selection_params[per_collection_key]) == len(selection_params['collections']), \
        per_collection_key + ' needs one entry per collection'
for per_region_key in ('regions_type', 'regions_tags'):
    assert len(selection_params[per_region_key]) == len(selection_params['regions']), \
        per_region_key + ' needs one entry per region'
assert len(selection_params['startDate']) == len(selection_params['endDate']), \
    'startDate and endDate are paired and need the same length'


In [None]:
def format_api_output(api_output, collection=None, collection_index=None, params=None):
    """Reshape the list returned by an Argovis query into per-profile lists.

    Parameters
    ----------
    api_output : list of dict, or None
        Documents returned by the query. Each document is expected to carry
        'data', 'timestamp' and 'geolocation' -> 'coordinates' entries.
    collection : str, optional
        Name of the collection that was queried. Defaults to the notebook-level
        variable ``icollection``.
    collection_index : int, optional
        Position of the collection in the per-collection lists of ``params``.
        Defaults to the notebook-level variable ``icl``.
    params : dict, optional
        Selection parameters ('varnames', 'varname_levels' and, optionally,
        'interp_levels'). Defaults to the notebook-level ``selection_params``.

    Returns
    -------
    dict
        Empty when ``api_output`` is empty/None or when the collection name
        contains 'grids' or 'timeseries'. Otherwise it holds the lists 'data',
        'level', 'timestamp', 'longitude' and 'latitude' (one entry per
        document) and, when 'interp_levels' is present in ``params``, the key
        'data_xarray': an xarray Dataset of the interpolated values indexed by
        ('levels', 'index'), with longitude, latitude and timestamp attached
        along 'index'.
    """
    api_output_formatted = {}
    if not api_output:
        return api_output_formatted

    # The notebook-level names are only looked up when the caller did not pass
    # explicit values, so existing calls format_api_output(api_output) still work.
    if collection is None:
        collection = icollection
    # nothing is built for gridded products or timeseries
    if 'grids' in collection or 'timeseries' in collection:
        return api_output_formatted

    # create a list with information for non gridded products
    # NOTE(review): assumes row 0 of 'data' is the requested variable and row 1
    # the level variable -- TODO confirm against the order returned by the API.
    api_output_formatted['data']      = [x['data'][0] for x in api_output]
    api_output_formatted['level']     = [x['data'][1] for x in api_output]
    api_output_formatted['timestamp'] = [dateutil.parser.isoparse(x['timestamp']) for x in api_output]
    api_output_formatted['longitude'] = [x['geolocation']['coordinates'][0] for x in api_output]
    api_output_formatted['latitude']  = [x['geolocation']['coordinates'][1] for x in api_output]

    if params is None:
        params = selection_params
    # if interp_levels are provided, then interpolate and create an xarray
    if 'interp_levels' not in params:
        return api_output_formatted
    if collection_index is None:
        collection_index = icl

    levels_new = params['interp_levels']
    levels_varname = params['varname_levels'][collection_index]
    varname = params['varnames'][collection_index]

    interpolated_profiles = [
        interpolate_profiles(profile=profile, levels_varname=levels_varname, levels_new=levels_new)
        for profile in api_output
    ]
    # one row per profile, one value per interpolation level
    values = [[level[varname] for level in profile['data']] for profile in interpolated_profiles]

    n_profiles = len(api_output)
    n_levels = len(levels_new)
    # long-format table: profile number (1-based) repeated for each of its
    # levels, and the list of levels repeated once per profile
    data_df = pandas.DataFrame({
        'data': np.array(values).flatten().tolist(),
        'levels': np.tile(levels_new, n_profiles).tolist(),
        'index': np.repeat(np.arange(1, n_profiles + 1), n_levels).tolist(),
    })

    # create xarray
    xar = xr.Dataset.from_dataframe(data_df.set_index(['levels', 'index']))
    # Dataset.assign returns a NEW Dataset, so the result has to be kept;
    # otherwise the positions and times never reach 'data_xarray'.
    # NOTE(review): the timestamps are the datetime objects parsed above;
    # confirm xarray stores them in the form later cells expect.
    xar = xar.assign(
        longitude=(['index'], np.array(api_output_formatted['longitude'])),
        latitude=(['index'], np.array(api_output_formatted['latitude'])),
        timestamp=(['index'], np.array(api_output_formatted['timestamp'])),
    )

    api_output_formatted['data_xarray'] = xar
    return api_output_formatted

In [None]:
# Query every (collection, region, time window) combination and keep the
# formatted result of each one. Results are appended in loop order:
# collection first, then region, then time window.
api_output_formatted_list = []
first_query_state = None  # (icl, icollection, api_output, api_output_formatted) of the first query

for icl, icollection in enumerate(selection_params['collections']):
    for i, ireg in enumerate(selection_params['regions']):
        for istart, iend in zip(selection_params['startDate'], selection_params['endDate']):

            # variable name plus its (optional) qc suffix, e.g. 'doxy,1'
            iparam = {'data': selection_params['varnames'][icl] + selection_params['varnames_qc'][icl]}
            if ireg:
                # the region type ('box' or 'polygon') is the name of the query option
                iparam[selection_params['regions_type'][i]] = ireg
            if istart:
                iparam['startDate'] = istart
            if iend:
                iparam['endDate'] = iend
            api_output = avh.query(icollection, options=iparam, verbose='true', apikey=API_KEY, apiroot=get_route(icollection))

            # format_api_output reads the loop variables icl / icollection
            api_output_formatted = format_api_output(api_output)
            api_output_formatted_list.append(api_output_formatted)

            if first_query_state is None:
                first_query_state = (icl, icollection, api_output, api_output_formatted)

# The inspection cells below look at one result through the names api_output,
# api_output_formatted, icl and icollection. Point them at the first query
# (first collection, first region), which is the state this cell used to leave
# behind when it stopped on the undefined name `inprogress` during its first pass.
if first_query_state is not None:
    icl, icollection, api_output, api_output_formatted = first_query_state
            
            

In [None]:
# Re-run the formatter on the raw query result held in `api_output` and display
# the returned dict. format_api_output also reads the notebook-level names
# icollection, icl and selection_params, so the output depends on their current values.
format_api_output(api_output)

In [None]:
# Display the formatted result left in `api_output_formatted` by the query cell.
api_output_formatted


In [None]:
# Quick-look plot of the interpolated values, which are indexed by ('levels', 'index').
# 'data_xarray' is only created for non-gridded collections when
# selection_params contains 'interp_levels'; otherwise this raises KeyError.
api_output_formatted['data_xarray']['data'].plot()

In [None]:
# Timestamps parsed from the ISO strings of the API output, one per returned document.
api_output_formatted['timestamp']

In [None]:
# Map the positions of the returned documents.
# NOTE(review): dx and dy are passed straight to map_lons_lats; their meaning
# is defined in Argovis_tasks_helpers -- confirm there.
map_lons_lats(api_output_formatted['longitude'],api_output_formatted['latitude'],dx=20,dy=20)

In [None]:
# NOTE(review): `inprogress` is not assigned in any visible cell, so evaluating
# it raises NameError. Presumably this is a deliberate work-in-progress marker
# that stops "Run All" before the cells below -- confirm before removing.
inprogress

In [None]:
# get data of interest and do horizontal average
!date
data_reg = get_profiles_in_regions_and_horiz_ave(collections=collections,varname=varname,varname_qc=varname_qc,varname_levels=varname_levels,interp_levels=interp_levels,regions_list_source=regions_list,regions_list_source_type=regions_list_type,regions_list_source_tags=regions_list_tags,startDate=startDate,endDate=endDate,API_KEY=API_KEY)         
!date    

In [None]:
# plot the map for one timestep/level for each of the gridded products
raw_grids = data_reg['regions_list_data_raw_xarray']
for region_idx, grid in enumerate(raw_grids):
    # entries that evaluate as false are skipped
    if not grid:
        continue
    print(grid.sizes)
    # label: region tag followed by the collection name
    label = data_reg['regions_list_tags'][region_idx] + ', ' + data_reg['regions_list_collections'][region_idx]
    print(label)
    plt.figure()
    # first element along each of the last two dimensions
    grid['data'][:, :, 0, 0].plot()

In [None]:
# Number of entries in the list of horizontal averages stored in data_reg.
len(data_reg['regions_list_data_horiz_ave'])

In [None]:
# colours handed to the plotting calls below as data_reg_cols
cols = [
    'k',
    'r',
    'b',
    'm',
]

In [None]:
# plot the horizontal average for (vertically) interpolated profiles and for the gridded product
# NOTE(review): this plotting helper is not imported or defined in the visible cells.
profiles_in_regions_and_horiz_ave_plot1d_horiz_ave(
    data_reg=data_reg,
    data_reg_cols=cols,
    # the axis label is stored in selection_params; a bare `vartitle` is never assigned
    xlabel_tag=selection_params['vartitle'],
)


In [None]:
# let's look at all the raw profiles that were vertically interpolated (except for the gridded products) to then compute the horizontal average above
# NOTE(review): this plotting helper is not imported or defined in the visible cells.
profiles_in_regions_and_horiz_ave_plot1d_all(
    data_reg=data_reg,
    data_reg_cols=cols,
    # the axis label is stored in selection_params; a bare `vartitle` is never assigned
    xlabel_tag=selection_params['vartitle'],
)


In [None]:
# let's look at all the vertically interpolated profiles that were used to compute the horizontal average above
# NOTE(review): this plotting helper is not imported or defined in the visible cells.
profiles_in_regions_and_horiz_ave_plot1d_all_vert_interp(
    data_reg=data_reg,
    data_reg_cols=cols,
    # the axis label is stored in selection_params; a bare `vartitle` is never assigned
    xlabel_tag=selection_params['vartitle'],
)
    

In [None]:
# colour the profiles by groups of months; the three lists below are parallel
# (one colour and one tag per group of month numbers)
# colour names: https://sites.google.com/view/paztronomer/blog/basic/python-colors
month_groups_cols = ['dodgerblue', 'violet', 'orangered', 'gold']
month_groups      = [[12, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
month_groups_tags = ['DJF', 'MAM', 'JJA', 'SON']
# NOTE(review): this plotting helper is not imported or defined in the visible cells.
profiles_in_regions_and_horiz_ave_plot1d_all_col_by_monthgroup(
    data_reg=data_reg,
    month_groups=month_groups,
    month_groups_cols=month_groups_cols,
    month_groups_tags=month_groups_tags,
    # the axis label is stored in selection_params; a bare `vartitle` is never assigned
    xlabel_tag=selection_params['vartitle'],
)


In [None]:
# bin in time? platform history? woceline? easyocean?