
# assign a biome to each float profile
# save as netcdf


In [1]:

import sys
sys.path.append('/global/homes/c/cnissen/scripts/seawater-3.3.4/seawater/')
import os
import numpy as np
import xarray as xr
import cartopy
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from matplotlib import cm
import seawater
#from seawater import dist
#import seawater as sw
import matplotlib.path as mpath
from cartopy.util import add_cyclic_point
import matplotlib.gridspec as gridspec
import matplotlib.ticker as mticker
from cartopy.mpl.ticker import (LongitudeFormatter, LatitudeFormatter,
                                LatitudeLocator)
import random
from numba import njit
from math import sin, cos, sqrt, atan2, radians
from netCDF4 import Dataset
import time 
from tqdm import tqdm


In [2]:
#-----
# saving plots
#-----

#savepath     = '/project/projectdirs/m4003/cnissen/Plots/E3SM_floats/map_initial_positions/'
## check existence of paths
#if not os.path.exists(savepath):
#    print ('Created '+savepath)
#    os.makedirs(savepath)
    

In [3]:
#----
# FUNCTIONS
#----

@njit
def get_closest_grid_point_vector(lon_point, lat_point, lon2, lat2):  
    # PROVIDE LIST OF LOCATIONS TO FUNCTION!
    # in all nodes in mesh, return the index of the closest node to lon_point/lat_point
    # lon2 & lat2 are the locations in the new mesh (to be redistributed to)
    # lon2 & lat2 should be in radians
    # numpy needs to be imported outside the function
    
    #from math import sin, cos, sqrt, atan2, radians
    #import numpy as np
    # approximate radius of earth in km
    R = 6373.0
    
    #lat2 = radians(mesh.y2) # all positions in mesh
    #lon2 = radians(mesh.x2)
    #lat2 = [radians(x) for x in mesh.y2]
    #lon2 = [radians(x) for x in mesh.x2]
    
    index_closest_node    = np.zeros(len(lon_point))
    distance_closest_node = np.zeros(len(lon_point))
    for jj in range(0,len(lon_point)):
        lat1 = radians(lat_point[jj])
        lon1 = radians(lon_point[jj])
        bb1 = cos(lat1)
        
        all_distances = np.zeros(len(lon2))
        for i in range(0,len(lon2)):
            dlon = lon2[i] - lon1
            dlat = lat2[i] - lat1
            a = sin(dlat / 2)**2 + bb1 * cos(lat2[i]) * sin(dlon / 2)**2
            all_distances[i] = 2*R*atan2(sqrt(a), sqrt(1 - a)) # to speed things up, omit constant factors here!
            #all_distances[i] = 2*R*atan2(sqrt(a), sqrt(1 - a)) # correct distance
            #del dlon, dlat, a
        index_closest_node[jj] = np.argmin(all_distances)
        distance_closest_node[jj] = np.min(all_distances)

    return index_closest_node, distance_closest_node


In [4]:
####
# specifics for trajectory output
####

rad_to_deg = 180.0/np.pi
latlim = -45.0

path_mesh = '/global/cfs/cdirs/m4003/maltrud/'
meshID = 'EC30to60E2r2'
meshfile = xr. open_dataset(path_mesh+'ocean.'+meshID+'.210210.nc')
#print(meshfile)

lon  = meshfile['lonCell'].values*rad_to_deg
lat  = meshfile['latCell'].values*rad_to_deg
topo = meshfile['bottomDepth'].values
area = meshfile['areaCell'].values
zlevs            = meshfile['refBottomDepth'].values
layerThickness   = meshfile['layerThickness'].values
restingThickness = meshfile['restingThickness'].values

print(len(lon),'nodes in mesh')
print(topo.shape)
print(area.shape)
print('Min/Max lon:',np.min(lon),np.max(lon))
print('Min/Max lat:',np.min(lat),np.max(lat))
print('layerThickness.shape:',layerThickness.shape)
print('restingThickness.shape:',restingThickness.shape)

meshfile.close()



236853 nodes in mesh
(236853,)
(236853,)
Min/Max lon: 0.0007300572350528742 359.997672445938
Min/Max lat: -78.53259417674468 89.94461290099375
layerThickness.shape: (1, 236853, 60)
restingThickness.shape: (236853, 60)


In [5]:
#----
# load mask of biomes
#----

path_mask = '/global/cfs/cdirs/m4003/cnissen/masks/'
file_mask = 'reccap_mask_regions_e3sm_mesh_EC30to60E2r2_wSubregions.nc'

ff = xr. open_dataset(path_mask+file_mask)
mask_global=ff['mask_e3sm_all_regions'].values.squeeze()
ff.close()

#subareas = ['Atlantic','Pacific','Indian','Arctic','SouthernOcean']
#subareas = ['Atlantic','Pacific','Indian','Arctic','STSS','SPSS','ICE']

print('Min/Max mask_e3sm_all_regions:',np.min(mask_global),np.max(mask_global)) 

print(mask_global.shape,lat.shape,lon.shape)


Min/Max mask_e3sm_all_regions: 0.0 27.0
(236853,) (236853,) (236853,)


In [6]:
#----
# process daily float output
#----
save_netcdf = True

path = '/global/cfs/cdirs/m4003/maltrud/6year/floats/'
year_list = ['0055','0056','0057','0058','0059','0060']

# kick out floats in shallow regions (these are not advected I think)
ind = np.where(zlevs<=2200)[0]

start_at_float = 7000

for yy in range(5,len(year_list)):
    print('Load year '+year_list[yy])
    file1 = 'floats.year'+year_list[yy]+'.nc'   
    data = xr. open_dataset(path+file1)

    lon_1   = data['particleColumnLon'].values*rad_to_deg 
    lat_1   = data['particleColumnLat'].values*rad_to_deg 
    dic_1   = data['particleColumnDIC'].values #[:,0,:]
    #print('lat_all',lat_all.shape)

    # set missing values to NaN (deep ocean layers) 
    if year_list[yy] in ['0059']: # last year has only 363 days, but DIC has 364  
        #lat_1[dic_1[:-1,:,:]==-1]=np.nan
        #lon_1[dic_1[:-1,:,:]==-1]=np.nan
        lat_1[dic_1[:,:,:]==-1]=np.nan
        lon_1[dic_1[:,:,:]==-1]=np.nan
    else: # until year 5
        lat_1[dic_1==-1]=np.nan
        lon_1[dic_1==-1]=np.nan
    dic_1[dic_1==-1]=np.nan 
    
    #print('Reduce to floats in the deep ocean')
    #if yy==0: # only load the first time, re-use ind_deep
    #    aux = np.sum(np.isnan(dic_1[0,ind,:]),axis=0) # check if any of the depth levels shallower than 1100m is NaN
    #    ind_deep = np.where(aux==0)[0] # if it is, aux is >0; only keep those that are 0
    ##print('Floats in the deep ocean:',ind_deep.shape)
    #lon_1   = lon_1[:,:,ind_deep]
    #lat_1   = lat_1[:,:,ind_deep]
    
    
    #----
    # loop over all floats and find 1) closest node in mesh, 2) biome index
    #----

    lat_float = lat_1[:,0,:]
    lon_float = lon_1[:,0,:]
    print(lat_float.shape) 

    ## convert lat/lon on model grid to radians
    #lon_radians = [np.radians(x) for x in lon]
    #lat_radians = [np.radians(x) for x in lat]

    #closest_node = np.nan*np.ones_like(lat_float)
    #biome_index  = np.nan*np.ones_like(lat_float)
    for nn in tqdm(range(start_at_float,lat_float.shape[1])): # loop over floats
        #start_time = time.time()

        # narrow down the region of the float
        #print(np.min(lon_float[:,nn]),np.max(lon_float[:,nn]))
        #print(np.min(lat_float[:,nn]),np.max(lat_float[:,nn]))

        # narrow down the region of the float
        # to speed things up -> in colocation, there is no need to loop over all 200.000 nodes to find the closest one!!

        # d1,d2: define spread of lon/lat for a given float
        #d1 = np.abs((np.max(lon_float[:,nn])-np.min(lon_float[:,nn])))
        #d2 = np.abs((np.max(lat_float[:,nn])-np.min(lat_float[:,nn])))
        # select all locations that 
        #.   are between lat_min-3 and lat_max+3
        #.   are between lon_min-3 and lon_max+3
        dd = 1

        d1 = np.min(lon_float[:,nn])-3 # lon is larger than this
        d2 = np.max(lon_float[:,nn])+3 # lon is smaller than this
        d3 = np.min(lat_float[:,nn])-3
        d4 = np.max(lat_float[:,nn])+3
        #print(d1,d2)
        #print(d3,d4)
        ind_near = np.where((lon>d1) & (lon<d2) & (lat>d3) & (lat<d4))[0]
        #print(ind_near.shape)


        #ind_near = np.where((np.abs(lon-np.min(lon_float[:,nn]))<5) & (np.abs(lat-np.min(lat_float[:,nn]))<5))[0]
        #if (np.max(lat_float[:,nn])-np.min(lat_float[:,nn]))<5:
        #    ind_near = np.where((np.abs(lon-np.min(lon_float[:,nn]))<5) & (np.abs(lat-np.min(lat_float[:,nn]))<5))[0]
        #else:
        #    ind_near = np.where((np.abs(lon-np.min(lon_float[:,nn]))<35) & (np.abs(lat-np.min(lat_float[:,nn]))<35))[0]
        #print(np.min(lon[ind_near]),np.max(lon[ind_near]))
        #print(np.min(lat[ind_near]),np.max(lat[ind_near]))

        # convert lat/lon on model grid to radians
        lon_radians = [np.radians(x) for x in lon[ind_near]]
        lat_radians = [np.radians(x) for x in lat[ind_near]]

        ##-----
        # CORRECTION July, 20: 
        #   the index identified here is relative to lon[ind_near] and lat[ind_near], not relative to the full mesh!!!
        #   i.e., all indices were stored incorrectly in previous version!!
        #-------
        index, distance = get_closest_grid_point_vector(lon_float[:,nn], lat_float[:,nn], lon_radians, lat_radians)
        index = [int(x) for x in index]
        
        # convert to index relative to full mesh
        index_all = np.zeros_like(index)
        for ii in range(0,len(index)):
            index_all[ii] = np.where((lon==lon[ind_near][index[ii]]) & (lat==lat[ind_near][index[ii]]))[0]
        del distance

        if save_netcdf: 
            # save to netcdf file
            saving_directory  = '/global/cfs/cdirs/m4003/cnissen/6year_run/' 
            netcdf_name       = 'Float_positions_colocated_with_biomes_year'+year_list[yy]+'_v2.nc' 

            fv=-999

            if save_netcdf: 
                # check if file exists already
                if not os.path.exists(saving_directory+netcdf_name):
                    print ('Create file '+saving_directory+netcdf_name)
                    w_nc_fid = Dataset(saving_directory+netcdf_name, 'w', format='NETCDF4_CLASSIC')

                    w_nc_fid.source_file = path+file1
                    w_nc_fid.mesh_file = path_mesh+meshID
                    w_nc_fid.script = '/global/homes/c/cnissen/scripts/plot_floats_E3SM_assign_biomes_multi_year_run.ipynb'
                    w_nc_fid.mask_file = path_mask+file_mask

                    w_nc_fid.mask_e3sm_all_regions = "1.NA SPSS, 2.NA STSS, 3.NA STPS, 4.AEQU, 5.SA STPS, 6.MED (not in FM14)" ;
                    w_nc_fid.mask_e3sm_all_regions = "7.IND STPS, 8.(not in FM14)" ;
                    w_nc_fid.mask_e3sm_all_regions = "9.NP SPSS, 10.NP STSS, 11.NP STPS, 12.PEQU-W, 13.PEQU-E, 14.SP STPS" ;
                    w_nc_fid.mask_e3sm_all_regions = "15.ARCTIC ICE (not in FM14), 16.NP ICE, 17.NA ICE, 18.Barents (not in FM14)" ;
                    w_nc_fid.mask_e3sm_all_regions = "19. STSS_Atl, 20. SPSS_Atl, 21. ICE_Atl, 22. STSS_Ind, 23. SPSS_Ind, 24. ICE_Ind, 25. STSS_Pac, 26. SPSS_Pac, 27. ICE_Pac"

                    # create dimension & variable
                    w_nc_fid.createDimension('Time', lat_float.shape[0]) 
                    w_nc_fid.createDimension('nParticles', lat_float.shape[1]) 
                    w_nc_var1 = w_nc_fid.createVariable('closest_node', 'f4',('Time','nParticles'),fill_value=fv)
                    w_nc_var1.description = 'closest node on MPAS-O mesh'
                    w_nc_var2 = w_nc_fid.createVariable('biome_index', 'f4',('Time','nParticles'),fill_value=fv)
                    w_nc_var2.description = 'biome index on MPAS-O mesh'
                    w_nc_fid.close()

                # store data in file
                data1 = np.arange(0,len(mask_global))[ind_near][index]
                data2 = mask_global[ind_near][index] # make sure to hear first reduce to "ind_near" so that indices are correct
                #print(data1)
                #print(lon[data1])
                #print(lon_float[:,nn])
                data1[np.isnan(data1)] = fv
                data2[np.isnan(data2)] = fv
                w_nc_fid = Dataset(saving_directory+netcdf_name, 'r+', format='NETCDF4_CLASSIC')      # Create and open new netcdf file to write to
                w_nc_fid.variables['closest_node'][:,nn] = index_all # CORRECTION JULY 2023, was "index" before!
                w_nc_fid.variables['biome_index'][:,nn]  = data2
                w_nc_fid.close()  
                del data2
                if np.mod(nn,200)==0:
                    print('Saved all days for float',nn+1)

        del index,ind_near
        #end_time = time.time()
        #print('Total time elapsed:',end_time-start_time)   

    
print('done')  



Load year 0060
(364, 10560)


Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'lat2' of function 'get_closest_grid_point_vector'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "../../../../../tmp/ipykernel_1053735/704263543.py", line 5:
<source missing, REPL/exec in use?>

Encountered the use of a type that is scheduled for deprecation: type 'reflected list' found for argument 'lon2' of function 'get_closest_grid_point_vector'.

For more information visit https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types

File "../../../../../tmp/ipykernel_1053735/704263543.py", line 5:
<source missing, REPL/exec in use?>

  0%|          | 3/3560 [00:02<38:43,  1.53it/s]  

Saved all days for float 7001


  6%|▌         | 203/3560 [01:28<10:15,  5.46it/s]  

Saved all days for float 7201


 11%|█▏        | 401/3560 [02:44<10:39,  4.94it/s]

Saved all days for float 7401


 17%|█▋        | 600/3560 [03:59<11:20,  4.35it/s]

Saved all days for float 7601


 23%|██▎       | 803/3560 [05:08<10:18,  4.46it/s]

Saved all days for float 7801


 28%|██▊       | 1002/3560 [06:14<04:43,  9.03it/s]

Saved all days for float 8001


 34%|███▍      | 1204/3560 [07:32<06:44,  5.83it/s]

Saved all days for float 8201


 39%|███▉      | 1401/3560 [08:43<10:31,  3.42it/s]

Saved all days for float 8401


 45%|████▌     | 1602/3560 [09:59<05:21,  6.08it/s]

Saved all days for float 8601


 51%|█████     | 1802/3560 [11:04<08:56,  3.28it/s]

Saved all days for float 8801


 56%|█████▌    | 2002/3560 [11:56<02:38,  9.82it/s]

Saved all days for float 9001


 62%|██████▏   | 2201/3560 [12:49<06:08,  3.68it/s]

Saved all days for float 9201


 67%|██████▋   | 2402/3560 [14:10<12:10,  1.59it/s]

Saved all days for float 9401


 73%|███████▎  | 2603/3560 [15:06<02:19,  6.87it/s]

Saved all days for float 9601


 79%|███████▊  | 2802/3560 [16:11<04:13,  2.99it/s]

Saved all days for float 9801


 84%|████████▍ | 3004/3560 [17:09<01:19,  6.96it/s]

Saved all days for float 10001


 90%|████████▉ | 3202/3560 [18:12<02:08,  2.78it/s]

Saved all days for float 10201


 96%|█████████▌| 3403/3560 [19:14<00:47,  3.29it/s]

Saved all days for float 10401


100%|██████████| 3560/3560 [20:02<00:00,  2.96it/s]

done





In [7]:
print(len(index))

NameError: name 'index' is not defined