# ANHA4 to ANHA12 grid mapping

This notebook uses SciPy's `griddata` for the interpolation, joblib's `Parallel` to split jobs over multiple cores, and xarray (or, for comparison, netCDF4) to read and write files.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import netCDF4 as nc
import time
from mpl_toolkits.basemap import Basemap, cm

# Library for running on multiple cores:
from joblib import Parallel

Coordinate files to load, ANHA4:

In [2]:
# Open one sample ANHA4 output file per grid type (T, U, V, W) plus the ice-model file.
# The ice-model file is opened here but no variable is read from it in this cell.
nc_ANHA4_gridT  = nc.Dataset('/scratch/brogalla/ANHA4_files/ANHA4-EXH005_y2002m01d05_gridT.nc')
nc_ANHA4_gridU  = nc.Dataset('/scratch/brogalla/ANHA4_files/ANHA4-EXH005_y2002m01d05_gridU.nc')
nc_ANHA4_gridV  = nc.Dataset('/scratch/brogalla/ANHA4_files/ANHA4-EXH005_y2002m01d05_gridV.nc')
nc_ANHA4_gridW  = nc.Dataset('/scratch/brogalla/ANHA4_files/ANHA4-EXH005_y2002m01d05_gridW.nc')
nc_ANHA4_icemod = nc.Dataset('/scratch/brogalla/ANHA4_files/ANHA4-EXH005_y2002m01d05_icemod.nc')

# Copy the 'nav_lon' / 'nav_lat' variables of each grid into numpy arrays.
# T grid
lon_ANHA4_gridT = np.array(nc_ANHA4_gridT.variables['nav_lon'])
lat_ANHA4_gridT = np.array(nc_ANHA4_gridT.variables['nav_lat'])
# U grid
lon_ANHA4_gridU = np.array(nc_ANHA4_gridU.variables['nav_lon'])
lat_ANHA4_gridU = np.array(nc_ANHA4_gridU.variables['nav_lat'])
# V grid
lon_ANHA4_gridV = np.array(nc_ANHA4_gridV.variables['nav_lon'])
lat_ANHA4_gridV = np.array(nc_ANHA4_gridV.variables['nav_lat'])
# W grid
lon_ANHA4_gridW = np.array(nc_ANHA4_gridW.variables['nav_lon'])
lat_ANHA4_gridW = np.array(nc_ANHA4_gridW.variables['nav_lat'])

Coordinate files to load, ANHA12:

In [3]:
# Open one sample ANHA12 output file per grid type (T, U, V, W) plus the ice-model file.
# The ice-model file is opened here but no variable is read from it in this cell.
nc_ANHA12_gridT  = nc.Dataset('/home/brogalla/project/brogalla/GEOTRACES/data/ANHA12/ANHA12-EXH006_5d_gridT_y2002m01d05.nc')
nc_ANHA12_gridU  = nc.Dataset('/home/brogalla/project/brogalla/GEOTRACES/data/ANHA12/ANHA12-EXH006_5d_gridU_y2002m01d05.nc')
nc_ANHA12_gridV  = nc.Dataset('/home/brogalla/project/brogalla/GEOTRACES/data/ANHA12/ANHA12-EXH006_5d_gridV_y2002m01d05.nc')
nc_ANHA12_gridW  = nc.Dataset('/home/brogalla/project/brogalla/GEOTRACES/data/ANHA12/ANHA12-EXH006_5d_gridW_y2002m01d05.nc')
nc_ANHA12_icemod = nc.Dataset('/home/brogalla/project/brogalla/GEOTRACES/data/ANHA12/ANHA12-EXH006_5d_icemod_y2002m01d05.nc')

# Copy the 'nav_lon' / 'nav_lat' variables of each grid into numpy arrays.
# T grid
lon_ANHA12_gridT = np.array(nc_ANHA12_gridT.variables['nav_lon'])
lat_ANHA12_gridT = np.array(nc_ANHA12_gridT.variables['nav_lat'])
# U grid
lon_ANHA12_gridU = np.array(nc_ANHA12_gridU.variables['nav_lon'])
lat_ANHA12_gridU = np.array(nc_ANHA12_gridU.variables['nav_lat'])
# V grid
lon_ANHA12_gridV = np.array(nc_ANHA12_gridV.variables['nav_lon'])
lat_ANHA12_gridV = np.array(nc_ANHA12_gridV.variables['nav_lat'])
# W grid
lon_ANHA12_gridW = np.array(nc_ANHA12_gridW.variables['nav_lon'])
lat_ANHA12_gridW = np.array(nc_ANHA12_gridW.variables['nav_lat'])

In [4]:
def interp_np(nav_lon, nav_lat, var_in, lon_ANHA12, lat_ANHA12):
    ''' Interpolate a field from its input points onto the ANHA12 grid.
        Thin wrapper around scipy.interpolate.griddata with method='linear'.
        =======================================================================
            nav_lon, nav_lat        : lon/lat of the input field points
            var_in                  : field values at those points
                                      (callers in this notebook pass flattened arrays)
            lon_ANHA12, lat_ANHA12  : ANHA12 lons/lats to interpolate onto
        Returns whatever griddata returns for the requested target points.
    '''
    from scipy.interpolate import griddata

    source_points = (nav_lon, nav_lat)
    target_points = (lon_ANHA12, lat_ANHA12)
    return griddata(source_points, var_in, target_points, method='linear')

In [5]:
def interp_gridT_xr(filenameT, lon_ANHA4=lon_ANHA4_gridT, lat_ANHA4=lat_ANHA4_gridT, lon_ANHA12=lon_ANHA12_gridT, lat_ANHA12=lat_ANHA12_gridT):
    ''' Read one ANHA4 gridT file with xarray, interpolate it onto the ANHA12
        grid with interp_np, and write the result to a NetCDF file with xarray.
        =======================================================================
            filenameT               : file name inside /data/brogalla/ANHA4/
            lon_ANHA4, lat_ANHA4    : lon/lat arrays of the source points
            lon_ANHA12, lat_ANHA12  : lon/lat arrays of the target grid
        Returns None; the result is the file written under
        /scratch/brogalla/ANHA4_remapped/.
    '''
    # Load file. `.values` pulls the data into memory, so the dataset can be
    # closed as soon as the three variables have been read.
    with xr.open_dataset('/data/brogalla/ANHA4/' + filenameT) as file:
        varT    = file['votemper'].values
        varS    = file['vosaline'].values
        varx    = file['somxl010'].values

    # Flatten the source coordinates once instead of on every interp_np call.
    lon_in = lon_ANHA4.flatten()
    lat_in = lat_ANHA4.flatten()

    # Size the outputs from the data and the target grid instead of fixed
    # numbers (50 levels on a 2400 x 1632 grid for the default ANHA12 inputs).
    n_depth = varT.shape[1]
    ny, nx  = lon_ANHA12.shape[-2:]

    # Interpolate ANHA4 variables onto ANHA12 grid, one depth level at a time:
    ANHA12_votemper = np.empty((1, n_depth, ny, nx))
    ANHA12_vosaline = np.empty((1, n_depth, ny, nx))
    for depth in range(n_depth):
        ANHA12_votemper[0,depth,:,:] = interp_np(lon_in, lat_in, varT[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)
        ANHA12_vosaline[0,depth,:,:] = interp_np(lon_in, lat_in, varS[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    # somxl010 has no depth axis, so it needs a single interpolation.
    ANHA12_somxl010 = np.empty((1, ny, nx))
    ANHA12_somxl010[0,:,:] = interp_np(lon_in, lat_in, varx[0,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    # Write interpolated values to file.
    # NOTE(review): the coordinate variables are filled with zeros rather than
    # real time/depth/position values - confirm this is intended.
    file_write = xr.Dataset(
        {'votemper': (("time_counter","deptht","y","x"), ANHA12_votemper),
         'vosaline': (("time_counter","deptht","y","x"), ANHA12_vosaline),
         'somxl010': (("time_counter","y","x"), ANHA12_somxl010)
        },
        coords = {
            "time_counter": np.zeros(1),
            "deptht": np.zeros(n_depth),
            "y": np.zeros(ny),
            "x": np.zeros(nx),
        },
    )
    # filenameT[13:24] is the date stamp, e.g. 'y2002m01d05' for
    # 'ANHA4-EXH005_y2002m01d05_gridT.nc'.
    file_write.to_netcdf('/scratch/brogalla/ANHA4_remapped/ANHA4-EXH005_5d_gridT_'+filenameT[13:24]+'.nc')
    return

In [None]:
def interp_gridT_nc(filenameT, lon_ANHA4=lon_ANHA4_gridT, lat_ANHA4=lat_ANHA4_gridT, lon_ANHA12=lon_ANHA12_gridT, lat_ANHA12=lat_ANHA12_gridT):
    ''' Read one ANHA4 gridT file with netCDF4, interpolate it onto the ANHA12
        grid with interp_np, and write the result to a NetCDF file with xarray.
        =======================================================================
            filenameT               : file name inside /data/brogalla/ANHA4/
            lon_ANHA4, lat_ANHA4    : lon/lat arrays of the source points
            lon_ANHA12, lat_ANHA12  : lon/lat arrays of the target grid
        Returns None; the result is the file written under
        /scratch/brogalla/ANHA4_remapped/.
    '''
    # Load file. np.array(...) copies the data into memory, so the dataset can
    # be closed as soon as the three variables have been read.
    # NOTE(review): if netCDF4 returns masked arrays for these variables,
    # np.array drops the mask - confirm fill values are handled as intended.
    with nc.Dataset('/data/brogalla/ANHA4/' + filenameT) as file:
        varT    = np.array(file.variables['votemper'])
        varS    = np.array(file.variables['vosaline'])
        varx    = np.array(file.variables['somxl010'])

    # Flatten the source coordinates once instead of on every interp_np call.
    lon_in = lon_ANHA4.flatten()
    lat_in = lat_ANHA4.flatten()

    # Size the outputs from the data and the target grid instead of fixed
    # numbers (50 levels on a 2400 x 1632 grid for the default ANHA12 inputs).
    n_depth = varT.shape[1]
    ny, nx  = lon_ANHA12.shape[-2:]

    # Interpolate ANHA4 variables onto ANHA12 grid, one depth level at a time:
    ANHA12_votemper = np.empty((1, n_depth, ny, nx))
    ANHA12_vosaline = np.empty((1, n_depth, ny, nx))
    for depth in range(n_depth):
        ANHA12_votemper[0,depth,:,:] = interp_np(lon_in, lat_in, varT[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)
        ANHA12_vosaline[0,depth,:,:] = interp_np(lon_in, lat_in, varS[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    # somxl010 has no depth axis, so it needs a single interpolation.
    ANHA12_somxl010 = np.empty((1, ny, nx))
    ANHA12_somxl010[0,:,:] = interp_np(lon_in, lat_in, varx[0,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    # Write interpolated values to file.
    # NOTE(review): the coordinate variables are filled with zeros rather than
    # real time/depth/position values - confirm this is intended.
    file_write = xr.Dataset(
        {'votemper': (("time_counter","deptht","y","x"), ANHA12_votemper),
         'vosaline': (("time_counter","deptht","y","x"), ANHA12_vosaline),
         'somxl010': (("time_counter","y","x"), ANHA12_somxl010)
        },
        coords = {
            "time_counter": np.zeros(1),
            "deptht": np.zeros(n_depth),
            "y": np.zeros(ny),
            "x": np.zeros(nx),
        },
    )
    # filenameT[13:24] is the date stamp, e.g. 'y2002m01d05' for
    # 'ANHA4-EXH005_y2002m01d05_gridT.nc'.
    file_write.to_netcdf('/scratch/brogalla/ANHA4_remapped/ANHA4-EXH005_5d_gridT_'+filenameT[13:24]+'.nc')
    return

In [None]:
def interp_gridT_nowrite(filenameT, lon_ANHA4=lon_ANHA4_gridT, lat_ANHA4=lat_ANHA4_gridT, lon_ANHA12=lon_ANHA12_gridT, lat_ANHA12=lat_ANHA12_gridT):
    ''' Read one ANHA4 gridT file with xarray and interpolate it onto the
        ANHA12 grid with interp_np, without writing anything to disk.
        =======================================================================
            filenameT               : file name inside /data/brogalla/ANHA4/
            lon_ANHA4, lat_ANHA4    : lon/lat arrays of the source points
            lon_ANHA12, lat_ANHA12  : lon/lat arrays of the target grid
        Returns None; the interpolated arrays are discarded when the
        function exits.
    '''
    # Load file. `.values` pulls the data into memory, so the dataset can be
    # closed as soon as the three variables have been read.
    with xr.open_dataset('/data/brogalla/ANHA4/' + filenameT) as file:
        varT    = file['votemper'].values
        varS    = file['vosaline'].values
        varx    = file['somxl010'].values

    # Flatten the source coordinates once instead of on every interp_np call.
    lon_in = lon_ANHA4.flatten()
    lat_in = lat_ANHA4.flatten()

    # Size the outputs from the data and the target grid instead of fixed
    # numbers (50 levels on a 2400 x 1632 grid for the default ANHA12 inputs).
    n_depth = varT.shape[1]
    ny, nx  = lon_ANHA12.shape[-2:]

    # Interpolate ANHA4 variables onto ANHA12 grid, one depth level at a time:
    ANHA12_votemper = np.empty((1, n_depth, ny, nx))
    ANHA12_vosaline = np.empty((1, n_depth, ny, nx))
    for depth in range(n_depth):
        ANHA12_votemper[0,depth,:,:] = interp_np(lon_in, lat_in, varT[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)
        ANHA12_vosaline[0,depth,:,:] = interp_np(lon_in, lat_in, varS[0,depth,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    # somxl010 has no depth axis, so it needs a single interpolation.
    ANHA12_somxl010 = np.empty((1, ny, nx))
    ANHA12_somxl010[0,:,:] = interp_np(lon_in, lat_in, varx[0,:,:].flatten(), lon_ANHA12, lat_ANHA12)

    return

In [6]:
# Wrapper handed to joblib: run the given calculation on one file and pass its result back
def joblib_solver(main_calc, file):
    """Call main_calc(file) and return whatever it returns."""
    return main_calc(file)

In [7]:
# gridT files to remap (file names only; the interp_gridT_* functions prepend the directory).
gridT_files = [
    'ANHA4-EXH005_y2002m01d05_gridT.nc',
]

In [8]:
# Add items to the list of jobs that need to be calculated. Each job reads in a file and performs a calculation on it.
# Build one job list per variant (netCDF4 read, xarray read, xarray read without write).
# Each job is a (function, positional args, keyword args) tuple: joblib_solver is
# called with the variant's interpolation function and one file name.
joblist_nc, joblist_xr, joblist_nowrite = [], [], []

for fname in gridT_files:
    # No keyword arguments are needed; the three tuples of one file share this dict.
    no_kwargs = {}

    joblist_nc.append((joblib_solver, [interp_gridT_nc, fname], no_kwargs))
    joblist_xr.append((joblib_solver, [interp_gridT_xr, fname], no_kwargs))
    joblist_nowrite.append((joblib_solver, [interp_gridT_nowrite, fname], no_kwargs))

Run the interpolation for a single gridT file with each read/write variant and time it:

In [2]:
# Time the netCDF4-read / xarray-write variant.
# perf_counter is a monotonic clock intended for measuring elapsed time, so the
# result is not affected by system clock adjustments during the run.
# NOTE(review): the output recorded under this cell (0.0000 mins) suggests the
# job did not actually run when it was last executed - re-run and confirm.
ncores=1
start = time.perf_counter()

with Parallel(n_jobs=ncores,backend='threading') as parallel:
    parallel(joblist_nc)

end = time.perf_counter()
print('GridT file calculation took: {:.4f} mins with 1 core for 1 file using netCDF4 to read, and xarray to write to file.'.format((end-start)/60))

GridT file calculation took: 0.0000 mins with 1 core for 1 file using netCDF4 to read, and xarray to write to file.


In [16]:
# Time the xarray-read / xarray-write variant.
# perf_counter is a monotonic clock intended for measuring elapsed time, so the
# result is not affected by system clock adjustments during the run.
ncores=1
start = time.perf_counter()

with Parallel(n_jobs=ncores,backend='threading') as parallel:
    parallel(joblist_xr)

end = time.perf_counter()

print('GridT file calculation took: {:.4f} mins with 1 core for 1 file using xarray and writing to file.'.format((end-start)/60))

GridT file calculation took:  10.53926839431127 mins, with 1 core for 1 file using xarray and writing to file.


In [9]:
# Time the xarray-read variant that skips writing the output file.
# perf_counter is a monotonic clock intended for measuring elapsed time, so the
# result is not affected by system clock adjustments during the run.
ncores=1
start = time.perf_counter()

with Parallel(n_jobs=ncores,backend='threading') as parallel:
    parallel(joblist_nowrite)

end = time.perf_counter()
print('GridT file calculation took: {:.4f} mins with 1 core for 1 file using xarray and not writing to file.'.format((end-start)/60))

GridT file calculation took:  10.022475628058116 mins, with 1 core for 1 file using xarray and not writing to file.
