# Compute surface pCO2 for GLORYS12V1 daily data using Model 3
Created by Ivan Lima on Tue May 10 2022 13:24:17 -0400

In [1]:
%matplotlib inline
import xarray as xr
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os, datetime
from tqdm import notebook
# Stamp the notebook with the wall-clock time of this run, so the saved output
# shows when the cells were last executed.
print('Last updated on {}'.format(datetime.datetime.now().ctime()))

Last updated on Tue Jul  5 13:01:57 2022


## Load neural network model and data scaler

In [2]:
import torch, joblib
import torch.nn as nn

# Pre-fitted feature scaler loaded from disk; inputs are passed through
# scaler.transform() before being fed to the network.
# NOTE(review): presumably this is the scaler fitted on the training data -- confirm.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler files that come from a trusted source.
scaler = joblib.load('models/scaler_nosat.joblib')

# Input columns, in the order they are selected and handed to the scaler and the model.
features = ['depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave']

n_features = len(features) # number of input variables
n_targets = 2  # number of output variables (DIC and TA)
n_hidden = 256 # number of units in each hidden layer (layer width, not layer count)
learning_rate = 0.001 # step size passed to the Adam optimizer

class MLPReg(nn.Module):
    """Fully connected regression network with two hidden layers.

    Layout: Linear(n_features -> n_hidden) -> LeakyReLU ->
    Linear(n_hidden -> n_hidden) -> LeakyReLU -> Linear(n_hidden -> n_targets).
    The output layer has no activation.

    Parameters
    ----------
    n_features : int
        Size of the last dimension of the input tensor.
    n_hidden : int
        Number of units in each of the two hidden layers.
    n_targets : int
        Size of the last dimension of the output tensor.
    """

    def __init__(self, n_features, n_hidden, n_targets):
        super().__init__()
        # Attribute names and registration order define the state-dict keys
        # (l1.*, l2.*, l3.*) and the printed module layout, so they are fixed.
        self.l1    = nn.Linear(in_features=n_features, out_features=n_hidden)
        self.l2    = nn.Linear(in_features=n_hidden, out_features=n_hidden)
        self.l3    = nn.Linear(in_features=n_hidden, out_features=n_targets)
        self.activ = nn.LeakyReLU()

    def forward(self, x):
        """Return the network output for input tensor ``x`` (last dim = n_features)."""
        hidden = self.activ(self.l1(x))       # first hidden layer
        hidden = self.activ(self.l2(hidden))  # second hidden layer
        return self.l3(hidden)                # linear output layer

# Build the network with the architecture declared above.
nn_reg = MLPReg(n_features=n_features, n_hidden=n_hidden, n_targets=n_targets) # create model instance
# Training objects; they are not needed to load the weights or run the model in eval mode.
loss_func = nn.MSELoss()                                                       # loss function (mean square error)
optimizer = torch.optim.Adam(nn_reg.parameters(), lr=learning_rate)            # optimizer

# Restore the trained weights. map_location='cpu' remaps every stored tensor to
# the CPU, so a checkpoint saved from a CUDA device still loads on a machine
# without a GPU (the model instance above lives on the CPU).
nn_reg.load_state_dict(torch.load('models/nn_reg_nosat_state.pth', map_location='cpu'))
# Switch to evaluation mode; eval() returns the module, so the layer summary
# is displayed as the cell output.
nn_reg.eval()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


MLPReg(
  (l1): Linear(in_features=5, out_features=256, bias=True)
  (l2): Linear(in_features=256, out_features=256, bias=True)
  (l3): Linear(in_features=256, out_features=2, bias=True)
  (activ): LeakyReLU(negative_slope=0.01)
)

## Extract bottom depth at grid points

In [3]:
from scipy.interpolate import griddata

# Open one year of GLORYS12V1 output; in this cell it is only used to read the
# model's longitude/latitude grid.
ds_grid = xr.open_dataset('/bali/data/ilima/GLORYS12V1/daily/GLORYS12V1_NW_Atlantic_1993_daily.nc',
                          drop_variables = ['mlotst','zos','bottomT'])

# Flatten the 2-D grid into one (longitude, latitude) row per grid point.
xx, yy = np.meshgrid(ds_grid.longitude.values, ds_grid.latitude.values)
df_positions = pd.DataFrame({'longitude': xx.ravel(), 'latitude': yy.ravel()})

# read bottom topography data
etopo = xr.open_dataset('data/etopo5.nc', chunks='auto')
# Disabled ocean-only mask, kept for reference:
# etopo['bath'] = etopo.bath.where(etopo.bath<0) # ocean points only
etopo = etopo.isel(X=slice(3100,4000), Y=slice(1300,1700)) # subset data to make things faster

X = np.where(etopo.X>180, etopo.X-360, etopo.X) # 0:360 -> -180:180
lon_topo, lat_topo = np.meshgrid(X, etopo.Y.values)
lon, lat = df_positions.longitude.values, df_positions.latitude.values
# Linearly interpolate the topography onto the model grid points; griddata
# returns NaN for points outside the convex hull of the input points.
bottom_depth = griddata((lon_topo.ravel(), lat_topo.ravel()), etopo.bath.values.ravel(), (lon, lat), method='linear')
# Store the magnitude of the interpolated value as the bottom depth.
# NOTE(review): presumably `bath` is negative below sea level (see the disabled
# mask above); with that mask off, positive values are also turned into a
# positive "depth" by np.abs -- confirm this is intended.
df_positions['bottom_depth'] = np.abs(bottom_depth)
# Quick range check of the resulting depths, followed by a preview of the table.
print(df_positions.bottom_depth.min(), df_positions.bottom_depth.max())
df_positions.head()

0.0 5313.0


Unnamed: 0,longitude,latitude,bottom_depth
0,-78.0,34.0,4.0
1,-77.916664,34.0,10.997803
2,-77.833336,34.0,3.000732
3,-77.75,34.0,11.0
4,-77.666664,34.0,18.000275


## Read monthly atmospheric pCO2 data

In [4]:
# Monthly atmospheric CO2 table, indexed by (year, month) so that a single
# month's value can be looked up with .loc[(year, month), <column>].
df_pco2_monthly = pd.read_csv('work/co2_mm_mlo.csv').set_index(['year','month'])

## Apply NN model to GLORYS12V1 data and compute surface pCO2 

In [5]:
import PyCO2SYS as pyco2
import gsw

# Metadata attached to each output variable before the dataset is written.
# Every entry carries the same four attributes, in the same order.
var_attrs = dict(
    SST=dict(
        long_name='Sea surface temperature',
        standard_name='sea_water_potential_temperature',
        units='degrees C',
        unit_long='Degrees Celsius',
    ),
    SSS=dict(
        long_name='Sea surface salinity',
        standard_name='sea_water_salinity',
        units='1e-3',
        unit_long='Practical Salinity Unit',
    ),
    DIC=dict(
        long_name='Dissolved inorganic carbon',
        standard_name='DIC',
        units='micro mol/kg',
        unit_long='micro mol/kg',
    ),
    TA=dict(
        long_name='Total alkalinity',
        standard_name='TA',
        units='micro mol/kg',
        unit_long='micro mol/kg',
    ),
    pCO2=dict(
        long_name='Seawater partial pressure of CO2',
        standard_name='pCO2',
        units='micro atm',
        unit_long='micro atm',
    ),
    fCO2=dict(
        long_name='Seawater fugacity of CO2',
        standard_name='fCO2',
        units='micro atm',
        unit_long='micro atm',
    ),
)

# Columns kept from the merged table: coordinates first, then the model inputs.
cols = [
    'time', 'longitude', 'latitude',
    'depth', 'bottom_depth', 'Temperature', 'Salinity', 'pCO2_monthave',
]
# Directory that receives one netCDF file per year.
outdir = '/bali/data/ilima/GLORYS12V1/daily/BGC/Model_3/surface'

# For each year: read the surface GLORYS12V1 fields, attach the monthly
# atmospheric pCO2 and the bottom depth, predict DIC and TA with the neural
# network, derive pCO2/fCO2 with PyCO2SYS, and write one netCDF file per year.
for year in notebook.tqdm(range(1993,2020)):
    with xr.open_dataset('/bali/data/ilima/GLORYS12V1/daily/GLORYS12V1_NW_Atlantic_{}_daily.nc'.format(year),
                            drop_variables = ['mlotst','zos','bottomT']) as ds_in:
        ds = ds_in.isel(depth=0).squeeze(drop=True) # surface fields

        # add monthly atmospheric pCO2: build a time series holding, for every
        # time step, the monthly value of the month it falls in. The series is
        # seeded with NaN on the first iteration whatever that month is, so the
        # loop does not depend on the table's first month being January.
        pco2_monthave = None
        for month in df_pco2_monthly.loc[year].index:
            fill = np.nan if pco2_monthave is None else pco2_monthave
            pco2_monthave = xr.where(ds.time.dt.month==month, df_pco2_monthly.loc[(year,month),'average'], fill)
        ds['pCO2_monthave'] = pco2_monthave

        # merge bottom depth with GLORYS12V1 data
        df_glorys = ds[['pCO2_monthave','thetao','so']].to_dataframe()
        df_glorys = df_glorys.reset_index().rename(columns={'thetao':'Temperature', 'so':'Salinity'})
        df_glorys = pd.merge(df_positions, df_glorys, on=['longitude', 'latitude'])
        # rows with any missing value are excluded from the model input; they
        # stay in df_glorys and end up as NaN in the output variables
        df_data = df_glorys[cols].dropna()
        # print('{:,d} data points\n'.format(df_data.shape[0]))

        X_numpy = df_data[features].values                      # select features
        X_numpy_scaled = scaler.transform(X_numpy)              # rescale features
        X = torch.from_numpy(X_numpy_scaled.astype(np.float32)) # convert array to tensor

        # apply model to rescaled features (no gradients needed for inference)
        with torch.no_grad():
            Y_pred = nn_reg(X)

        # add estimated DIC & TA to features dataframe; convert explicitly to
        # NumPy so the columns do not rely on pandas coercing torch tensors
        df_data['DIC'] = Y_pred[:,0].numpy()
        df_data['TA'] = Y_pred[:,1].numpy()

        # compute additional carbonate chemistry variables
        # gsw.p_from_z expects height (negative downward), hence the sign flip on depth
        pressure =  gsw.p_from_z(-df_data.depth.values, df_data.latitude.values)
        kwargs = dict(
            par1 = df_data.TA.values,   # TA
            par2 = df_data.DIC.values,  # DIC
            par1_type = 1,              # type 1 = alkalinity
            par2_type = 2,              # type 2 = DIC
            salinity = df_data.Salinity.values,
            temperature = df_data.Temperature.values,
            pressure = pressure,
            opt_k_carbonic = 10,  # LDK00, Lueker et al 2000
            opt_k_bisulfate = 1,  # D90a, Dickson 1990
            opt_total_borate = 2, # LKB10, Lee et al 2010
            opt_k_fluoride = 2    # PF87, Perez & Fraga 1987
        )
        results = pyco2.sys(**kwargs)
        co2sys_vars = ['pCO2', 'fCO2']
        for vname in co2sys_vars:
            df_data[vname] = results[vname]

        # merge estimated carbonate chemistry variables to original dataframe
        # (assignment aligns on the row index, so rows dropped above become NaN)
        for vname in ['DIC','TA'] + co2sys_vars:
            df_glorys[vname] = df_data[vname]

        # convert dataframe to xarray dataset
        df = df_glorys.set_index(['time','latitude','longitude'])
        df = df.rename(columns={'Temperature':'SST', 'Salinity':'SSS'})
        ds_out = df[['SST', 'SSS', 'DIC', 'TA', 'pCO2', 'fCO2']].to_xarray()

        # set variable attributes
        for vname in var_attrs:
            ds_out[vname].attrs.update(var_attrs[vname])

        # copy coordinate attributes from the input dataset
        for vname in ['latitude','longitude']:
            for attr in ['long_name','standard_name','units','unit_long']:
                ds_out[vname].attrs[attr] = ds[vname].attrs[attr]

        # save dataset to netCDF file
        outfile = os.path.join(outdir, 'GLORYS12V1_NW_Atlantic_{}_surf_pCO2.nc'.format(year))
        print('writing {}'.format(outfile))
        ds_out.to_netcdf(outfile, mode='w', unlimited_dims=['time'])

  0%|          | 0/27 [00:00<?, ?it/s]

writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1993_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1994_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1995_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1996_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1997_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1998_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_1999_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_2000_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_2001_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surface/GLORYS12V1_NW_Atlantic_2002_surf_pCO2.nc
writing /bali/data/ilima/GLORYS12V1/daily/BGC/surf