In [1]:
#Region selection for CORA RAW - July 2021
import datetime as dt
import glob
import netCDF4 as nc
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma
import xarray as xr
import cmocean
from gsw import sigma0
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
# Set cartopy's `data_dir` configuration entry to a folder relative to the working directory.
cartopy.config["data_dir"] = './cartopy_shapefiles'

# Echo the current working directory as the cell output (the relative path above depends on it).
os.getcwd()

'/home1/datahome/epauthen/NaN_profiles_gestion'

# Preselection of the Raw CORA profiles (INSITU_GLO_TS_REP_OBSERVATIONS_013_001_b)
- Use the CORA raw files, excluding moorings (`*MO.nc`), drifting buoys (`*DB.nc`) and `*TE.nc` files.
- Select profiles from files that contain PRES, TEMP and PSAL.
- Use the adjusted variables (`*_ADJUSTED`) when they exist.
- Keep only the samples whose QC flag is 1 for all three variables (TEMP, PSAL and PRES) at the same time.
- Keep only profiles in the Gulf Stream region (longitude in [-85, -20], latitude in [20, 60]).
- Save the selected profiles as daily xarray datasets written to netCDF (TEMP, PSAL, PRES, LON, LAT, JULD (days since 1950-01-01), DC_REFERENCE, PLATFORM_NUMBER).

--> Location of output : /home/datawork-lops-bluecloud/osnet/cora_raw_gulfstream/raw/

In [2]:
%%time
cora_rep = '/home/datawork-coriolis-cora-s/INSITU/global/'
for yy in range(2011,2021):
    listfile = set(glob.glob(cora_rep + str(yy) + '/CO_*.nc')) - set(glob.glob(cora_rep + str(yy) + '/*MO.nc')) - set(glob.glob(cora_rep + str(yy) + '/*TE.nc')) - set(glob.glob(cora_rep + str(yy) + '/*DB.nc'))
    count = list()
    for myfile in listfile:
        filename = os.path.basename(myfile)
        ds = nc.Dataset(myfile, 'r')  # r+
        dm = ds.variables['DATA_MODE'][:]
        dcref = ds.variables['DC_REFERENCE'][:,:]
        lon   = ds.variables['LONGITUDE'][:]
        lat   = ds.variables['LATITUDE'][:]
        juld  = ds.variables['JULD'][:]
        platform_number = ds.variables['PLATFORM_NUMBER'][:]
        ind     = list()
        mydcref = list()
        mypfnum = list()
        for ii in range(0,len(dm)):
            suffix  =''
            #REGION
            if lon[ii]>=-85 and lon[ii]<=-20 and lat[ii]>=20 and lat[ii]<=60:
                #ADJUSTED IF IT EXIST
                if dm[ii] == b'A' or dm[ii] == b'D':
                    suffix = '_ADJUSTED'
                #TEMP AND PSAL AND PRES
                if 'TEMP' in ds.variables.keys() and 'PSAL' in ds.variables.keys() and 'PRES' in ds.variables.keys():
                    ind.append(ii)
                    dc = b''.join(dcref[ii,dcref[ii,:].mask== False])
                    dc = dc.decode('utf-8')
                    mydcref.append(dc)
                    pf = b''.join(platform_number[ii,platform_number[ii,:].mask== False])
                    mypfnum.append(pf)
        if len(ind)>0:
            count.append(len(ind))
            #RETRIEVE QC FOR ind AND CREATE MASK FROM IT
            mask_TEMP = np.isin(ds.variables['TEMP' + suffix  + '_QC'][ind,:].data, b'1')
            mask_PSAL = np.isin(ds.variables['PSAL' + suffix  + '_QC'][ind,:].data, b'1')
            mask_PRES = np.isin(ds.variables['PRES' + suffix  + '_QC'][ind,:].data, b'1')
            #MAKE A SINGLE MASK FROM THE COMBINED THREE OTHERS
            mask = mask_TEMP & mask_PSAL & mask_PRES
            #EXPORT IN DATASET XARRAY
            dsxr = xr.Dataset(
            data_vars=dict(
                TEMP = (["N_PROF","N_PRES"], ma.masked_array(ds.variables['TEMP' + suffix ][ind,:], mask=~mask)),
                PSAL = (["N_PROF","N_PRES"], ma.masked_array(ds.variables['PSAL' + suffix ][ind,:], mask=~mask)),
                PRES = (["N_PROF","N_PRES"], ma.masked_array(ds.variables['PRES' + suffix ][ind,:], mask=~mask)),
                DC_REFERENCE    = (["N_PROF"], mydcref),
                PLATFORM_NUMBER = (["N_PROF"], mypfnum)
            ),
            coords=dict(
                LON  = (["N_PROF"], lon[ind]),
                LAT  = (["N_PROF"], lat[ind]),
                JULD = (["N_PROF"],juld[ind])
            ),
            attrs=dict(description="CORA subsampled : Selection of profiles with (1) TEMP, PSAL and PRES present, (2) adjusted data if adjusted exist and (3) in the Gulf Stream region (lon in [-80,-30], lat in [23,50]"),
            )
            dsxr.JULD.attrs["units"] = "days since 1950-01-01"
            dsxr['DC_REFERENCE'] = dsxr['DC_REFERENCE'].astype('|S8')
            dsxr.to_netcdf("/home/datawork-lops-bluecloud/osnet/data_cora_raw/raw_all_depth/" + myfile[50:76] + "_GulfStream.nc")
        ds.close()
    print('Extracted a total of ' + str(sum(count)) + ' TS profiles in the Gulf Stream Region, for the year ' + str(yy) + '.')

Extracted a total of 17215 TS profiles in the Gulf Stream Region, for the year 2011.
Extracted a total of 22690 TS profiles in the Gulf Stream Region, for the year 2012.
Extracted a total of 18836 TS profiles in the Gulf Stream Region, for the year 2013.
Extracted a total of 14220 TS profiles in the Gulf Stream Region, for the year 2014.
Extracted a total of 14072 TS profiles in the Gulf Stream Region, for the year 2015.
Extracted a total of 17170 TS profiles in the Gulf Stream Region, for the year 2016.
Extracted a total of 14194 TS profiles in the Gulf Stream Region, for the year 2017.
Extracted a total of 16200 TS profiles in the Gulf Stream Region, for the year 2018.
Extracted a total of 14690 TS profiles in the Gulf Stream Region, for the year 2019.
Extracted a total of 14424 TS profiles in the Gulf Stream Region, for the year 2020.
CPU times: user 9min 18s, sys: 1min 20s, total: 10min 38s
Wall time: 33min 42s


# Plot the TS profiles + TS diagram and Maps (to add in the loop if needed)
- Reopen the netcdf to convert the date with xarray

In [None]:
        # Fragment meant to be pasted inside the per-file loop: it relies on `myfile`
        # being the path of the file currently processed.
        # NOTE(review): the [47:73] slice assumes a fixed-length local directory
        # prefix in `myfile` - confirm before running on another machine.
        stem = myfile[47:73]
        plt.ioff()
        # The context manager closes the dataset even if plotting fails.
        with xr.open_mfdataset("/Users/epauthenet/Documents/Database/CORA_GS/2019/" + stem + "_GulfStream.nc") as dsxr:
            # Date of the first profile, used as figure title. Index 0 always exists,
            # whereas index 1 fails for a file holding a single profile.
            ts = pd.to_datetime(dsxr.JULD.values[0])
            ts = ts.strftime('%Y.%m.%d')

            # Figure 1: temperature profile, salinity profile and T-S diagram.
            fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 5), dpi=120, facecolor='w', edgecolor='k')
            ax[0].scatter(dsxr.TEMP,-dsxr.PRES,s = 1)
            ax[0].set_title(ts)
            ax[0].grid()
            ax[0].set_xlabel('Temperature')
            ax[1].scatter(dsxr.PSAL,-dsxr.PRES,s = 1)
            ax[1].grid()
            ax[1].set_xlabel('Salinity')
            ax[2].scatter(dsxr.PSAL,dsxr.TEMP,s = 1)
            ax[2].grid()
            ax[2].set_xlabel('Salinity')
            ax[2].set_ylabel('Temperature')
            fig.savefig("/Users/epauthenet/Documents/Database/CORA_GS/Figure/2019/" + stem + "_TS.jpg")
            # Release the figure: this code runs once per file.
            plt.close(fig)

            # Figure 2: map of the profile positions, drawn on its own figure so that
            # it is not superimposed on the three panels saved above.
            fig_map = plt.figure(dpi=120, facecolor='w', edgecolor='k')
            ax_map = fig_map.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
            ax_map.set_extent([-80, -20, 23, 55], ccrs.PlateCarree())
            ax_map.scatter(dsxr.LON,dsxr.LAT,s = 1)
            ax_map.add_feature(cfeature.LAND)
            ax_map.add_feature(cfeature.COASTLINE)
            ax_map.set_title(ts)
            gl = ax_map.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False,linewidth=.5, color='gray', alpha=0.5, linestyle='--')
            gl.top_labels = False
            gl.right_labels = False
            fig_map.savefig("/Users/epauthenet/Documents/Database/CORA_GS/Figure/2019/" + stem + "_map.jpg")
            plt.close(fig_map)