# __Application of a PCM to the argo reference data base__

The aim of this notebook is to apply a Profile Classification Model (PCM) to the reference profiles chosen by the OWC software for correcting a specific profile. The float WMO is chosen below and the reference profiles come from the Argo reference database. All paths are read from the ow_config.txt file. The notebook produces a .txt file including class labels that can be read by the modified version of OWC included in this repository. 
The PCM classifies oceanographic profiles according to their shape (see the BlueCloud notebooks). When the output of this notebook is used in the OWC software, only reference profiles in the same class as the float profiles are used to evaluate them, i.e. profiles from the same dynamic region.
Authors
Logos

In [None]:
import xarray as xr
import numpy as np
import numpy.ma as ma
#import pandas as pd

import pyxpcm
from pyxpcm.models import pcm

import Plotter
from Plotter import Plotter

#from BIC_calculation import *

from classif_functions import *
 
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

import cartopy.feature as cfeature
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

#import sys
#np.set_printoptions(threshold=sys.maxsize)

import configparser

### __User inputs__

__1. Configuration file__

Paths are read from the OWC configuration file. You should give the path to this file.

In [None]:
# Absolute path to the OWC configuration file (ow_config.txt); edit for your installation.
config_filename = (
    '/home1/homedir5/perso/agarciaj/EARISE/DMQC-PCM/OWC-pcm/matlabow/ow_config.txt'
)

path to WMO_boxes_latlon.txt

In [None]:
# Relative path of the WMO boxes latitude/longitude table passed to get_refdata.
WMOboxes_latlon = "WMO_boxes_latlon.txt"

__2. Interpolation depth__

The model cannot cope with NaNs, so data should be interpolated to the same levels, and profiles that are too short will be deleted. Please choose the max_depth below. We recommend a value around 1000 m, to take into account an important part of the water column in the classification.

In [None]:
# Maximum depth used to build the standard interpolation levels
max_depth = 1_000

__3. Season__

May be useful for decorrelation of the training dataset

In [None]:
# Season(s) used for the training dataset.
# Allowed entries: 'DJF', 'MAM', 'JJA', 'SON' or 'all'.
season = ["all"]

__4. Float WMO__

Float you want to correct

In [None]:
# WMO number of the float to correct. The commented-out lines are alternative
# test floats (region name, source .mat path and WMO) kept for reference.
# Galicia
#float_mat_path = config['float_source_directory'] + '/test4/6900230.mat'
#float_WMO = 3901915
# NOTE(review): the Galicia path above points to 6900230.mat while the WMO reads 3901915 — confirm which one is intended
# agulhas current
#float_mat_path = config['float_source_directory'] + '/test3/3901915.mat'
#float_WMO = 3901915
# southern ocean
#float_mat_path = config['float_source_directory'] + '/test2/3901928.mat'
#float_WMO = 3901928
# north atlantic 
#float_mat_path = '/home1/homedir5/perso/agarciaj/EARISE/DMQC-PCM/OWC-pcm/matlabow/data/float_source/test1/4900136.mat'
# Active choice; the float .mat path is built from this value further down.
float_WMO = 4900136

__5. Model parameters__

Please choose the number of classes. You can get an idea of the best number of classes for your dataset by using the BIC_calculation notebook

In [None]:
# Number of classes of the PCM
K = 4

## 1. Load argo reference database

Important paths are read from the config file and data is loaded from the Argo reference database

Read OWC configuration file

In [None]:
# The OWC configuration file has no section header, which configparser requires,
# so a dummy '[configuration]' section is prepended before parsing.
# '%' is declared as the comment prefix of that file.
config_parser = configparser.RawConfigParser(comment_prefixes='%')

with open(config_filename) as f:
    file_content = ''.join(('[configuration]\n', f.read()))

config_parser.read_string(file_content)

# Mapping-like view on the single section: config['<key>'] returns the value as a string.
config = config_parser['configuration']

In [None]:
# WMO boxes .mat file. It is given directly here; the config-based alternative
# would be config['config_directory'] + config['config_wmo_boxes'].
wmo_boxes = 'wmo_boxes_argo.mat'

# Directory of the reference data, read from the OWC configuration.
ref_path = config['historical_directory']

# Source .mat file of the selected float: <float_source_directory>/test1/<WMO>.mat
float_mat_path = '{}/test1/{}.mat'.format(config['float_source_directory'], float_WMO)

__Load argo reference database__

Talk about ellipses and PV option

In [None]:
# Arguments for the project helper get_refdata (imported from classif_functions).
refdata_options = dict(
    float_mat_path=float_mat_path,
    WMOboxes_latlon=WMOboxes_latlon,
    wmo_boxes=wmo_boxes,
    ref_path=ref_path,
    config=config,
    map_pv_use=0,  # PV option passed as 0 — see the helper for its exact meaning
)

# Reference dataset used in the rest of the notebook.
ds = get_refdata(**refdata_options)

In [None]:
# Text summary of the loaded reference dataset.
print(ds)

## 2. Create training dataset

Use a regular sampling

In [None]:
# Start from the full reference dataset. ds_t is reassigned to the output of
# get_regulargrid_dataset in the following cell, so this only matters if that cell is skipped.
ds_t = ds

In [None]:
%%time
# Build the training dataset from ds with the project helper get_regulargrid_dataset.
# corr_dist is the distance parameter handed to it; presumably the grid spacing /
# correlation distance — verify in classif_functions.
corr_dist = 50 # in km
ds_t = get_regulargrid_dataset(ds, corr_dist, season=season)

In [None]:
# Text summary of the training dataset after sampling.
print(ds_t)

__Interpolate to standard levels__

In [None]:
# Standard levels 0, 1, ..., max_depth - 1 onto which the training profiles are interpolated.
std_lev = np.arange(max_depth)
ds_t = interpolate_standard_levels(ds_t, std_lev)

In [None]:
# Formatting of the training dataset.
# The PCM needs negative pressure values.
ds_t['PRES_INTERPOLATED'] = -np.abs(ds_t['PRES_INTERPOLATED'].values)

# 'axis' attributes expected by the Plotter class.
for coord_name, axis_tag in (('PRES_INTERPOLATED', 'Z'),
                             ('lat', 'Y'),
                             ('long', 'X'),
                             ('dates', 'T')):
    ds_t[coord_name].attrs['axis'] = axis_tag

In [None]:
# Text summary of the training dataset after interpolation and formatting.
print(ds_t)

## 3. Create prediction dataset

Add float data to initial dataset

In [None]:
# Prediction dataset: built by the project helper add_floatdata from the full reference
# dataset ds and the selected float (WMO and .mat path). Later cells identify the float
# profiles through the 'source' variable.
ds_p = add_floatdata(float_WMO, float_mat_path, ds)

In [None]:
# Text summary of the prediction dataset before interpolation.
print(ds_p)

__Interpolate to standard levels__

In [None]:
# Same standard levels as for the training dataset: 0, 1, ..., max_depth - 1.
std_lev = np.arange(max_depth)
ds_p = interpolate_standard_levels(ds_p, std_lev)

In [None]:
# Formatting of the prediction dataset.
# The PCM needs negative pressure values.
ds_p['PRES_INTERPOLATED'] = -np.abs(ds_p['PRES_INTERPOLATED'].values)

# 'axis' attributes expected by the Plotter class.
for coord_name, axis_tag in (('PRES_INTERPOLATED', 'Z'),
                             ('lat', 'Y'),
                             ('long', 'X'),
                             ('dates', 'T')):
    ds_p[coord_name].attrs['axis'] = axis_tag

In [None]:
# Text summary of the prediction dataset after interpolation and formatting.
print(ds_p)

__Plot float trajectory and ref profiles__

In [None]:
# Map of all profiles in ds_p (blue dots) with the selected float profiles overlaid in red.

# Positions (along n_profiles) of the profiles whose 'source' contains 'selected_float'.
selected_float_index = [i for i, isource in enumerate(ds_p['source'].values) if 'selected_float' in isource]

proj=ccrs.PlateCarree()
subplot_kw = {'projection': proj}
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(
            12, 12), dpi=120, facecolor='w', edgecolor='k', subplot_kw=subplot_kw)

p1 = ax.scatter(ds_p['long'], ds_p['lat'], s=3, transform=proj, label='Argo reference data')
p2 = ax.plot(ds_p['long'].isel(n_profiles = selected_float_index), ds_p['lat'].isel(n_profiles = selected_float_index), 
                 'ro', transform=proj, markersize = 3, label = str(float_WMO) + ' float trajectory')

# Land polygons from Natural Earth at 50m resolution.
land_feature = cfeature.NaturalEarthFeature(
            category='physical', name='land', scale='50m', facecolor=[0.9375, 0.9375, 0.859375])
ax.add_feature(land_feature, edgecolor='black')

# Labelled grid lines every 4 degrees in both directions.
defaults = {'linewidth': .5, 'color': 'gray', 'alpha': 0.5, 'linestyle': '--'}
gl = ax.gridlines(crs=ax.projection,draw_labels=True, **defaults)
gl.xlocator = mticker.FixedLocator(np.arange(-180, 180+1, 4))
gl.ylocator = mticker.FixedLocator(np.arange(-90, 90+1, 4))
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'fontsize': 5}
gl.ylabel_style = {'fontsize': 5}
# NOTE(review): newer cartopy releases name these attributes top_labels / right_labels — confirm against the installed version
gl.xlabels_top = False
gl.ylabels_right = False
# Axis limits: data extent plus a 1 degree margin; longitudes are wrapped to [-180, 180) first.
lon_180 = np.mod((ds_p['long']+180),360)-180
ax.set_xlim([lon_180.min()-1, lon_180.max()+1])
ax.set_ylim([ds_p['lat'].min()-1, ds_p['lat'].max()+1])


plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')

## 4. Apply PCM

Create model

In [None]:
# Vertical dimension of the profiles and names of the model features.
z_dim = 'PRES_INTERPOLATED'
var_name_mdl = ['temp', 'sal']

# Every feature is defined on the same vertical axis, taken from the training dataset.
z = ds_t[z_dim]
pcm_features = {feature_name: z for feature_name in var_name_mdl}

# PCM with K classes; the bare name on the last line displays the model summary.
m = pcm(K=K, features=pcm_features)
m

Fit model

In [None]:
# Names of the variables as they appear in the dataset.
var_name_ds = ['temp', 'sal']

# Mapping {feature name in the model: variable name in the dataset}.
features_in_ds = dict(zip(var_name_mdl, var_name_ds))

# Fit the PCM on the training dataset along the vertical dimension, then display the fitted model.
m.fit(ds_t, features=features_in_ds, dim=z_dim)
m

Predict labels

Add an artificial shift

In [None]:
# Sensitivity test: add an artificial salinity offset to the profiles of the selected
# float and check how the predicted PCM labels change with the size of the offset.

# Positions (along n_profiles) of the profiles belonging to the selected float.
selected_float_index = [i for i, isource in enumerate(ds_p['source'].values) if 'selected_float' in isource]
if not selected_float_index:
    raise ValueError("no profile with 'selected_float' in its source was found in ds_p")

fig, axes = plt.subplots(ncols=1, figsize=(8,8))

# Salinity offsets to test.
shift_values = [0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]

# Deep copy: Dataset.copy() is shallow by default, so writing into the copy would
# also modify ds_p, making the offsets accumulate and leaving ds_p shifted for
# the rest of the notebook.
ds_p_shift = ds_p.copy(deep=True)

# Unshifted float salinity, captured once so that each offset is applied to the original values.
sal_float = ds_p['sal'][selected_float_index, :].values

# Float profile drawn as a visual check: the third one when available, otherwise the last one.
i_plot = selected_float_index[min(2, len(selected_float_index) - 1)]

predictions = []
for shift in shift_values:
    # apply the offset to the float profiles only
    ds_p_shift['sal'][selected_float_index, :] = sal_float + shift
    ds_p_shift['sal'][i_plot, :].plot(y='PRES_INTERPOLATED', ax=axes)
    # predict on the SHIFTED dataset (predicting on ds_p would ignore the offset)
    prediction = m.predict(ds_p_shift, features=features_in_ds)
    # keep the labels of the float profiles only
    predictions.append(prediction.isel(n_profiles=selected_float_index, drop=True))

# One row of labels per offset, stacked along a new 'shift' dimension.
labels_shift = xr.concat(predictions, dim="shift")
labels_shift['shift'] = shift_values

In [None]:
# Display the labels obtained for each artificial offset.
labels_shift

In [None]:
# 1-based profile number of each float profile, and the (profile, offset) grid
# used as x/y positions of the label scatter plot.
n_float_profiles = len(ds_p['source'].isel(n_profiles=selected_float_index))
profile_number = np.arange(1, n_float_profiles + 1)
mesh_x, mesh_y = np.meshgrid(profile_number, shift_values)

In [None]:
# Shape check of the label array built in the artificial-shift loop.
print(np.shape(labels_shift))

plot labels

In [None]:
# Scatter of the predicted class (colour) for every float profile (x) and every
# artificially added salinity offset (y).
fig, ax = plt.subplots(figsize=(30, 10))

p1 = ax.scatter(mesh_x, mesh_y, s=5, c=labels_shift, cmap='Accent')

ax.set_xlabel(f'float {float_WMO} profile number', fontsize=10)
ax.set_ylabel('Artifitially added offset', fontsize=10)

# Half a profile of margin on each side of the x axis.
n_selected = len(selected_float_index)
ax.set_xlim([0.5, n_selected + 0.5])

cbar = plt.colorbar(p1, shrink=0.3)

In [None]:
#ds_p = ds_t
# Classify every profile of ds_p. inplace=True is requested so the result is stored in
# ds_p itself; later cells read it back as ds_p['PCM_LABELS'].
m.predict(ds_p, features=features_in_ds, inplace = True)
print(ds_p)

In [None]:
# Re-order the class names with the project helper order_class_names
# (ordering criterion not visible here — see classif_functions).
ds_p = order_class_names(ds_p, K=K)

In [None]:
# Compute class membership probabilities, stored in ds_p (inplace=True).
# The trailing ';' suppresses the cell output.
m.predict_proba(ds_p, features=features_in_ds, dim=z_dim, inplace=True);

In [None]:
# Per-class quantiles (5%, 50%, 95%) of every dataset variable, stored as '<variable>_Q'.
for var_name in var_name_ds:
    ds_p = ds_p.pyxpcm.quantile(
        m,
        q=[0.05, 0.5, 0.95],
        of=var_name,
        outname=var_name + '_Q',
        keep_attrs=True,
        inplace=True,
    )

In [None]:
# Text summary of the prediction dataset after the prediction and quantile steps.
print(ds_p)

## 6. Plots

In [None]:
# Names used in ds_p for the coordinates the Plotter refers to as latitude/longitude/time.
coords_dict = {'latitude': 'lat', 'longitude': 'long', 'time': 'dates'}

# Plotter for the classified dataset and the fitted model; the data are flagged as profiles.
P = Plotter(ds_p, m, coords_dict=coords_dict)
P.data_type = 'profiles'

vertical structure

In [None]:
# Vertical structure from the temperature quantile variable ('temp_Q'), shared y axis, x range 0-28.
P.vertical_structure(q_variable = var_name_ds[0] + '_Q', sharey=True, xlabel='Temperature (°C)', xlim=[0, 28])

In [None]:
# Vertical structure from the salinity quantile variable ('sal_Q'), shared y axis, x range 31.5-37.
P.vertical_structure(q_variable = var_name_ds[1] + '_Q', sharey=True, xlabel='Salinity (PSU)', xlim=[31.5,37])

In [None]:
# Comparison plot of the temperature quantiles ('temp_Q'), all quantiles requested.
P.vertical_structure_comp(q_variable = var_name_ds[0] + '_Q', plot_q= 'all', xlabel='Temperature (°C)')

In [None]:
# Comparison plot of the salinity quantiles ('sal_Q'), all quantiles requested.
P.vertical_structure_comp(q_variable = var_name_ds[1] + '_Q', plot_q= 'all', xlabel='Salinity (PSU)')

spatial distribution

In [None]:
# Map of the classes; lonlat_grid is passed as [8, 4] (presumably the lon/lat grid-line spacing — verify in Plotter).
P.spatial_distribution(lonlat_grid =[8,4])

plot float trajectory with classes

In [None]:
# Display the PCM labels of the selected float profiles only.
ds_p['PCM_LABELS'].isel(n_profiles = selected_float_index)
#ds_p['source'].isel(n_profiles = selected_float_index)

In [None]:
# Write the PCM labels of the selected float profiles as integers ('%i').
# "w+" truncates any existing file. The context manager guarantees the file is
# closed even if np.savetxt raises.
# NOTE(review): the file name hard-codes WMO 4901218, which differs from float_WMO
# set in the user inputs — confirm the intended name.
with open("output_files/add_artificial_shift_4901218.txt", "w+") as f:
    np.savetxt(f, ds_p['PCM_LABELS'].isel(n_profiles = selected_float_index).values, fmt=['%i'])

In [None]:
# Append a blank separator line followed by the float labels to the existing output file.
float_labels = ds_p['PCM_LABELS'].isel(n_profiles=selected_float_index).values

with open("output_files/add_artificial_shift_4901218.txt", "ab") as f:
    f.write(b"\n")
    np.savetxt(f, float_labels, fmt=['%i'])

In [None]:
# Number of profiles belonging to the selected float.
len(ds_p['source'].isel(n_profiles = selected_float_index))

In [None]:
# Map of all profiles in ds_p coloured by PCM class, zoomed on the area covered by the selected float.

# Positions (along n_profiles) of the profiles whose 'source' contains 'selected_float'.
selected_float_index = [i for i, isource in enumerate(ds_p['source'].values) if 'selected_float' in isource]

# Colormap for the classes, provided by the model plotting helper.
kmap = m.plot.cmap(name='Accent')

proj=ccrs.PlateCarree()
subplot_kw = {'projection': proj}
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(
            12, 12), dpi=120, facecolor='w', edgecolor='k', subplot_kw=subplot_kw)

#p1 = ax.scatter(ds_p['long'], ds_p['lat'], s=3, c=ds_p['PCM_LABELS'], cmap=kmap, transform=proj, label='Argo reference data', alpha=0)

#p2 = ax.scatter(ds_p['long'].isel(n_profiles = selected_float_index), ds_p['lat'].isel(n_profiles = selected_float_index), 
#                s=4, c=ds_p['PCM_LABELS'].isel(n_profiles = selected_float_index), cmap=kmap, transform=proj)
p1 = ax.scatter(ds_p['long'], ds_p['lat'], s=3, c=ds_p['PCM_LABELS'], cmap=kmap, transform=proj, label='Argo reference data')

#p3 = ax.plot(ds_p['long'].isel(n_profiles = selected_float_index), ds_p['lat'].isel(n_profiles = selected_float_index), 
#                 'bo', transform=proj, markersize = 3, markerfacecolor="None", label = str(float_WMO) + ' float trajectory')

# Land polygons from Natural Earth at 50m resolution.
land_feature = cfeature.NaturalEarthFeature(
            category='physical', name='land', scale='50m', facecolor=[0.9375, 0.9375, 0.859375])
ax.add_feature(land_feature, edgecolor='black')

# Labelled grid lines every 4 degrees in both directions.
defaults = {'linewidth': .5, 'color': 'gray', 'alpha': 0.5, 'linestyle': '--'}
gl = ax.gridlines(crs=ax.projection,draw_labels=True, **defaults)
gl.xlocator = mticker.FixedLocator(np.arange(-180, 180+1, 4))
gl.ylocator = mticker.FixedLocator(np.arange(-90, 90+1, 4))
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'fontsize': 5}
gl.ylabel_style = {'fontsize': 5}
# NOTE(review): newer cartopy releases name these attributes top_labels / right_labels — confirm against the installed version
gl.xlabels_top = False
gl.ylabels_right = False
# Axis limits: extent of the float profiles only (not of all the data) plus a 1 degree margin;
# longitudes are wrapped to [-180, 180) first.
lon_180 = np.mod((ds_p['long'].isel(n_profiles = selected_float_index)+180),360)-180
ax.set_xlim([lon_180.min()-1, lon_180.max()+1])
ax.set_ylim([ds_p['lat'].isel(n_profiles = selected_float_index).min()-1, ds_p['lat'].isel(n_profiles = selected_float_index).max()+1])

# Colorbar with K ticks at 0.5, 1.5, ..., K - 0.5.
cbar = plt.colorbar(p1, shrink=0.3)
cbar.set_ticks(np.arange(0.5, K+0.5))
#cbar.set_ticklabels(range(K))

#plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')

In [None]:
# Displays the range object covering the class indices 0 .. K-1 (inspection only; nothing is assigned).
range(K)

Classes pie chart

In [None]:
# Pie chart of the classes, drawn by the Plotter helper.
P.pie_classes()

Temporal representation

Bar plot by month

In [None]:
# Temporal distribution of the classes, binned by month.
P.temporal_distribution(time_bins = 'month')

Bar plot by season

In [None]:
# Temporal distribution of the classes, binned by season.
P.temporal_distribution(time_bins = 'season')

## 7. Output

Create a .txt file with classes

In [None]:
# Output table with one row per profile: quoted source, lat, long and PCM label.
# Longitudes are written as stored in ds_p; converting them to 0-360 (as used in OW)
# would be np.mod(ds_p['long'].values, 360), which is left disabled here.
matrix_txt = np.stack(('"'+ ds_p['source'].values + '"', ds_p['lat'].values, ds_p['long'].values, ds_p['PCM_LABELS'].values), axis=1)
header = 'source lat long PCM_LABELS'

# Longitude range of the data, printed as a check of the longitude convention in use.
print([ds_p['long'].min(), ds_p['long'].max()])

# The file name is derived from the selected float and the number of classes, so the
# output always matches the user inputs instead of a hard-coded WMO / K.
output_filename = "output_files/PCM_classes_" + str(float_WMO) + "_argo_k" + str(K) + ".txt"

# The context manager guarantees the file is closed even if np.savetxt raises.
with open(output_filename, "w+") as f:
    np.savetxt(f, matrix_txt, fmt=['%s', '%.3f', '%.3f', '%i'], header=header)

## 8. Plot with AVISO

Load AVISO data

In [None]:
# Open the AVISO NetCDF file (absolute path; edit for your installation) and display the dataset.
path_aviso = '/home1/homedir5/perso/agarciaj/EARISE/data/AVISO_gulfstream_2004.nc'
ds_aviso = xr.open_dataset(path_aviso)
ds_aviso

In [None]:
# Wrap the AVISO longitudes to the [-180, 180) convention and display the dataset again.
ds_aviso['longitude'] = np.mod((ds_aviso.longitude.values+180),360)-180
ds_aviso

In [None]:
# Date of the AVISO field shown in the plot below
time_step = "2004-09-03"

scatter plot using class

In [None]:
# AVISO 'adt' field at time_step with the selected float profiles overlaid, coloured by PCM class.
# The classified prediction dataset ds_p is used here: no ds_out is defined anywhere in this notebook.

# Positions (along n_profiles) of the float profiles; substring matching on 'source',
# as in the other cells of this notebook.
selected_float_index = [i for i, isource in enumerate(ds_p['source'].values) if 'selected_float' in isource]

proj=ccrs.PlateCarree()
subplot_kw = {'projection': proj}
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(
            6, 6), dpi=120, facecolor='w', edgecolor='k', subplot_kw=subplot_kw)

# Background: AVISO 'adt' at the chosen date.
sc = ax.pcolormesh(ds_aviso['longitude'], ds_aviso['latitude'], ds_aviso['adt'].sel(time = time_step), cmap='bwr', transform=proj)

# Foreground: float profiles coloured by class.
float_long = ds_p['long'].isel(n_profiles = selected_float_index)
float_lat = ds_p['lat'].isel(n_profiles = selected_float_index)
float_labels = ds_p['PCM_LABELS'].isel(n_profiles = selected_float_index)
p1 = ax.scatter(float_long, float_lat, s=8, c=float_labels, transform=proj, cmap='Accent')

# Land polygons from Natural Earth at 50m resolution.
land_feature = cfeature.NaturalEarthFeature(
            category='physical', name='land', scale='50m', facecolor=[0.9375, 0.9375, 0.859375])
ax.add_feature(land_feature, edgecolor='black')

# Labelled grid lines every 4 degrees in both directions.
defaults = {'linewidth': .5, 'color': 'gray', 'alpha': 0.5, 'linestyle': '--'}
gl = ax.gridlines(crs=ax.projection,draw_labels=True, **defaults)
gl.xlocator = mticker.FixedLocator(np.arange(-180, 180+1, 4))
gl.ylocator = mticker.FixedLocator(np.arange(-90, 90+1, 4))
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'fontsize': 5}
gl.ylabel_style = {'fontsize': 5}
# NOTE(review): newer cartopy releases name these attributes top_labels / right_labels — confirm against the installed version
gl.xlabels_top = False
gl.ylabels_right = False

# Fixed longitude window of the figure.
ax.set_xlim([-64,-48])

# Legend with one entry per class present in the scatter.
legend1 = ax.legend(*p1.legend_elements(),
                    loc="upper right", title="Classes")
ax.add_artist(legend1)