In [1]:
import os
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter('ignore')

import logging
import configparser

from numpy.core._exceptions import _ArrayMemoryError
from argopy import DataFetcher as ArgoDataFetcher
from argopy.errors import DataNotFound

from PCM_utils_forDMQC.classification import loadReferenceData, applyBIC, plot_BIC, \
    applyPCM, setupLogger

ImportError: cannot import name 'Plotter_cn' from 'PCM_utils_forDMQC.Plotter_cn' (L:\users\argo\dm_qc\SO_assesment\DMQC-PCM-main\PCM-design\PCM_utils_forDMQC\Plotter_cn.py)

In [None]:
# --- Locations -------------------------------------------------------------
# NOTE(review): these are absolute paths on a specific workstation; adjust
# them (or derive them from one configurable root) before running elsewhere.
GDAC_MIRROR = 'L:/scratch/argo/gdac_mirror'
MATLAB_DIRECTORY = 'L:/users/argo/dm_qc/SO_assesment/DMQC-PCM-main/OWC-pcm/matlabow/'
# MATLAB_DIRECTORY already ends with '/', so the sub-paths must not start
# with one (otherwise the joined path contains a stray '//').
FLOAT_SOURCE_RAW = MATLAB_DIRECTORY + 'data/float_source/default/'
FLOAT_SOURCE_ADJUSTED = MATLAB_DIRECTORY + 'data/float_source/adjusted/'
CONFIG_FILENAME = MATLAB_DIRECTORY + 'ow_config_jn.txt'

# --- PCM / BIC tuning ------------------------------------------------------
# PCM cannot deal with NaN values, so the reference data set is interpolated
# on standard depth levels and profiles shallower than MAX_DEPTH are excluded.
MAX_DEPTH = 1000
CORR_DISTANCE = 50  # passed to applyBIC / applyPCM as corr_dist
NUMBER_RUNS = 10  # number of runs for each k
NK = 15  # max number of classes to explore

# --- Floats to process -----------------------------------------------------
# Alternative batch (Argentina), kept for reference:
# float_list = [3900601, 3900602, 3900603, 3900604, 3900605, 3900797,
#               3900798, 3900799, 3900800, 3900801, 3900802,
#               1901494, 1901495, 1901496, 1901497]

float_list = [5900454]

# configparser only parses content that sits under a section header, so a
# '[configuration]' header is prepended to the text read from the config file.
with open(CONFIG_FILENAME) as config_file:
    raw_config_text = config_file.read()
file_content = '[configuration]\n' + raw_config_text

# Lines beginning with '%' are treated as comments by the parser.
config_parser = configparser.RawConfigParser(comment_prefixes='%')
config_parser.read_string(file_content)

# Section proxy used by the rest of the notebook to look up settings.
config = config_parser['configuration']

print('ready to process')

In [None]:
# Process each float in the list of WMO numbers in turn.
#
# For every WMO number the loop:
#   1. points the 'runtime_logger' logger at log/<WMO>_runtime_log.txt;
#   2. creates the raw and adjusted float source files (the "Wong matrix")
#      from the GDAC mirror when the raw .mat file is missing;
#   3. skips the float when a PCM classes file for it already exists;
#   4. runs applyBIC to choose the number of classes, saves the BIC plot,
#      then runs applyPCM to write the classes text file.
# A float that fails in step 2 or 4 is logged and skipped, so one bad float
# does not stop the batch.

# These directories do not depend on the float, so resolve them once.
pcm_output_directory = config['PCM_DIRECTORY'] + 'output_files/'
plots_directory = config['PCM_DIRECTORY'] + 'figures/'
log_directory = 'log'

# os.listdir() below raises FileNotFoundError on a missing directory, and the
# log file path lives inside log_directory, so make sure both exist up front.
os.makedirs(log_directory, exist_ok=True)
os.makedirs(pcm_output_directory, exist_ok=True)

for float_WMO in float_list:

    print(f'processing WMO number: {float_WMO}')

    log_file_path = f'{log_directory}/{float_WMO}_runtime_log.txt'
    # NOTE(review): setupLogger is called with the same logger name on every
    # iteration; confirm it replaces (rather than stacks) file handlers,
    # otherwise messages may also land in the log files of earlier floats.
    setupLogger(logger_name='runtime_logger', log_file=log_file_path, level=logging.INFO)
    runtime_logger = logging.getLogger('runtime_logger')

    # Generating Wong matrix for raw data (only when it is not there yet)
    wong_matrix_path = os.path.join(FLOAT_SOURCE_RAW, f'{float_WMO}.mat')
    if os.path.exists(wong_matrix_path):
        runtime_logger.info('Wong matrix already exists')
    else:
        try:
            float_ds = ArgoDataFetcher(src='localftp',
                                       local_ftp=GDAC_MIRROR,
                                       cache=True,
                                       mode='expert').float(float_WMO).load().data

            float_ds.argo.create_float_source(FLOAT_SOURCE_RAW, force='raw')
            float_ds.argo.create_float_source(FLOAT_SOURCE_ADJUSTED, force='adjusted')

        except DataNotFound:
            runtime_logger.info('DataNotFound error: due to either PSAL_QC, TEMP_QC or PRES_QC flags')
            continue
        except ValueError:
            runtime_logger.info('ValueError: check whether WMO is valid')
            continue

        runtime_logger.info('Wong matrix created')

    # If the PCM output file already exists skip this float. Output files are
    # named 'PCM_classes_<WMO>_K<n>.txt' (see pcm_file_path below), so the
    # trailing '_' is part of the match: without it, a WMO number that is a
    # prefix of another one would match the other float's file.
    pcm_file_root = f'PCM_classes_{float_WMO}'
    existing_pcm_files = [file_name for file_name in os.listdir(pcm_output_directory)
                          if file_name.startswith(f'{pcm_file_root}_')]
    if existing_pcm_files:
        runtime_logger.info('PCM has already been run')
        continue

    runtime_logger.info('applying BIC')
    float_mat_path = config['float_source_directory'] + str(float_WMO) + '.mat'

    # Starting the PCM analysis
    ds = loadReferenceData(float_mat_path=float_mat_path,
                           config=config)

    # apply BIC function to determine most suitable number of classes
    BIC, number_classes = applyBIC(ds=ds,
                                   Nrun=NUMBER_RUNS,
                                   NK=NK,
                                   corr_dist=CORR_DISTANCE,
                                   max_depth=MAX_DEPTH)
    runtime_logger.info(f'classes: {number_classes}')

    # generate the BIC plot
    plot_BIC(BIC=BIC,
             NK=NK,
             float_WMO=float_WMO,
             plots_directory=plots_directory)

    try:
        runtime_logger.info('applying PCM')
        pcm_file_path = pcm_output_directory + f'{pcm_file_root}_K{number_classes}.txt'
        # run PCM function to calculate the classes and save OWC text file
        applyPCM(ds=ds,
                 float_WMO=float_WMO,
                 float_mat_path=float_mat_path,
                 pcm_file_path=pcm_file_path,
                 number_classes=number_classes,
                 corr_dist=CORR_DISTANCE,
                 max_depth=MAX_DEPTH,
                 plots_directory=plots_directory)

    except MemoryError:
        # numpy's private _ArrayMemoryError derives from the built-in
        # MemoryError; catching the public base class avoids depending on a
        # private numpy module and also covers other allocation failures.
        runtime_logger.info('_ArrayMemoryError: Unable to allocate sufficient memory')
        continue

    runtime_logger.info('classes file saved')