# CTD Calibration 

This script and notebook gives the code for loading the CTD calibration coefficients into a properly named calibration csv, as well as an example of how to use it. The calibration coefficients can be parsed from either the vendor calibration (.cal) file, the vendor xmlcon file, or from the capture file (either .cap, .log, or .txt) from the QCT check-in.


In [67]:
#!/usr/bin/env python

import csv
import datetime
import os
import shutil
import sys
import time
import re
import xml.etree.ElementTree as et
import pandas as pd
from zipfile import ZipFile


class CTDCalibration():
    # Class that stores calibration values for CTDs.

    def __init__(self, uid):
        self.serial = ''
        self.uid = uid
        self.coefficients = {}
        self.date = {}
        self.type = ''
                    
        self.coefficient_name_map = {
            'TA0': 'CC_a0',
            'TA1': 'CC_a1',
            'TA2': 'CC_a2',
            'TA3': 'CC_a3',
            'CPCOR': 'CC_cpcor',
            'CTCOR': 'CC_ctcor',
            'CG': 'CC_g',
            'CH': 'CC_h',
            'CI': 'CC_i',
            'CJ': 'CC_j',
            'G': 'CC_g',
            'H': 'CC_h',
            'I': 'CC_i',
            'J': 'CC_j',
            'PA0': 'CC_pa0',
            'PA1': 'CC_pa1',
            'PA2': 'CC_pa2',
            'PTEMPA0': 'CC_ptempa0',
            'PTEMPA1': 'CC_ptempa1',
            'PTEMPA2': 'CC_ptempa2',
            'PTCA0': 'CC_ptca0',
            'PTCA1': 'CC_ptca1',
            'PTCA2': 'CC_ptca2',
            'PTCB0': 'CC_ptcb0',
            'PTCB1': 'CC_ptcb1',
            'PTCB2': 'CC_ptcb2',
            # additional types for series O
            'C1': 'CC_C1',
            'C2': 'CC_C2',
            'C3': 'CC_C3',
            'D1': 'CC_D1',
            'D2': 'CC_D2',
            'T1': 'CC_T1',
            'T2': 'CC_T2',
            'T3': 'CC_T3',
            'T4': 'CC_T4',
            'T5': 'CC_T5',
        }

        self.o2_coefficients_map = {
            'A': 'CC_residual_temperature_correction_factor_a',
            'B': 'CC_residual_temperature_correction_factor_b',
            'C': 'CC_residual_temperature_correction_factor_c',
            'E': 'CC_residual_temperature_correction_factor_e',
            'SOC': 'CC_oxygen_signal_slope',
            'OFFSET': 'CC_frequency_offset'
        }
                
    @property
    def uid(self):
        return self._uid
        
    @uid.setter
    def uid(self, d):
        r = re.compile('.{5}-.{6}-.{5}')
        if r.match(d) is not None:
            self.serial = d.split('-')[2]
            self._uid = d
        else:
            raise Exception(f"The instrument uid {d} is not a valid uid. Please check.")
            

        
    def read_cal(self, data):
        """
        Function which reads and parses the CTDBP calibration values stored
        in a .cal file.
        
        Args:
            filename - the name of the calibration (.cal) file to load. If the
                cal file is not located in the same directory as this script, the
                full filepath also needs to be specified.
        Returns:
            self.coefficients - populated coefficients dictionary
            self.date - the calibration dates associated with the calibration values
            self.type - the type (i.e. 16+/37-IM) of the CTD
            self.serial - populates the 5-digit serial number of the instrument
        """
                       
        for line in data.splitlines():
            key, value = line.replace(" ","").split('=')
            
            if key == 'INSTRUMENT_TYPE' and value == 'SEACATPLUS':
                self.type = '16'
            
            elif key == 'SERIALNO':
                if self.serial != value.zfill(5):
                    raise Exception(f'Serial number {value.zfill(5)} stored in cal file does not match {self.serial} from the UID.')
                     
            elif 'CALDATE' in key:
                self.date.update({key:datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
            
            else:
                name = self.coefficient_name_map.get(key)
                if not name or name is None:
                    continue
                else:
                    self.coefficients.update({name:value})
                    
                    
    def load_cal(self, filepath):
        """
        Loads all of the calibration coefficients from the vendor cal files for
        a given CTD instrument class.
    
        Args:
            filepath - directory path to where the zipfiles are stored locally
        Raises:
            FileExistsError - Checks the given filepath that a .cal file exists           
        Returns:
            self.coefficients - populated coefficients dictionary
            self.date - the calibration dates associated with the calibration values
            self.type - the type (i.e. 16+/37-IM) of the CTD
            self.serial - populates the 5-digit serial number of the instrument
        """
        
        if filepath.endswith('.zip'):
            with ZipFile(filepath) as zfile:
                filename = [name for name in zfile.namelist() if '.cal' in name]
                if len(filename) > 0:
                    data = zfile.read(filename[0]).decode('ASCII')
                    self.read_cal(data)
                else:
                    FileExistsError(f"No .cal file found in {filepath}.")

        elif filepath.endswith('.cal'):
            with open(filepath) as filename:
                data = filename.read()
                self.read_cal(data)
            
        else:
            FileExistsError(f"No .cal file found in {filepath}.")      

                    
    def read_xml(self, data):
        """
        Function which reads and parses the CTDBP calibration values stored
        in the xmlcon file.
        
        Args:
            data - the data string to parse
        Returns:
            self.coefficients - populated coefficients dictionary
            self.date - the calibration dates associated with the calibration values
            self.type - the type (i.e. 16+/37-IM) of the CTD
            self.serial - populates the 5-digit serial number of the instrument
        """
        
        Tflag  = False
        Cflag  = False
        O2flag = False

        for child in data.iter():
            key = child.tag.upper()
            value = child.text.upper()
            
            if key == 'NAME':
                if '16PLUS' in value:
                    self.type = '16'
    
            # Check if we are processing the calibration values for the temperature sensor
            # If we already have parsed the Temp data, need to turn the flag off
            if key == 'TEMPERATURESENSOR':
                Tflag = True
            elif 'SENSOR' in key and Tflag == True:
                Tflag = False
            else:
                pass
    
            # Check on if we are now parsing the conductivity data
            if key == 'CONDUCTIVITYSENSOR':
                Cflag = True
            elif 'SENSOR' in key and Cflag == True:
                Cflag = False
            else:
                pass
    
            # Check if an oxygen sensor has been appended to the CTD configuration
            if key == 'OXYGENSENSOR':
                O2flag = True

            # Check that the serial number in the xmlcon file matches the serial
            # number from the UID
            if key == 'SERIALNUMBER':
                if self.serial != value.zfill(5):
                    raise Exception(f'Serial number {value.zfill(5)} stored in xmlcon file does not match {self.serial} from the UID.')
        
            # Parse the calibration dates of the different sensors
            if key == 'CALIBRATIONDATE':
                if Tflag == True:
                    self.date.update({'TCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
                elif Cflag == True:
                    self.date.update({'CCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
                else:
                    self.date.update({'PCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
            
            # Now, we get to parse the actual calibration values, but it is necessary to make sure the
            # key names are correct
            if Tflag == True:
                key = 'T'+key
            
            name = self.coefficient_name_map.get(key)
            if not name or name is None:
                if O2flag == True:
                    name = self.o2_coefficients_map.get(key)
                    self.coefficients.update({name:value})
                else:
                    pass
            else:
                self.coefficients.update({name:value})
                
                
    def load_xml(self, filepath):
        """
        Loads all of the calibration coefficients from the vendor xmlcon files for
        a given CTD instrument class.
    
        Args:
            filepath - the name of the xmlcon file to load and parse. If the
                xmlcon file is not located in the same directory as this script,
                the full filepath also needs to be specified. May point to a zipfile.
        Raises:
            FileExistsError - Checks the given filepath that an xmlcon file exists           
        Returns:
            self.coefficients - populated coefficients dictionary
            self.date - the calibration dates associated with the calibration values
            self.type - the type (i.e. 16+/37-IM) of the CTD
            self.serial - populates the 5-digit serial number of the instrument
        """
        
        if filepath.endswith('.zip'):
            with ZipFile(filepath) as zfile:
                filename = [name for name in zfile.namelist() if '.xmlcon' in name]
                if len(filename) > 0:
                    data = et.parse(zfile.open(filename[0]))
                    self.read_xml(data)
                else:
                    FileExistsError(f"No .cal file found in {filepath}.")
                    
        elif filepath.endswith('.xmlcon'):
            with open(filepath) as file:
                data = et.parse(file)
                self.read_xml(data)

        else:
            FileExistsError(f"No .cal file found in {filepath}.")
            
            
    def load_qct(self, filepath):
        """
        Function which parses the output from the QCT check-in and loads them into
        the CTD object.
        
        Args:
            filepath - the full directory path and filename 
        Raises:
            ValueError - checks if the serial number parsed from the UID matches the
                the serial number stored in the file.
        Returns:
            self.coefficients - populated coefficients dictionary
            self.date - the calibration dates associated with the calibration values
            self.type - the type (i.e. 16+/37-IM) of the CTD
            self.serial - populates the 5-digit serial number of the instrument 
        """
        
        with open(qct_path+qct_file[0]) as filename:
            data = filename.read()

        Tflag  = False
        O2flag = False
        Cflag  = False

        data.splitlines()
        for line in data.splitlines():
    
            if 'SBE 16Plus' in line:
                self.type = '16'
        
            elif 'SERIAL NO.' in line:
                items = line.split()
                ind = items.index('NO.')
                if self.serial != items[ind+1].strip().zfill(5):
                    raise ValueError(f'Serial number {value.zfill(5)} from the QCT file does not match {self.serial} from the UID.')
                else:
                    pass
        
        else:
            items = re.split(': | =',line)
            key = items[0].strip()
            value = items[-1].strip()
        
            if key == 'temperature':
                self.date.update({'TCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})    
        
            elif key == 'conductivity':
                self.date.update({'CCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
            
            elif key == 'pressure S/N':
                self.date.update({'PCALDATE':datetime.datetime.strptime(value, '%d-%b-%y').strftime('%Y%m%d')})
            
            else:
                name = self.coefficient_name_map.get(key)
                if not name or name is None:
                    if O2flag == True:
                        name = self.o2_coefficients_map.get(key)
                        self.coefficients.update({name:value})
                    else:
                        pass
                else:
                    self.coefficients.update({name:value})
                
    
    def write_csv(self, outpath):
        """
        This function writes the correctly named csv file for the ctd to the
        specified directory.
        
        Args:
            outpath - directory path of where to write the csv file
        Raises:
            ValueError - raised if the CTD object's coefficient dictionary 
                has not been populated
        Returns:
            self.to_csv - a csv of the calibration coefficients which is 
                written to the specified directory from the outpath.
        """
        
        # Run a check that the coefficients have actually been loaded
        if len(self.coefficients) == 0:
            raise ValueError('No calibration coefficients have been loaded.')
            
        # Create a dataframe to write to the csv
        data = {'serial':[self.type + '-' + self.serial]*len(self.coefficients),
               'name':list(self.coefficients.keys()),
               'value':list(self.coefficients.values()),
               'notes':['']*len(self.coefficients) }
        df = pd.DataFrame().from_dict(data)
        
        # Generate the csv name
        cal_date = max(self.date.values())
        csv_name = self.uid + '__' + cal_date + '.csv'
        
        # Now write to 
        check = input(f"Write {csv_name} to {outpath}? [y/n]: ")
        if check.lower().strip() == 'y':
            df.to_csv(outpath+'/'+csv_name, index=False)
        

SyntaxError: invalid syntax (<ipython-input-67-51cdfd30e13e>, line 311)

# Usage

This script can be used to autopopulate the calibration csv from the vendor docs, either the calibration (.cal) file or the xml configuration (.xmlcon) file, or from the QCT check in. It can also read the relevant files directly from a zip directory. The script also generates the appropriate csv file name following the convention of {MIO}-{Instrument+class}-{serial number}**__**{calibration date}. It can then write the csv file to an user specified directory.


### Inputs
The following information is needed to run the script:
* **UID**: Create the calibration instance requires inputing the instrument UID. This UID is checked against the serial number info imported from the calibration docs.
* **filepath**: The full directory path which points to the directory where the relevant calibration files are stored. Include the full file name in the path. May point to a zip directory.
* **outpath**: The full directory path which points to the directory where to write the calibration csv script.

### How to use:
1. Create a calibration instance: CTD = CTDCalibration(uid=UID)
2. Load the calibration file: 
    * CTD.load_cal(path_to_cal_file) or 
    * CTD.load_xml(path_to_xmlcon_file) or
    * CTD.load_qct(path_to_qct_file)
3. Write the calibration csv: CTD.write_csv(path_to_directory)

### Example of use:

1. Create a CTD calibration object.
Here, I'm going to use the CTDBP, series C, serial number 50002, with the UID of CGINS-CTDBPC-50002

In [68]:
CTD = CTDCalibration(uid='CGINS-CTDBPC-50002')

2. Specify the filepath to the directory where the calibration/xmlcon/qct files are stored, and then load the calibration values into the CTD object.

In [69]:
filepath = '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/CTDBP/CTDBP-C_SBE_16PlusV2_SN_16-50002_Calibration_Files_2017-12-09.zip'

In [70]:
CTD.load_xml(filepath)

In [72]:
CTD.coefficients

{'CC_a0': '1.26788684E-003',
 'CC_a1': '2.71313048E-004',
 'CC_a2': '-8.80325736E-007',
 'CC_a3': '1.70863095E-007',
 'CC_cpcor': '-9.57000000E-008',
 'CC_g': '-9.77008608E-001',
 'CC_h': '1.38913049E-001',
 'CC_i': '-2.49524472E-004',
 'CC_j': '3.72577101E-005',
 'CC_ctcor': '3.2500E-006',
 'CC_pa0': '2.86024392E-002',
 'CC_pa1': '4.88980690E-004',
 'CC_pa2': '-5.88592991E-012',
 'CC_ptempa0': '-6.01941724E+001',
 'CC_ptempa1': '5.38832263E+001',
 'CC_ptempa2': '-2.64810604E-001',
 'CC_ptca0': '5.25312278E+005',
 'CC_ptca1': '6.44869510E+000',
 'CC_ptca2': '-1.64707723E-001',
 'CC_ptcb0': '2.50425000E+001',
 'CC_ptcb1': '9.00000000E-004',
 'CC_ptcb2': '0.00000000E+000'}

In [73]:
CTD.date

{'TCALDATE': '20171209', 'CCALDATE': '20171209', 'PCALDATE': '20171130'}

In [74]:
CTD.serial

'50002'

In [75]:
CTD.type

'16'

3. Now, we can write the results to a csv file. Must specify explicitly where to store the written csv. The function call will request a check of the csv file name and where the file will be saved. If [n]o is selected, the function exits.

In [71]:
CTD.write_csv('/media/andrew/OS/Users/areed/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/')

Write CGINS-CTDBPC-50002__20171209.csv to /media/andrew/OS/Users/areed/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/? [y/n]: y
