In [1]:
from utils import *

In [2]:
import csv
import datetime
import fnmatch
import os
import re
import shutil
import sys
import time
import xml.etree.ElementTree as et
from zipfile import ZipFile

import numpy as np
import pandas as pd

========================================================================================================================
### Define functions

In [3]:
def get_calibration_files(serial_nums,dirpath):
    """
    Collect, for every instrument, the calibration files whose names contain
    the instrument's serial number.

    Args:
        serial_nums - dictionary keyed by instrument uid; the first element
            of each value is the serial number (surrounding whitespace is
            stripped before matching)
        dirpath - path to the directory containing the calibration files
    Returns:
        calibration_files - dictionary mapping each uid to the list of file
            names in dirpath that contain both the serial number and the
            word 'Calibration'
    """
    calibration_files = {}
    for uid, serials in serial_nums.items():
        serial = str(serials[0].strip())
        # A file qualifies only if it names this serial AND is a calibration record
        calibration_files[uid] = [
            entry for entry in os.listdir(dirpath)
            if serial in entry and 'Calibration' in entry
        ]
    return calibration_files

In [4]:
def get_qct_files(df, qct_directory):
    """
    Build a lookup of the QCT document numbers recorded for each instrument.

    Args:
        df - dataframe with a 'UID' column and a 'QCT Testing' column; the
            'QCT Testing' entries hold newline-separated document numbers
        qct_directory - not used by this function; kept so existing calls
            keep working
    Returns:
        qct_dict - dictionary mapping each unique, non-null uid to the list
            of document numbers taken from the first row whose 'UID' entry
            contains that uid
    """
    qct_dict = {}
    for uid in set(df['UID'].dropna()):
        # Build the match mask locally instead of writing a helper column
        # into the caller's dataframe; non-string cells never match.
        matches = df['UID'].apply(lambda x: isinstance(x, str) and uid in x)
        documents = df.loc[matches, 'QCT Testing']
        qct_dict[uid] = str(documents.iloc[0]).split('\n')
    return qct_dict

In [5]:
# Load all of the calibration csv files, grouped by instrument UID
def load_csv_info(csv_dict,filepath):
    """
    Loads the calibration coefficient information contained in asset management

    Args:
        csv_dict - a dictionary which associates an instrument UID to the
            calibration csv files in asset management. File names must look
            like '<uid>__<date>.csv'; the text between '__' and the first
            following '.' is parsed as the calibration date
        filepath - the path to the directory containing the calibration csv files
    Returns:
        csv_cals - a dictionary which associates an instrument UID to a pandas
            dataframe which contains the calibration coefficients. The dataframes
            are indexed by the date of calibration, with one column per
            coefficient name. A UID with no files maps to an empty dataframe
    """

    csv_cals = {}
    for uid, files in csv_dict.items():
        frames = []
        for file in files:
            data = pd.read_csv(os.path.join(filepath, file))
            date = file.split('__')[1].split('.')[0]
            data['CAL DATE'] = pd.to_datetime(date)
            frames.append(data)

        if not frames:
            # Nothing to pivot for this instrument
            csv_cals[uid] = pd.DataFrame()
            continue

        # Concatenate once (DataFrame.append no longer exists in pandas 2.x),
        # then reshape to one row per calibration date
        cals = pd.concat(frames)
        csv_cals[uid] = cals.pivot(index='CAL DATE', columns='name', values='value')

    return csv_cals

In [6]:
def splitDataFrameList(df,target_column):
    '''
    Expand an iterable-valued column so that every element gets its own row.

    df = dataframe to split,
    target_column = the column whose entries are iterated over
    returns: a new dataframe in which each element of a target-column entry
    occupies its own row. The values in the other columns are duplicated
    across the rows produced from the same original row.
    '''
    expanded = []

    def explode_row(row):
        # One output record per element; the remaining fields are copied as-is
        for element in row[target_column]:
            record = row.to_dict()
            record[target_column] = element
            expanded.append(record)

    # apply is used only to visit the rows; its return value is discarded
    df.apply(explode_row, axis=1)
    return pd.DataFrame(expanded)

========================================================================================================================
### Directories
**Define the main directories where important information is stored.**

In [306]:
# QCT result archives for the PRESF instruments
qct_directory = '/'.join((
    '/media/andrew/OS/Users/areed/Documents/Project_Files',
    'Records/Instrument_Records/PRESF/PRESF_Results',
))
# Vendor calibration documents for the PRESF instruments
cal_directory = '/'.join((
    '/media/andrew/OS/Users/areed/Documents/Project_Files',
    'Records/Instrument_Records/PRESF/PRESF_Cal',
))
# Calibration csvs stored in the local asset-management checkout
asset_management_directory = '/'.join((
    '/home/andrew/Documents/OOI-CGSN/ooi-integration',
    'asset-management/calibration/PRESFC',
))

In [307]:
# Asset-tracking workbook and the worksheet within it that is read below
excel_spreadsheet = '/'.join((
    '/media/andrew/OS/Users/areed/Documents/Project_Files',
    'Documentation/System/System Notebook',
    'WHOI_Asset_Tracking.xlsx',
))
sheet_name = 'Sensors'

In [308]:
# Asset-tracking records restricted to the PRESF instrument class.
# whoi_asset_tracking is not defined in this notebook (presumably supplied by
# the utils star-import).
PRESF = whoi_asset_tracking(
    excel_spreadsheet,
    sheet_name,
    instrument_class='PRESF',
    whoi=True,
)
PRESF

Unnamed: 0,Instrument Class,Series,Supplier Serial Number,WHOI #,OOI #,UID,Model,CGSN PN,Firmware Version,Supplier,...,QCT Testing,PreDeployment,Post Deployment,Refurbishment/ Repair,DO Number,Date Received,Deployment History,Current Deployment,Instrument Location on Current Deployment,Notes
1057,PRESF,B,26-1392,116898,A01141,CGINS-PRESFB-01392,26Plus,3305-00012-00002,7.2,SeaBird,...,3305-00105-00018\n3305-00105-00045\n3305-00105...,,,3305-00900-00178\n3305-00900-00361,WHOI_11-09-2011-PRESF-1003,10/30/2014\n4/3/2015,CP01CNSM-00005\nCP8 Spare\nCP01CNSM-00008\nCP0...,,MFN,
1058,PRESF,C,26-1400,116909,A01187,CGINS-PRESFC-01400,26Plus,3305-00012-00003,7.2,SeaBird,...,3305-00105-00019\n3305-00105-00028\n3305-00108...,,,3305-00900-00085,WHOI_11-09-2011-PRESF-1003,2014-12-12 00:00:00,CP04OSSM-00002\nCP04OSSM-00005\nCP04OSSM-00008,Pioneer 12 spare,,
1059,PRESF,C,26-1401,116910,A01188,CGINS-PRESFC-01401,26Plus,3305-00012-00003,7.2,SeaBird,...,3305-00105-00020\n3305-00105-00037\n3305-00105...,,,3305-00900-00135\n3305-00900-00309,WHOI_11-09-2011-PRESF-1003,2014-12-12 00:00:00,CP4 Spare\nCP04OSSM-00003\nCP04OSSM-00006\nP10...,CP04OSSM-00010,MFN,
1061,PRESF,B,26P64515-1329,114940,A00051,CGINS-PRESFB-01329,26Plus,3305-00012-00002,6.1e,SeaBird,...,3305-00105-00002\n3305-00105-00027\n3305-00105...,,,3305-00900-00085\n3305-00900-00178\n3305-00900...,WH-DO-1,2012-04-09 00:00:00,CP1 Spare\nCP3a Spare\nCP03ISSM-00002\nCP03ISS...,,MFN,
1062,PRESF,B,26P71826-1352,115275,A00165,CGINS-PRESFB-01352,26Plus,3305-00012-00002,7.2,SeaBird,...,3305-00105-00003\n3305-00105-00038\n3305-00105...,,,3305-00900-00013\n3305-00900-00135\n3305-00900...,WHOI_Contract_11-09-2011-PRESF-1001,2813-01-01 00:00:00,CP01CNSM-00001\nCP4 Spare\nCP03ISSM-00003\nCP0...,CP01CNSM-00011,MFN,
1063,PRESF,B,26P76848-1386,116453,A00813,CGINS-PRESFB-01386,26Plus,3305-00012-00002,7.2,SeaBird,...,3305-00105-00010\n3305-00105-00025\n3305-00105...,,,3305-00900-00085\n3305-00900-00178\n3305-00900...,WHOI_Contract_11-09-2011-PRESF-1002,2014-04-11 00:00:00,CP01CNSM-00002\nCP01CNSM-00003\nCP04OSSM-00004...,CP03ISSM-00010,MFN,
1064,PRESF,B,26P76848-1387,116337,A00761,CGINS-PRESFB-01387,26Plus,3305-00012-00002,7.2,SeaBird,...,3305-00105-00011\n3305-00105-00026\n3305-00105...,,,3305-00900-00085,WHOI_Contract_11-09-2011-PRESF-1002,2014-04-11 00:00:00,CP03ISSM-00001\nCP01CNSM-00006\nCP03ISSM-00008,Pioneer 12 spare,,
1065,PRESF,C,26P76848-1388,116338,A00762,CGINS-PRESFC-01388,26Plus,3305-00012-00003,7.2,SeaBird,...,3305-00105-00012\n3305-00105-00024\n3305-00105...,,,3305-00900-00035\n3305-00900-00364,WHOI_Contract_11-09-2011-PRESF-1002,2014-04-11 00:00:00,CP04OSSM-00001\nCP8 Spare\nCP04OSSM-00007\nCP0...,,MFN,


**Identify the QCT Testing documents associated with each individual instrument (the UID)**

In [309]:
# uid -> list of QCT document numbers taken from the tracking sheet
qct_dict = get_qct_files(df=PRESF, qct_directory=qct_directory)
qct_dict

{'CGINS-PRESFB-01392': ['3305-00105-00018',
  '3305-00105-00045',
  '3305-00105-00066'],
 'CGINS-PRESFB-01386': ['3305-00105-00010',
  '3305-00105-00025',
  '3305-00105-00044',
  '3305-00105-00056'],
 'CGINS-PRESFB-01329': ['3305-00105-00002',
  '3305-00105-00027',
  '3305-00105-00043',
  '3305-00105-00054'],
 'CGINS-PRESFB-01352': ['3305-00105-00003',
  '3305-00105-00038',
  '3305-00105-00047',
  '3305-00105-00065'],
 'CGINS-PRESFC-01400': ['3305-00105-00019',
  '3305-00105-00028',
  '3305-00108-00048'],
 'CGINS-PRESFB-01387': ['3305-00105-00011',
  '3305-00105-00026',
  '3305-00105-00046'],
 'CGINS-PRESFC-01388': ['3305-00105-00012',
  '3305-00105-00024',
  '3305-00105-00067'],
 'CGINS-PRESFC-01401': ['3305-00105-00020',
  '3305-00105-00037',
  '3305-00105-00055']}

**Identify the calibration csvs stored in asset management which correspond to a particular instrument.**

In [310]:
# uid -> calibration csv file names found in asset management (see the output
# below). load_asset_management is not defined in this notebook; presumably it
# is supplied by the utils star-import — TODO confirm.
csv_dict = load_asset_management(PRESF, asset_management_directory)
csv_dict

{'CGINS-PRESFC-01388': ['CGINS-PRESFC-01388__20180614.csv',
  'CGINS-PRESFC-01388__20150929.csv',
  'CGINS-PRESFC-01388__20141212.csv'],
 'CGINS-PRESFC-01400': ['CGINS-PRESFC-01400__20170811.csv',
  'CGINS-PRESFC-01400__20150509.csv',
  'CGINS-PRESFC-01400__20161012.csv'],
 'CGINS-PRESFC-01401': ['CGINS-PRESFC-01401__20151022.csv',
  'CGINS-PRESFC-01401__20160818.csv',
  'CGINS-PRESFC-01401__20171217.csv']}

In [311]:
# Only instruments that have csvs in asset management are examined further
uids = sorted(csv_dict)

In [312]:
# uid -> list holding the supplier serial number (see the output below).
# get_serial_nums is not defined in this notebook; presumably it is supplied
# by the utils star-import — TODO confirm.
serial_nums = get_serial_nums(PRESF, uids)
serial_nums

{'CGINS-PRESFC-01388': ['26P76848-1388'],
 'CGINS-PRESFC-01400': ['26-1400'],
 'CGINS-PRESFC-01401': ['26-1401']}

In [313]:
# uid -> calibration documents in cal_directory whose names contain the serial number
cal_dict = get_calibration_files(serial_nums=serial_nums, dirpath=cal_directory)
cal_dict

{'CGINS-PRESFC-01388': ['PRESF-C_SBE_26plus_SN_26P76848-1388_Calibration_Documents_2014-04-11.pdf',
  'PRESF-C_SBE_26Plus_SN_26P76848-1388_Calibration_Files_2015-10-16.zip',
  'PRESF-C_SBE_26Plus_SN_26P76848-1388_Calibration_Files_2018-06-16.zip'],
 'CGINS-PRESFC-01400': ['PRESF-C_SBE_26Plus_SN_26-1400_Calibration_Files_2014-12-12.zip',
  'PRESF-C_SBE_26Plus_SN_26-1400_Calibration_Files_2016-01-06.zip',
  'PRESF-C_SBE_26Plus_SN_26-1400_Calibration_Files_2017-07-25.zip'],
 'CGINS-PRESFC-01401': ['PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2014-12-12.zip',
  'PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2016-08-16.zip',
  'PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2017-12-17.zip']}

========================================================================================================================
**Now, need to get all the files for a particular PRESF UID:**

In [404]:
# Select the instrument to examine by its position in the sorted uid list.
# The index is hard-coded: change it to work on a different instrument.
uid = sorted(uids)[2]
uid

'CGINS-PRESFC-01401'

In [405]:
# Calibration documents for the selected uid, sorted by file name
cal_files = sorted(cal_dict[uid])
for file in cal_files:
    print(file)

PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2014-12-12.zip
PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2016-08-16.zip
PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2017-12-17.zip


In [406]:
# Asset-management csv names for the selected uid, sorted by file name.
# Note: csv_files is rebound to a DataFrame further down in the notebook.
csv_files = sorted(csv_dict[uid])
for file in csv_files:
    print(file)

CGINS-PRESFC-01401__20151022.csv
CGINS-PRESFC-01401__20160818.csv
CGINS-PRESFC-01401__20171217.csv


In [407]:
# QCT document numbers for the selected uid, sorted.
# Note: qct_files is rebound to a DataFrame further down in the notebook.
qct_files = sorted(qct_dict[uid])
for file in qct_files:
    print(file)

3305-00105-00020
3305-00105-00037
3305-00105-00055


In [408]:
# Full path of every asset-management csv for the selected uid
csv_path = [
    generate_file_path(asset_management_directory, cf)
    for cf in csv_files
]
csv_path

['/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/PRESFC/CGINS-PRESFC-01401__20151022.csv',
 '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/PRESFC/CGINS-PRESFC-01401__20160818.csv',
 '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/PRESFC/CGINS-PRESFC-01401__20171217.csv']

In [409]:
# Full path of every calibration document for the selected uid
cal_path = [
    generate_file_path(cal_directory, cf)
    for cf in cal_files
]
cal_path

['/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Cal/PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2014-12-12.zip',
 '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Cal/PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2016-08-16.zip',
 '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Cal/PRESF-C_SBE_26Plus_SN_26-1401_Calibration_Files_2017-12-17.zip']

In [410]:
# Full path of every QCT record for the selected uid; the ext list is passed
# through to generate_file_path unchanged
qct_path = [
    generate_file_path(qct_directory, qf, ext=['.log','.txt','.zip'])
    for qf in qct_files
]
qct_path

['/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Results/3305-00105-00020-A.zip',
 '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Results/3305-00105-00037-A.zip',
 '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/PRESF/PRESF_Results/3305-00105-00055-A.zip']

========================================================================================================================
### Now develop code to load the calibration coeffs from the capture files
The **PRESFCalibration** object below is an object designed to load, parse, and write the respective PRESF calibration csvs. The calibration coefficients are stored in the object as attributes.

In [411]:
class PRESFCalibration():
    """
    Stores the calibration coefficients of a single PRESF instrument and
    writes them out as a calibration csv.

    Attributes:
        uid - the instrument uid; validated when it is assigned
        serial - serial number derived from the uid ('26-' plus the last uid
            field with leading zeros removed)
        coefficients - dictionary of coefficient name -> value (as strings)
        date - calibration date as a 'YYYYMMDD' string once a QCT file has
            been parsed (an empty dict until then)
        notes - optional dictionary of coefficient name -> note text
        coefficient_name_map - maps the names used in the QCT capture file
            to the names written to the csv
    """

    def __init__(self, uid):
        self.serial = ''
        self.uid = uid
        # These two coefficients are not read from the QCT file; they start
        # with these fixed values.
        self.coefficients = {
            'CC_offset_correction_factor':'0',
            'CC_slope_correction_factor':'1',
        }
        # Replaced by a 'YYYYMMDD' string in parse_qct
        self.date = {}
        self.notes = {}

        # Name mapping from the QCT capture file to the csv coefficient names
        self.coefficient_name_map = {
            'U0':'CC_u0',
            'Y1':'CC_y1',
            'Y2':'CC_y2',
            'Y3':'CC_y3',
            'C1':'CC_c1',
            'C2':'CC_c2',
            'C3':'CC_c3',
            'D1':'CC_d1',
            'D2':'CC_d2',
            'T1':'CC_t1',
            'T2':'CC_t2',
            'T3':'CC_t3',
            'T4':'CC_t4',
            'M':'CC_m',
            'B':'CC_b',
            'OFFSET':'CC_pressure_offset_calibration_coefficient'
        }

    @property
    def uid(self):
        return self._uid

    @uid.setter
    def uid(self, d):
        # The uid must start with three dash-separated fields of 5, 6 and 5
        # characters; the serial number is derived from the third field.
        r = re.compile('.{5}-.{6}-.{5}')
        if r.match(d) is not None:
            self.serial = '26-' + d.split('-')[2].lstrip('0')
            self._uid = d
        else:
            raise Exception(f"The instrument uid {d} is not a valid uid. Please check.")


    def parse_qct(self, filepath):
        """
        Parses the QCT data in ascii-format and stores the pressure
        calibration coefficients and the calibration date on the object.

        Only the lines between the 'Pressure coefficients' header and the
        'Temperature coefficients' header are read. Within that section,
        lines that do not consist of exactly three whitespace-separated
        tokens (name, separator, value) and names missing from
        coefficient_name_map are skipped.

        Args:
            filepath - the full directory to either the parent
                directory or the full path with filename of the
                QCT file to parse
        Returns:
            None. self.coefficients is updated in place and self.date is
                set to the calibration date formatted as 'YYYYMMDD'.
        """

        data = self.open_qct(filepath)
        in_pressure_section = False
        for line in data.splitlines():

            line = line.replace('*','').strip()

            if 'Pressure coefficients' in line:
                # The calibration date follows the first colon of the header
                _, cal_date = line.split(':', 1)
                self.date = pd.to_datetime(cal_date.strip()).strftime('%Y%m%d')
                in_pressure_section = True
                continue
            elif 'Temperature coefficients' in line:
                in_pressure_section = False

            if not in_pressure_section:
                continue

            parts = line.split()
            if len(parts) != 3:
                # Blank or otherwise unparseable line inside the section
                continue
            key, _, value = parts
            name = self.coefficient_name_map.get(key)
            if name is None:
                # Not a coefficient this object knows how to name
                continue
            self.coefficients[name] = value


    def open_qct(self, filepath):
        """
        Function which opens and reads in the QCT data into a
        format which is parseable.

        Args:
            filepath - one of: a '.zip' archive containing a '*.hex' file,
                a directory containing a '*.hex' file, or the full path of
                the QCT file itself
        Raises:
            FileNotFoundError - raised if an archive or directory contains
                no '*.hex' file
        Returns:
            data - the data in ascii-format from the QCT file. If several
                '*.hex' entries are present, the last one is used (last in
                archive order, or last by sorted name in a directory).
        """

        if filepath.endswith('.zip'):
            with ZipFile(filepath) as zfile:
                hex_names = [
                    name for name in zfile.namelist()
                    if fnmatch.fnmatch(name,'*.hex')
                ]
                if not hex_names:
                    raise FileNotFoundError(f"No .hex file found in {filepath}.")
                return zfile.read(hex_names[-1]).decode('ascii')

        if os.path.isdir(filepath):
            hex_names = sorted(
                entry for entry in os.listdir(filepath)
                if fnmatch.fnmatch(entry,'*.hex')
            )
            if not hex_names:
                raise FileNotFoundError(f"No .hex file found in {filepath}.")
            # Listing entries are bare names; join them back onto the directory
            target = os.path.join(filepath, hex_names[-1])
        else:
            target = filepath

        # Text mode already yields str, so no decode step is needed
        with open(target, encoding='ascii') as file:
            return file.read()


    def write_csv(self, outpath):
        """
        This function writes the correctly named csv file
        ('<uid>__<date>.csv') for the instrument to the specified directory.

        Args:
            outpath - directory path of where to write the csv file
        Raises:
            ValueError - raised if the coefficient dictionary is empty or if
                no calibration date has been set
        Returns:
            None. A csv with the columns serial, name, value and notes,
                sorted by coefficient name, is written to outpath.
        """

        # Run a check that the coefficients have actually been loaded
        if len(self.coefficients) == 0:
            raise ValueError('No calibration coefficients have been loaded.')
        # The date is part of the file name, so it has to be a non-empty string
        if not isinstance(self.date, str) or not self.date:
            raise ValueError('No calibration date has been set. Parse a QCT file first.')

        # Create a dataframe to write to the csv
        df = pd.DataFrame({
            'serial': [self.serial]*len(self.coefficients),
            'name': list(self.coefficients.keys()),
            'value': list(self.coefficients.values())
        })

        # Now merge the coefficients dataframe with the notes. The outer merge
        # keeps notes whose name has no matching coefficient as extra rows.
        if len(self.notes) > 0:
            notes = pd.DataFrame({
                'name':list(self.notes.keys()),
                'notes':list(self.notes.values())
            })
            df = df.merge(notes, how='outer', on='name')
        else:
            df['notes'] = ''

        # Sort the data by the coefficient name
        df = df.sort_values(by='name')

        # Generate the csv name and write the dataframe to it
        csv_name = self.uid + '__' + self.date + '.csv'
        df.to_csv(os.path.join(outpath, csv_name), index=False)

**Initialize the PRESFCalibration object using the instrument uid.**

In [167]:
presf = PRESFCalibration(uid)

**Check that the serial number has been correctly parsed.**

In [168]:
presf.serial

'26-1386'

**Load the PRESF calibration coefficients based on the QCT file.**

In [169]:
presf.parse_qct(qct_path[2])

**Check that the calibration coefficients loaded successfully.**

In [170]:
presf.coefficients

{'CC_offset_correction_factor': '0',
 'CC_slope_correction_factor': '1',
 'CC_u0': '5.810283e+00',
 'CC_y1': '-3.944121e+03',
 'CC_y2': '-1.073763e+04',
 'CC_y3': '0.000000e+00',
 'CC_c1': '2.066838e+03',
 'CC_c2': '-1.270562e+01',
 'CC_c3': '-3.470497e+03',
 'CC_d1': '2.391000e-02',
 'CC_d2': '0.000000e+00',
 'CC_t1': '2.763804e+01',
 'CC_t2': '4.278750e-01',
 'CC_t3': '1.776240e+01',
 'CC_t4': '2.186445e+01',
 'CC_m': '41943.0',
 'CC_b': '2796.2',
 'CC_pressure_offset_calibration_coefficient': '-1.930000e-01'}

In [171]:
qct_dict[uid]

['3305-00105-00010',
 '3305-00105-00025',
 '3305-00105-00044',
 '3305-00105-00056']

**To attach notes to the calibration csv, assign a dictionary to the `notes` attribute: each key is a calibration coefficient name and each value is the note text for that coefficient.**

In [172]:
# Notes are keyed by coefficient name and merged into the csv's notes column
presf.notes = dict(
    CC_b='Source file is QCT document number 3305-00105-00043.',
    CC_m='I think that this is a constant value.',
)

**For right now, write the file to a temporary local directory.**

In [322]:
# Scratch directory under the current working directory. Any previous copy is
# removed first; ignore_errors keeps a fresh run (no temp directory yet) from
# raising FileNotFoundError.
temp_directory = '/'.join((os.getcwd(),'temp'))
shutil.rmtree(temp_directory, ignore_errors=True)

In [323]:
# Sub-directory of the temp directory that receives the csvs generated from QCT files
temp_path = '/'.join((temp_directory,'qct'))
# ensure_dir is not defined in this notebook (presumably supplied by the utils
# star-import); assumed to create the directory when it is missing — TODO confirm
ensure_dir(temp_path)

**Write the PRESF calibration object using the standardized naming format to the temporary directory in a format that can be ingested by UFrame.**

In [252]:
presf.write_csv(temp_path)

**Check that it wrote.**

In [253]:
os.listdir(temp_path)

['CGINS-PRESFB-01387__20170724.csv']

========================================================================================================================
## Metadata Comparison
Now the goal is to compare the calibration csvs contained in asset management against the calibration coefficients stored in the QCT files.

**First, need to copy the calibration csvs from asset management to the local temp directory.**

In [412]:
# Start the comparison from an empty temp directory; ignore_errors keeps this
# cell from failing when the directory does not exist yet.
shutil.rmtree('/'.join((os.getcwd(),'temp')), ignore_errors=True)

In [413]:
# Destination for the local copies of the asset-management csvs. It is set up
# once, before the loop, so savedir exists even when csv_path is empty.
savedir = '/'.join((os.getcwd(),'temp','csv'))
ensure_dir(savedir)
for file in csv_path:
    shutil.copy(file, savedir)

In [414]:
os.listdir(savedir)

['CGINS-PRESFC-01401__20151022.csv',
 'CGINS-PRESFC-01401__20160818.csv',
 'CGINS-PRESFC-01401__20171217.csv']

**Next, write all the QCT files to the temp directory in the appropriate csv format. This will print out any QCT files which don't parse.**

In [415]:
ensure_dir(temp_path)
for qct in qct_path:
    # A fresh object per file, so coefficients from one QCT never leak into the next
    try:
        presf = PRESFCalibration(uid=uid)
        presf.parse_qct(qct)
        presf.write_csv(temp_path)
    except Exception as error:
        # Report the file that failed together with the reason, then carry on.
        # Catching Exception (not a bare except) lets Ctrl-C still interrupt the loop.
        print(qct, repr(error))

In [420]:
os.listdir(temp_path)

['CGINS-PRESFC-01401__20160714.csv',
 'CGINS-PRESFC-01401__20141125.csv',
 'CGINS-PRESFC-01401__20171212.csv']

========================================================================================================================
### Compare results
Now, with QCT files parsed into csvs which follow the UFrame format, I can load both the QCT and the calibration csvs into pandas dataframes, which will allow element by element comparison in relatively few lines of code.

In [417]:
def get_file_date(x):
    """
    Extract the date portion of a calibration csv name.

    Args:
        x - a file name of the form '<uid>__<date>.csv' (converted to str)
    Raises:
        ValueError - raised if the name contains no '__' or no '.'
    Returns:
        the text between the first '__' and the first '.' in the name
    """
    name = str(x)
    start = name.index('__') + 2
    stop = name.index('.')
    return name[start:stop]

**Load the calibration csvs:**

In [418]:
# Now we want to compare dataframe
csv_files = pd.DataFrame(sorted(csv_dict[uid]),columns=['csv'])
csv_files['cal date'] = csv_files['csv'].apply(lambda x: get_file_date(x))
csv_files.set_index('cal date',inplace=True)
csv_files

Unnamed: 0_level_0,csv
cal date,Unnamed: 1_level_1
20151022,CGINS-PRESFC-01401__20151022.csv
20160818,CGINS-PRESFC-01401__20160818.csv
20171217,CGINS-PRESFC-01401__20171217.csv


In [419]:
# Now we want to compare dataframe
qct_files = pd.DataFrame(sorted(os.listdir('temp/qct')),columns=['qct'])
qct_files['cal date'] = qct_files['qct'].apply(lambda x: get_file_date(x))
qct_files.set_index('cal date',inplace=True)
qct_files

Unnamed: 0_level_0,qct
cal date,Unnamed: 1_level_1
20141125,CGINS-PRESFC-01401__20141125.csv
20160714,CGINS-PRESFC-01401__20160714.csv
20171212,CGINS-PRESFC-01401__20171212.csv


In [433]:
# Line the two sources up by calibration date; a date present in only one
# source gets the string sentinel '-999' in the other column
df_files = (
    csv_files
    .join(qct_files, how='outer')
    .fillna('-999')
)
df_files

Unnamed: 0_level_0,csv,qct
cal date,Unnamed: 1_level_1,Unnamed: 2_level_1
20141125,CGINS-PRESFC-01401__20141125.csv,CGINS-PRESFC-01401__20141125.csv
20160714,CGINS-PRESFC-01401__20160714.csv,CGINS-PRESFC-01401__20160714.csv
20171212,CGINS-PRESFC-01401__20171212.csv,CGINS-PRESFC-01401__20171212.csv


**The above dataframe shows the names of the csv files both pulled from asset management (csv) and from the qct. When they don't match based on the calibration date (cal date), that suggests that the date in the csv filename is likely incorrect.**

**If the filename is wrong, the calibration coefficient checker will not manage to compare the results. Consequently, we'll make a local copy of the wrong file to a new file with the correct name, and then run the calibration coefficient checker. Do this for all the incorrectly named files.**

In [430]:
# a: the local copy whose file-name date disagrees with the QCT date
# b: the same file under the date taken from the QCT-generated csv
# The names are hard-coded for the instrument currently being examined.
a = 'temp/csv/' + 'CGINS-PRESFC-01401__20171217.csv'
b = 'temp/csv/' + 'CGINS-PRESFC-01401__20171212.csv'
shutil.copy(a,b)

'temp/csv/CGINS-PRESFC-01401__20171212.csv'

In [431]:
# Remove the local copy that still carries the old date. Pure-Python
# replacement for the shell `rm`, so the cell does not depend on a Unix shell;
# re-running it after the file is already gone is a no-op.
try:
    os.remove('temp/csv/CGINS-PRESFC-01401__20171217.csv')
except FileNotFoundError:
    pass

In [432]:
# Rebuild the csv table from what is now in temp/csv, indexed by file-name date
csv_files = pd.DataFrame(sorted(os.listdir('temp/csv')),columns=['csv'])
csv_files['cal date'] = csv_files['csv'].apply(get_file_date)
csv_files = csv_files.set_index('cal date')
csv_files

Unnamed: 0_level_0,csv
cal date,Unnamed: 1_level_1
20141125,CGINS-PRESFC-01401__20141125.csv
20160714,CGINS-PRESFC-01401__20160714.csv
20171212,CGINS-PRESFC-01401__20171212.csv


**Define a function to check the calibration coefficients between the asset management csv and the csv generated from the QCT file. The comparison uses `np.isclose` with its default tolerances, i.e. a relative tolerance of 1e-5 (0.001%) plus an absolute tolerance of 1e-8.**

In [434]:
def check_cal_coeffs(coeffs_dict, rtol=1e-05, atol=1e-08):
    """
    Compare the calibration coefficient values from several sources.

    Each source is compared against the next one in the dictionary (the last
    against the first). Values are compared by position, not by coefficient
    name, so the sources must be ordered identically. Pairs whose 'value'
    entries differ in length are left out of the comparison.

    Args:
        coeffs_dict - dictionary of source name -> object with a 'value'
            entry holding the numeric coefficient values (e.g. a dataframe
            with a 'value' column)
        rtol - relative tolerance passed to np.isclose
        atol - absolute tolerance passed to np.isclose
    Raises:
        ValueError - raised if no pair of sources could be compared (empty
            dictionary, or every pair differs in length)
    Returns:
        mask - boolean array, True where all compared sources agree within
            the tolerances
    """

    # Part 1: coeff by coeff comparison between each source and its neighbour
    keys = list(coeffs_dict.keys())
    comparison = {}
    for i, key in enumerate(keys):
        neighbour = keys[(i + 1) % len(keys)]
        left = coeffs_dict.get(key)['value']
        right = coeffs_dict.get(neighbour)['value']
        if len(left) == len(right):
            comparison[(key, neighbour)] = np.isclose(left, right, rtol=rtol, atol=atol)

    if not comparison:
        raise ValueError('No coefficient sources of matching length to compare.')

    # Part 2: a coefficient passes only if every pairwise comparison passed
    masks = list(comparison.values())
    mask = masks[0]
    for other in masks[1:]:
        mask = np.logical_and(mask, other)

    return mask

In [435]:
result = {}
for cal_date in df_files.index:
    # Part 1: load every csv available for this calibration date, keyed by source
    coeffs_dict = {}
    for source, fname in df_files.loc[cal_date].items():
        if fname == '-999':
            # Placeholder value: this source has no file for the date
            continue
        load_directory = '/'.join((os.getcwd(),'temp',source,fname))
        df_coeffs = pd.read_csv(load_directory)
        # Report the serial number(s) recorded in the file
        for i in list(set(df_coeffs['serial'])):
            print(f"{source}-{fname}: {i}")
        # Index by coefficient name and sort so both sources line up row for row
        df_coeffs = df_coeffs.set_index(keys='name').sort_index()
        coeffs_dict[source] = df_coeffs

    # Part 2: check the calibration coefficients against each other
    mask = check_cal_coeffs(coeffs_dict)

    # Part 3: keep the asset-management rows whose coefficients failed the check
    fname = df_files.loc[cal_date]['csv']
    incorrect = 'No csv file.' if fname == '-999' else coeffs_dict['csv'][mask == False]
    result[fname] = incorrect

csv-CGINS-PRESFC-01401__20141125.csv: 26-1401
qct-CGINS-PRESFC-01401__20141125.csv: 26-1401
1
csv-CGINS-PRESFC-01401__20160714.csv: 26-1401
qct-CGINS-PRESFC-01401__20160714.csv: 26-1401
1
csv-CGINS-PRESFC-01401__20171212.csv: 26-1401
qct-CGINS-PRESFC-01401__20171212.csv: 26-1401
1


In [436]:
result

{'CGINS-PRESFC-01401__20141125.csv': Empty DataFrame
 Columns: [serial, value, notes]
 Index: [], 'CGINS-PRESFC-01401__20160714.csv': Empty DataFrame
 Columns: [serial, value, notes]
 Index: [], 'CGINS-PRESFC-01401__20171212.csv': Empty DataFrame
 Columns: [serial, value, notes]
 Index: []}