# NUTNR METADATA REVIEW
This notebook describes the process for checking the calibration csvs located in the OOI-CGSN asset management repository on GitHub. The purpose is to identify errors that were made when the calibration csvs were entered.

**========================================================================================================================**

The first step is to load relevant packages:

In [1]:
import csv
import re
import os
import numpy as np
import pandas as pd

In [2]:
from utils import *

**=========================================================================================================================**
Define some useful functions for the metadata review (in future will move to a utilities file):

In [3]:
def get_qct_files(df, qct_directory):
    """
    Map each instrument UID to the entries listed in its 'QCT Testing' cell.

    Args:
        df - dataframe with a 'UID' column and a 'QCT Testing' column whose
            cells hold newline-separated entries
        qct_directory - not used by this function; kept so that existing
            calls keep working
    Returns:
        qct_dict - a dictionary of {uid: [entry, ...]}. A UID whose
            'QCT Testing' cell is blank (NaN) maps to an empty list.
    """
    qct_dict = {}
    for uid in set(df['UID']):
        # Build the row mask as a local series so that no scratch column is
        # written onto the caller's dataframe
        is_match = df['UID'].apply(lambda x: uid in x)
        # Only the first matching row is used
        entry = df.loc[is_match, 'QCT Testing'].iloc[0]
        # A blank cell is read as NaN, which has no .split()
        qct_dict[uid] = [] if pd.isna(entry) else entry.split('\n')
    return qct_dict

In [4]:
def get_calibration_files(serial_nums, cal_directory):
    """
    Find the calibration files in a directory for each instrument.

    Args:
        serial_nums - a dictionary keyed by UID; the first element of each
            value, converted to a string, is used as the serial number
        cal_directory - the directory whose entries are searched
    Returns:
        calibration_files - a dictionary of {uid: [filename, ...]} listing
            the entries whose name contains both 'Calibration_File' and the
            serial number
    """
    calibration_files = {}
    for uid, serials in serial_nums.items():
        serial = str(serials[0])
        calibration_files[uid] = [
            entry for entry in os.listdir(cal_directory)
            if 'Calibration_File' in entry and serial in entry
        ]
    return calibration_files

In [5]:
# Load all of the calibration csv files, grouped by UID
def load_csv_info(csv_dict,filepath):
    """
    Loads the calibration coefficient information contained in asset management

    Args:
        csv_dict - a dictionary which associates an instrument UID to the
            calibration csv files in asset management. File names are split
            on '__' and '.' to get the calibration date, i.e. they must look
            like <UID>__<date>.csv
        filepath - the path to the directory containing the calibration csv
            files (with or without a trailing separator)
    Returns:
        csv_cals - a dictionary which associates an instrument UID to a pandas
            dataframe which contains the calibration coefficients. The dataframes
            are indexed by the date of calibration, with one column per
            coefficient name. A UID with no csv files maps to an empty dataframe.
    """

    csv_cals = {}
    for uid, files in csv_dict.items():
        # Collect the per-file frames and concatenate once; DataFrame.append
        # no longer exists in pandas >= 2.0
        frames = []
        for file in files:
            data = pd.read_csv(os.path.join(filepath, file))
            date = file.split('__')[1].split('.')[0]
            data['CAL DATE'] = pd.to_datetime(date)
            frames.append(data)

        if not frames:
            # Nothing to pivot for this UID
            csv_cals[uid] = pd.DataFrame()
            continue

        # Pivot so that each row is a calibration date and each column is a
        # coefficient name. pivot takes column labels, not a Series.
        cals = pd.concat(frames)
        csv_cals[uid] = cals.pivot(index='CAL DATE', columns='name', values='value')

    return csv_cals

In [6]:
def splitDataFrameList(df,target_column,separator=None):
    '''
    Expand a column of sequences into one row per element.

    df = dataframe to split,
    target_column = the column containing the values to split; each cell is
        iterated over, so it should hold a list (or other iterable)
    separator = optional; when given, a cell that is a string is first split
        on this symbol. When omitted (the default) the cell is iterated as-is.
    returns: a dataframe with each entry for the target column separated, with each element moved into a new row.
    The values in the other columns are duplicated across the newly divided rows.
    '''

    new_rows = []
    # Iterate explicitly rather than appending from inside DataFrame.apply:
    # older pandas versions call the applied function twice on the first row,
    # which would duplicate that row's entries
    for _, row in df.iterrows():
        entries = row[target_column]
        if separator is not None and isinstance(entries, str):
            entries = entries.split(separator)
        for entry in entries:
            new_row = row.to_dict()
            new_row[target_column] = entry
            new_rows.append(new_row)

    return pd.DataFrame(new_rows)

In [7]:
# Now, write a function to copy over the file
def copy_to_local(cal_path):
    """
    Copy (or, for zip archives, extract) each file in cal_path into its own
    folder under <current working directory>/temp/cal_data/.

    Args:
        cal_path - an iterable of '/'-separated file paths
    """

    for filepath in cal_path:
        # The destination folder is the file name up to its first '.'
        basename = filepath.split('/')[-1]
        folder = basename.split('.')[0]
        savedir = '/'.join((os.getcwd(),'temp','cal_data',folder))
        # Make sure the destination exists before writing into it
        ensure_dir(savedir)

        if not filepath.endswith('.zip'):
            shutil.copy(filepath, savedir)
            continue

        # Zip archives are unpacked member by member into the destination
        with ZipFile(filepath,'r') as archive:
            for member in archive.namelist():
                archive.extract(member,path=savedir)

**====================================================================================================================**
Define the directories where the QCT, Pre, and Post deployment document files are stored, where the vendor documents are stored, where asset tracking is located, and where the calibration csvs are located.

In [8]:
# Directory that is scanned below for the QCT, pre- and post-deployment documents
# NOTE(review): absolute, machine-specific path - must be edited before running elsewhere
doc_directory = '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Results/'
# Directory that is scanned below for the vendor calibration files
cal_directory = '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Cal/'
# Directory holding the NUTNRB calibration csvs from asset management
asset_management_directory = '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/'

In [9]:
# Asset tracking spreadsheet and the sheet within it that is read below
# NOTE(review): absolute, machine-specific path - must be edited before running elsewhere
excel_spreadsheet = '/media/andrew/OS/Users/areed/Documents/Project_Files/Documentation/System/System Notebook/WHOI_Asset_Tracking.xlsx'
sheet_name = 'Sensors'

In [10]:
# Load the asset tracking entries for instrument class NUTNR, series B
NUTNR = whoi_asset_tracking(
    spreadsheet=excel_spreadsheet,
    sheet_name=sheet_name,
    instrument_class='NUTNR',
    series='B',
)
NUTNR

Unnamed: 0,Instrument Class,Series,Supplier Serial Number,WHOI #,OOI #,UID,Model,CGSN PN,Firmware Version,Supplier,...,QCT Testing,PreDeployment,Post Deployment,Refurbishment/ Repair,DO Number,Date Received,Deployment History,Current Deployment,Instrument Location on Current Deployment,Notes
876,NUTNR,B,239,115084,A00065,CGINS-NUTNRB-00239,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00004\n3305-00108-00048\n3305-00108...,3305-00308-00001,3305-00508-00040,3305-00900-00075\n3305-00900-00144\n3305-00900...,WH-SC12-5-NUTNR-1001,2012-11-13 00:00:00,GI01SUMO-00001\nCP04OSSM-00006,,NSIF,Reading High nitrate levels during calibration...
877,NUTNR,B,240,115085,A00066,CGINS-NUTNRB-00240,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00003\n3305-00108-00029\n3305-00108...,,,3305-00900-00008\n3305-00900-00231\n3305-00900...,WH-SC12-5-NUTNR-1001,2012-11-13 00:00:00,CP01CNSM-00001\nGS01SUMO-00002,,NSIF,"09/2017: Clock issue - resets back to Jan 1, 2..."
878,NUTNR,B,260,115671,A00383,CGINS-NUTNRB-00260,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00010\n3305-00108-00056,,3305-00508-00010,3305-00900-00109\n3305-00900-00317,WH-SC12-05-NUTNR-1004,2013-08-12 00:00:00,GI Spare\nCP1 spare\nGS01SUMO-00001\nCP04OSSM-...,,NSIF,Sent to vedor as part of trade in for new (SUN...
879,NUTNR,B,261,115672,A00384,CGINS-NUTNRB-00261,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00011\n3305-00108-00021\n3305-00108...,3305-00308-00007\n3305-00308-00031\n3305-00308...,3305-00508-00022\n3305-00508-00041,3305-00900-00071\n3305-00900-00173\n3305-00900...,WH-SC12-05-NUTNR-1004,2013-08-12 00:00:00,CP03ISSM-00002\nCP01CNSM-00005\nCP01CNSM-00007,,NSIF,Returned to vendor 6/24/14 (RMA#2014-125)\nSen...
880,NUTNR,B,262,115673,A00385,CGINS-NUTNRB-00262,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00012\n3305-00108-00064,3305-00308-00002,,3305-00900-00064\n3305-00900-00153\n3305-00900...,WH-SC12-05-NUTNR-1004,2013-08-12 00:00:00,GI01SUMO-00001,,,Sent to vedor as part of trade in for new (SUN...
881,NUTNR,B,266,116564,A00880,CGINS-NUTNRB-00266,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00015\n3305-00108-00040\n3305-00108...,3305-00308-00004\n3305-00308-00032,3305-00508-00024,3305-00900-00036\n3305-00900-00173\n3305-00900...,WH-SC12-05-NUTNR-1006,2014-06-26 00:00:00,CP04OSSM-00001\nCP04OSSM-00004\nCP01CNSM-00008,,NSIF,Sent to vedor as part of trade in for new (SUN...
882,NUTNR,B,267,116562,A00878,CGINS-NUTNRB-00267,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00018\n3305-00108-00053,,3305-00508-00009,3305-00900-00109\n3305-00900-00363,WH-SC12-05-NUTNR-1006,2014-06-26 00:00:00,GS01SUMO-00001\nCP01CNSM-00006,,NSIF,Sent to vedor as part of trade in for new (SUN...
883,NUTNR,B,268,116563,A00879,CGINS-NUTNRB-00268,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00017\n3305-00108-00046\n3305-00108...,3305-00308-00003\n3305-00308-00034,3305-00508-00023,3305-00900-00071\n3305-00900-00173\n3305-00900...,WH-SC12-05-NUTNR-1006,2014-06-26 00:00:00,CP03ISSM-00001\nCP03ISSM-00004\nCP03ISSM-00006,,NSIF,4/2016: Scheduled to sample every half hour. S...
884,NUTNR,B,269,116565,A00881,CGINS-NUTNRB-00269,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00016\n3305-00108-00047\n3305-00114...,3305-00308-00005\n3305-00308-00009,3305-00508-00001,3305-00900-00071\n3305-00900-00317,WH-SC12-05-NUTNR-1006,2014-06-26 00:00:00,CP01CNSM-00002\nCP01CNSM-00003\nCP03ISSM-00005,,NSIF,Sent to vedor as part of trade in for new (SUN...
885,NUTNR,B,270,116899,A01142,CGINS-NUTNRB-00270,ISUS,1336-00014-00002,3.2.4,Satlantic,...,3305-00108-00020\n3305-00108-00050,3305-00308-00006\n3305-00308-00008,3305-00508-00038,3305-00900-00071\n3305-00900-00317,WH-SC12-05-NUTNR-1006,2014-09-24 00:00:00,CP3a Spare\nCP04OSSM-00002\nGI01SUMO-00003,,NSIF,Sent to vedor as part of trade in for new (SUN...


**======================================================================================================================**
Now, I want to load all the calibration csvs and group them by UID:

In [11]:
# Unique instrument UIDs, in sorted order
uids = sorted(set(NUTNR['UID']))

In [12]:
# List the asset management directory once, then group its entries by UID
asset_management = os.listdir(asset_management_directory)
csv_dict = {}
for uid in uids:
    # An entry belongs to a UID when the UID appears in its name
    files = [entry for entry in asset_management if uid in entry]
    csv_dict[uid] = sorted(files)

**======================================================================================================================**
Get the serial numbers of the instruments and match them to the UIDs:

In [13]:
# Map each UID to the serial number (as a string) of its first matching row
serial_dict = {}
for uid in uids:
    sn = NUTNR.loc[NUTNR['UID'] == uid, 'Supplier\nSerial Number']
    serial_dict[uid] = str(sn.iloc[0])

**=======================================================================================================================**
Get the QCT capture files with the following Document Control Numbers (DCNs):
* ISUS: 3305-00108-XXXXX-A
* SUNA: 3305-00127-XXXXX-A

For the NUTNRs, the QCT files do not contain any calibration information. Rather, the calibration information is contained in separate **.CAL** files, which are updated each time. 

In [14]:
files = [file for file in os.listdir(doc_directory) if 'A' in file]
qct_files = []
for file in files:
    # Match on the DCN type field (3305-00108-... for ISUS, 3305-00127-... for
    # SUNA). A bare '108' / '127' substring test would also accept documents of
    # another type whose sequence number happens to contain those digits.
    if re.search(r'3305-00(108|127)-', file):
        qct_files.append(file)

**=======================================================================================================================**
Get the pre-deployment capture files, which should contain **.CAL** files, with the following DCNs:
* ISUS: 3305-00308-XXXXX-A
* SUNA: 3305-00327-XXXXX-A

In [15]:
files = [file for file in os.listdir(doc_directory) if 'A' in file]
pre_files = []
for file in files:
    # Match on the DCN type field (3305-00308-... for ISUS, 3305-00327-... for
    # SUNA). A bare '308' / '327' substring test would also accept documents of
    # another type whose sequence number happens to contain those digits.
    if re.search(r'3305-00(308|327)-', file):
        pre_files.append(file)

Open the pre-deployment files and get the instrument serial number to match each pre-deployment DCN to an individual instrument.

In [16]:
# Map each instrument serial number to the pre-deployment archives that
# contain a .cal file for it
predeployment = {}
for file in pre_files:
    path = generate_file_path(doc_directory, file, ext=['.zip'])
    with ZipFile(path) as zfile:
        cal_files = [name for name in zfile.namelist() if name.lower().endswith('.cal')]
        if len(cal_files) == 0:
            # Without a .cal file there is no serial number to read; skip the
            # archive instead of filing it under the previous archive's serial
            print(f"No .cal file found in {file}; skipping")
            continue
        data = zfile.read(cal_files[0]).decode('ascii')

    # The second comma-separated field of the first line holds the instrument
    # type followed by the serial number
    lines = data.splitlines()
    _, items, *ignore = lines[0].split(',')
    inst, sn, *ignore = items.split()
    sn = sn.lstrip('0')
    if inst == 'SUNA':
        sn = 'NTR-'+sn

    predeployment.setdefault(sn, []).append(file)

In [17]:
predeployment

{'270': ['3305-00308-00008-A.zip',
  '3305-00308-00036-A.zip',
  '3305-00308-00006-A.zip'],
 '269': ['3305-00308-00009-A.zip',
  '3305-00308-00046-A.zip',
  '3305-00308-00005-A.zip'],
 '271': ['3305-00308-00010-A.zip',
  '3305-00308-00062-A.zip',
  '3305-00308-00063-A.zip'],
 '274': ['3305-00308-00011-A.zip', '3305-00308-00064-A.zip'],
 '283': ['3305-00308-00012-A.zip', '3305-00308-00048-A.zip'],
 '284': ['3305-00308-00013-A.zip', '3305-00308-00014-A.zip'],
 '254': ['3305-00308-00015-A.zip',
  '3305-00308-00025-A.zip',
  '3305-00308-00058-A.zip'],
 '244': ['3305-00308-00016-A.zip',
  '3305-00308-00026-A.zip',
  '3305-00308-00049-A.zip',
  '3305-00308-00071-A.zip'],
 '255': ['3305-00308-00017-A.zip',
  '3305-00308-00043-A.zip',
  '3305-00308-00056-A.zip'],
 '253': ['3305-00308-00018-A.zip',
  '3305-00308-00041-A.zip',
  '3305-00308-00057-A.zip'],
 '256': ['3305-00308-00019-A.zip',
  '3305-00308-00053-A.zip',
  '3305-00308-00070-A.zip'],
 '263': ['3305-00308-00020-A.zip',
  '3305-00308-0

Based on the serial numbers, link the instrument uids to their pre-deployment files:

In [18]:
# Link every UID to its sorted pre-deployment files, or to None when the
# serial number has no pre-deployment entry
pre_dict = {}
for uid in sorted(serial_dict):
    sn = serial_dict[uid]
    matched = predeployment.get(sn)
    pre_dict[uid] = None if matched is None else sorted(matched)

In [19]:
pre_dict

{'CGINS-NUTNRB-00239': ['3305-00308-00001-A.zip',
  '3305-00308-00035-A.zip',
  '3305-00308-00061-A.zip'],
 'CGINS-NUTNRB-00240': None,
 'CGINS-NUTNRB-00260': ['3305-00308-00044-A.zip'],
 'CGINS-NUTNRB-00261': ['3305-00308-00007-A.zip',
  '3305-00308-00031-A.zip',
  '3305-00308-00060-A.zip'],
 'CGINS-NUTNRB-00262': ['3305-00308-00002-A.zip'],
 'CGINS-NUTNRB-00266': ['3305-00308-00004-A.zip', '3305-00308-00032-A.zip'],
 'CGINS-NUTNRB-00267': ['3305-00308-00045-A.zip'],
 'CGINS-NUTNRB-00268': ['3305-00308-00003-A.zip', '3305-00308-00034-A.zip'],
 'CGINS-NUTNRB-00269': ['3305-00308-00005-A.zip',
  '3305-00308-00009-A.zip',
  '3305-00308-00046-A.zip'],
 'CGINS-NUTNRB-00270': ['3305-00308-00006-A.zip',
  '3305-00308-00008-A.zip',
  '3305-00308-00036-A.zip'],
 'CGINS-NUTNRB-00271': ['3305-00308-00010-A.zip',
  '3305-00308-00062-A.zip',
  '3305-00308-00063-A.zip'],
 'CGINS-NUTNRB-00272': ['3305-00308-00047-A.zip'],
 'CGINS-NUTNRB-00273': ['3305-00308-00037-A.zip', '3305-00308-00066-A.zip'],
 

**=======================================================================================================================**
Repeat the Pre-deployment process with the post-deployment files. The DCNs are:
* ISUS: 3305-00508-XXXXX-A
* SUNA: 3305-00527-XXXXX-A

In [20]:
files = [file for file in os.listdir(doc_directory) if 'A' in file]
post_files = []
for file in files:
    # Match on the DCN type field (3305-00508-... for ISUS, 3305-00527-... for
    # SUNA). A bare '508' / '527' substring test would also accept documents of
    # another type whose sequence number happens to contain those digits.
    if re.search(r'3305-00(508|527)-', file):
        post_files.append(file)

In [21]:
# Map each instrument serial number to the post-deployment archives that
# contain a .cal file for it
postdeployment = {}
for file in post_files:
    path = generate_file_path(doc_directory, file, ext=['.zip'])
    with ZipFile(path) as zfile:
        cal_files = [name for name in zfile.namelist() if name.lower().endswith('.cal')]
        if len(cal_files) == 0:
            # Without a .cal file there is no serial number to read; skip the
            # archive instead of filing it under the previous archive's serial
            print(f"No .cal file found in {file}; skipping")
            continue
        data = zfile.read(cal_files[0]).decode('ascii')

    # The second comma-separated field of the first line holds the instrument
    # type followed by the serial number
    lines = data.splitlines()
    _, items, *ignore = lines[0].split(',')
    inst, sn, *ignore = items.split()
    sn = sn.lstrip('0')
    if inst == 'SUNA':
        sn = 'NTR-'+sn

    postdeployment.setdefault(sn, []).append(file)

In [22]:
# Link every UID to its post-deployment files (None when there are none)
post_dict = {}
for uid in sorted(serial_dict):
    sn = serial_dict[uid]
    post_dict[uid] = postdeployment.get(sn)

In [23]:
post_dict

{'CGINS-NUTNRB-00239': ['3305-00508-00040-A.zip'],
 'CGINS-NUTNRB-00240': None,
 'CGINS-NUTNRB-00260': ['3305-00508-00010-A.zip'],
 'CGINS-NUTNRB-00261': ['3305-00508-00022-A.zip', '3305-00508-00041-A.zip'],
 'CGINS-NUTNRB-00262': None,
 'CGINS-NUTNRB-00266': ['3305-00508-00024-A.zip'],
 'CGINS-NUTNRB-00267': ['3305-00508-00009-A.zip'],
 'CGINS-NUTNRB-00268': ['3305-00508-00023-A.zip'],
 'CGINS-NUTNRB-00269': ['3305-00508-00001-A.zip'],
 'CGINS-NUTNRB-00270': ['3305-00508-00038-A.zip'],
 'CGINS-NUTNRB-00271': ['3305-00508-00020-A.zip'],
 'CGINS-NUTNRB-00272': ['3305-00508-00011-A.zip'],
 'CGINS-NUTNRB-00273': ['3305-00508-00012-A.zip', '3305-00508-00043-A.zip'],
 'CGINS-NUTNRB-00274': ['3305-00508-00021-A.zip'],
 'CGINS-NUTNRB-00275': ['3305-00508-00025-A.zip'],
 'CGINS-NUTNRB-00276': ['3305-00508-00044-A.zip'],
 'CGINS-NUTNRB-00277': None,
 'CGINS-NUTNRB-00280': ['3305-00508-00049-A.zip'],
 'CGINS-NUTNRB-00283': None,
 'CGINS-NUTNRB-00284': ['3305-00508-00013-A.zip', '3305-00508-00014

**=======================================================================================================================**
Now, we need to identify the full paths to the relevant files


In [24]:
# Resolve the full path of every asset management csv, grouped by UID
csv_paths = {}
for uid in sorted(csv_dict):
    paths = [
        generate_file_path(asset_management_directory, name, ext=['.csv'])
        for name in csv_dict[uid]
    ]
    csv_paths[uid] = paths

In [25]:
csv_paths

{'CGINS-NUTNRB-00239': ['/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00239__20140910.csv',
  '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00239__20160930.csv'],
 'CGINS-NUTNRB-00240': ['/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00240__20131121.csv',
  '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00240__20151214.csv',
  '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00240__20170615.csv',
  '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00240__20180531.csv'],
 'CGINS-NUTNRB-00260': ['/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibration/NUTNRB/CGINS-NUTNRB-00260__20150218.csv',
  '/home/andrew/Documents/OOI-CGSN/ooi-integration/asset-management/calibratio

In [26]:
# Resolve the full path of every pre-deployment file, grouped by UID.
# UIDs without pre-deployment files get no entry at all.
pre_paths = {}
for uid in sorted(pre_dict):
    paths = []
    if pre_dict[uid] is None:
        continue
    paths.extend(generate_file_path(doc_directory, name) for name in pre_dict[uid])
    pre_paths[uid] = paths

In [27]:
pre_paths;

In [28]:
# Resolve the full path of every post-deployment file, grouped by UID.
# UIDs without post-deployment files are kept with a value of None.
post_paths = {}
for uid in sorted(post_dict):
    paths = []
    if post_dict[uid] is None:
        post_paths[uid] = None
        continue
    paths.extend(generate_file_path(doc_directory, name) for name in post_dict[uid])
    post_paths[uid] = paths

In [29]:
post_paths;

**=======================================================================================================================**

Find and return the calibration files which contain vendor-supplied calibration information:

In [30]:
# Serial numbers per UID, in the form consumed by get_calibration_files
# (which reads the first element of each value)
serial_nums = get_serial_nums(NUTNR, uids)

In [31]:
# Vendor calibration file names found in cal_directory, grouped by UID
cal_dict = get_calibration_files(serial_nums, cal_directory)

In [32]:
# Resolve the full path of every vendor calibration file, grouped by UID
cal_paths = {}
for uid in sorted(cal_dict):
    paths = [
        generate_file_path(cal_directory, name, ext=['.zip','.cap', '.txt', '.log'])
        for name in cal_dict[uid]
    ]
    cal_paths[uid] = paths

In [33]:
cal_paths;

**=======================================================================================================================**
# Parsing Calibration Coefficients
Above, we have worked through identifying and mapping the calibration files, pre-deployment files, and post-deployment files to the individual instruments through their UIDs and serial numbers. The next step is to open the relevant files and parse out the calibration coefficients. This will require writing a parser for the NUTNRs, including sub-functions to handle the different characteristics of the ISUS and SUNA instruments.

Start by opening the calibration files and read the data:

In [34]:
class NUTNRCalibration():
    """
    Stores the calibration values parsed from a NUTNR .cal file and writes
    them out as a calibration csv.

    Attributes:
        uid - the instrument uid; validated when it is set
        serial - the serial number read from the first line of the .cal file
        source - a 'Source file: ...' note set by open_cal, or None if no
            file has been opened yet
        coefficients - a dictionary of coefficient name to value(s)
        date - the list of file creation dates (YYYYMMDD strings) found
        notes - a dictionary of coefficient name to the note written beside it
    """

    def __init__(self, uid):
        self.serial = None
        self.uid = uid
        # Filled in by open_cal; None until a file has been opened
        self.source = None
        self.coefficients = {
            'CC_cal_temp':[],
            'CC_di':[],
            'CC_eno3':[],
            'CC_eswa':[],
            'CC_lower_wavelength_limit_for_spectra_fit':'217',
            'CC_upper_wavelength_limit_for_spectra_fit':'240',
            'CC_wl':[]
        }
        self.date = []
        # NOTE(review): the two wavelength-limit entries carry '217' / '240' as
        # their *notes*, which looks copied from the coefficients above -
        # confirm whether these notes should be empty
        self.notes = {
            'CC_cal_temp':'',
            'CC_di':'',
            'CC_eno3':'',
            'CC_eswa':'',
            'CC_lower_wavelength_limit_for_spectra_fit':'217',
            'CC_upper_wavelength_limit_for_spectra_fit':'240',
            'CC_wl':''
        }

    @property
    def uid(self):
        return self._uid

    @uid.setter
    def uid(self, d):
        # The uid must start with 5 characters, '-', 6 characters, '-', 5 characters
        r = re.compile('.{5}-.{6}-.{5}')
        if r.match(d) is not None:
            self._uid = d
        else:
            raise Exception(f"The instrument uid {d} is not a valid uid. Please check.")

    def load_cal(self, filepath):
        """
        Wrapper function to load all of the calibration coefficients

        Args:
            filepath - path to the directory with filename which has the
                calibration coefficients to be parsed and loaded
        Calls:
            open_cal
            parse_cal
        """

        data = self.open_cal(filepath)

        self.parse_cal(data)


    def open_cal(self, filepath):
        """
        Function that opens and reads in cal file
        information for a NUTNR. Zipfiles are acceptable inputs.

        Args:
            filepath - path to either a .zip archive containing .cal files
                or to a single .cal file
        Returns:
            data - the text of the .cal file
        Raises:
            FileNotFoundError - the zip archive holds no usable .cal file
            ValueError - the path is not a .zip or .cal file, or it is a
                .cal file whose name contains the letter 'z'
        """

        if filepath.endswith('.zip'):
            with ZipFile(filepath) as zfile:
                # Candidate .cal members. Member names containing the letter
                # 'z' (any case) are excluded.
                # NOTE(review): the test is on the full member name, so a 'z'
                # in a folder name inside the archive also excludes the file
                filename = [name for name in zfile.namelist()
                            if name.lower().endswith('.cal') and 'z' not in name.lower()]

                if not filename:
                    raise FileNotFoundError(f"No .cal file found in {filepath}")

                # With several candidates, take the one that sorts last
                latest = max(filename)
                data = zfile.read(latest).decode('ascii')

            self.source_file(filepath, latest)
            return data

        if filepath.lower().endswith('.cal'):
            name = filepath.split('/')[-1]
            if 'z' in name.lower():
                raise ValueError(f"{filepath} contains 'z' in its file name and is not loaded")
            with open(filepath) as file:
                data = file.read()
            self.source_file(filepath, name)
            return data

        raise ValueError(f"{filepath} is neither a .zip archive nor a .cal file")


    def source_file(self, filepath, filename):
        """
        Routine which parses out the source file and filename
        where the calibration coefficients are sourced from.

        Args:
            filepath - the path that was opened
            filename - the name of the .cal file that was read; for a .cal
                filepath it is replaced by the last component of the path
        """

        if filepath.lower().endswith('.cal'):
            # For a bare .cal file the parent folder name stands in for the dcn
            dcn = filepath.split('/')[-2]
            filename = filepath.split('/')[-1]
        else:
            dcn = filepath.split('/')[-1]

        self.source = f'Source file: {dcn} > {filename}'


    def parse_cal(self, data):
        """
        Parse the text of a .cal file into the serial number, calibration
        date(s) and calibration coefficients.

        Lines starting with 'H' are header lines and lines starting with 'E'
        are coefficient lines. Values are kept as strings, and coefficient
        lists are appended to on every call.

        Args:
            data - the text of the .cal file
        """

        for k,line in enumerate(data.splitlines()):

            if line.startswith('H'):
                _, info, *ignore = line.split(',')

                # The first line of the cal file contains the serial number
                if k == 0:
                    _, sn, *ignore = info.split()
                    if 'SUNA' in info:
                        self.serial = 'NTR-' + sn
                    else:
                        self.serial = sn

                # File creation time is when the instrument was calibrated.
                # May be multiple times for different cal coeffs
                if 'file creation time' in info.lower():
                    _, _, _, date, time = info.split()
                    date_time = pd.to_datetime(date + ' ' + time).strftime('%Y%m%d')
                    self.date.append(date_time)

                # The temperature at which it was calibrated
                # ('t_cal' also covers 't_cal_swa')
                if 't_cal' in info.lower():
                    _, cal_temp = info.split()
                    self.coefficients['CC_cal_temp'] = cal_temp

            # Now parse the calibration coefficients: fields 2, 3, 4 and 6 of
            # the line are the wavelength, eno3, eswa and di values
            if line.startswith('E'):
                _, wl, eno3, eswa, _, di = line.split(',')

                self.coefficients['CC_wl'].append(wl)
                self.coefficients['CC_di'].append(di)
                self.coefficients['CC_eno3'].append(eno3)
                self.coefficients['CC_eswa'].append(eswa)


    def write_csv(self, outpath):
        """
        This function writes the correctly named csv file for the NUTNR to the
        specified directory. The user is prompted for confirmation before the
        file is written.

        Args:
            outpath - directory path of where to write the csv file
        Raises:
            ValueError - raised if the object's coefficient dictionary
                has not been populated
        Returns:
            None. A csv of the calibration coefficients, named
                <uid>__<latest calibration date>.csv, is written to outpath
                if the user answers 'y'.
        """

        # Run a check that the coefficients have actually been loaded. The two
        # wavelength limits are always present, so count the non-empty values.
        populated = [value for value in self.coefficients.values() if len(value) > 0]
        if len(populated) <= 2:
            raise ValueError('No calibration coefficients have been loaded.')

        # Create a dataframe to write to the csv
        data = {
            'serial': [self.serial]*len(self.coefficients),
            'name': list(self.coefficients.keys()),
            'value': list(self.coefficients.values())
        }
        df = pd.DataFrame(data)

        # Now merge the coefficients dataframe with the notes
        if len(self.notes) > 0:
            notes = pd.DataFrame({
                'name':list(self.notes.keys()),
                'notes':list(self.notes.values())
            })
            df = df.merge(notes, how='outer', left_on='name', right_on='name')
        else:
            df['notes'] = ''

        # Add in the source file on the first row. A single .loc assignment is
        # used because chained df['notes'].iloc[0] = ... may write to a copy.
        if self.source is not None:
            first = df.index[0]
            df.loc[first, 'notes'] = df.loc[first, 'notes'] + ' ' + self.source

        # Sort the data by the coefficient name
        df = df.sort_values(by='name')

        # Generate the csv name from the latest calibration date
        cal_date = max(self.date)
        csv_name = self.uid + '__' + cal_date + '.csv'

        # Write the dataframe to a csv file
        check = input(f"Write {csv_name} to {outpath}? [y/n]: ")
        if check.lower().strip() == 'y':
            df.to_csv(outpath+'/'+csv_name, index=False)

In [None]:
# Create a calibration object for the instrument currently held in `uid`
# NOTE(review): `uid` is not assigned in this cell - confirm which earlier cell is meant to set it
nutnr = NUTNRCalibration(uid)

In [None]:
# Load the first vendor calibration file listed for this uid
nutnr.load_cal(cal_paths[uid][0])

In [None]:
len(nutnr.coefficients.values())

In [None]:
nutnr.serial

Need to add in the source of the calibration file:

In [None]:
# The csv is written into <current working directory>/temp, so make sure it exists first
ensure_dir('/'.join((os.getcwd(),'temp')))

In [None]:
# Write the parsed coefficients to <current working directory>/temp (prompts for confirmation)
nutnr.write_csv('/'.join((os.getcwd(),'temp')))

In [None]:
# Read a written csv back in to inspect it
# NOTE(review): absolute path with a fixed uid and date - it only matches the csv written above for that one instrument
df = pd.read_csv('/home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/CGINS-NUTNRB-01086__20180602.csv')

In [None]:
df

**=======================================================================================================================**
# Source Loading of Calibration Coefficients
With a NUTNR Calibration object created, we can now begin parsing the different calibration sources for each NUTNR. We will then compare all of the calibration values from each of the sources, checking for any discrepancies between them.

Below, I plan on going through each of the NUTNR UIDs and parsing the data into csvs. For sources which contain multiple calibrations, I plan on extracting each of the calibrations to a temporary folder using the following structure:

    <local working directory>/<temp>/<source>/data/<calibration file>
    
The separate calibrations will be saved using the standard UFrame naming convention with the following directory structure:

    <local working directory>/<temp>/<source>/<calibration csv>
    
The csvs themselves will also be copied to the temporary folder. This allows for the program to be looking into the same temp directory for every NUTNR check.

In [35]:
import shutil

In [112]:
uid = uids[37]
print(uid)

CGINS-NUTNRB-01107


In [113]:
temp_directory = '/'.join((os.getcwd(),'temp'))
# Start every run from an empty temp directory: purge whatever a previous run
# left behind, then (re)create the folder. The create step sits outside the
# `if` so that a first run, where nothing exists yet, still gets a temp folder.
if os.path.exists(temp_directory):
    shutil.rmtree(temp_directory)
ensure_dir(temp_directory)

Copy the existing csvs from asset management to the temp directory:

In [114]:
# Destination folder for the asset-management csvs. It is created up front
# (not inside the loop) so that it exists even when this UID has no csvs;
# the cells below list this folder unconditionally.
savedir = '/'.join((temp_directory,'csv'))
ensure_dir(savedir)
for path in csv_paths[uid]:
    # Keep the original file name; only the containing folder changes
    savepath = '/'.join((savedir, os.path.basename(path)))
    shutil.copyfile(path, savepath)

In [115]:
os.listdir(temp_directory+'/csv')

['CGINS-NUTNRB-01107__20181011.csv', 'CGINS-NUTNRB-01107__20171128.csv']

**=======================================================================================================================**
Load the calibration coefficients from the vendor calibration source files. Start by extracting or copying them to the source data folder in the temporary directory.

In [116]:
cal_paths[uid]

['/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Cal/NUTNR-B_SUNA_SN_NTR-1107_Calibration_Files_2017-11-28.zip',
 '/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Cal/NUTNR-B_SUNA_SN_NTR-1107_Calibration_Files_2019-03-13.zip']

Extract the calibration zip files to the local temp directory:

In [117]:
for path in cal_paths[uid]:
    # Name the extraction folder after the archive, minus its extension.
    # str.strip('.zip') is not a suffix remover: it strips any of the
    # characters '.', 'z', 'i', 'p' from BOTH ends of the name, so an archive
    # whose name starts or ends with one of those letters would be mangled.
    exdir = os.path.splitext(os.path.basename(path))[0]
    expath = '/'.join((temp_directory,'cal','data',exdir))
    with ZipFile(path) as zfile:
        # Keep only the .cal members.
        # NOTE(review): members whose name contains a capital 'Z' are skipped,
        # presumably to drop one particular file variant; confirm the intent.
        files = [name for name in zfile.namelist() if name.lower().endswith('.cal') and 'Z' not in name]
        # As before, the folder is only created when there is something to extract
        if files:
            ensure_dir(expath)
        for file in files:
            zfile.extract(file,path=expath)

Write the vendor calibration files to csvs following the UFrame convention:

In [118]:
# Folder that receives the csvs generated from the vendor calibration files
savedir = '/'.join((temp_directory,'cal'))
ensure_dir(savedir)
# Walk everything that was extracted under <temp>/cal/data and parse each file
for dirpath, dirnames, filenames in os.walk('/'.join((temp_directory,'cal','data'))):
    for file in filenames:
        filepath = os.path.join(dirpath, file)
        # With the filepath for the given calibration retrieved, start a fresh
        # NUTNRCalibration instance for this file and parse its coefficients.
        # write_csv prompts for a y/n confirmation before it writes each csv.
        nutnr = NUTNRCalibration(uid)
        nutnr.load_cal(filepath)
        nutnr.write_csv(savedir)

Write CGINS-NUTNRB-01107__20190313.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/cal? [y/n]: y
Write CGINS-NUTNRB-01107__20171128.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/cal? [y/n]: y
Write CGINS-NUTNRB-01107__20171129.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/cal? [y/n]: y


**=======================================================================================================================**
Repeat the above process with the predeployment files:

In [119]:
pre_paths[uid]

['/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Results/3305-00327-00049-A.zip']

In [120]:
# Not every UID has pre-deployment records. Only the lookup itself is guarded:
# the previous blanket `except KeyError: pass` around the whole cell would also
# have hidden a KeyError raised while parsing or writing a calibration.
try:
    pre_archives = pre_paths[uid]
except KeyError:
    pre_archives = None
if pre_archives is None:
    pre_archives = []

for path in pre_archives:
    # Name the extraction folder after the archive, minus its extension.
    # str.strip('.zip') strips the characters '.', 'z', 'i', 'p' from both ends
    # of the name rather than removing the suffix, so splitext is used instead.
    exdir = os.path.splitext(os.path.basename(path))[0]
    expath = '/'.join((temp_directory,'pre','data',exdir))
    with ZipFile(path) as zfile:
        # Keep only the .cal members.
        # NOTE(review): members whose name contains a capital 'Z' are skipped,
        # presumably to drop one particular file variant; confirm the intent.
        files = [name for name in zfile.namelist() if name.lower().endswith('.cal') and 'Z' not in name]
        if files:
            ensure_dir(expath)
        for file in files:
            zfile.extract(file,path=expath)

# Always create the output folder, even when there was nothing to extract, so
# the later listing of <temp>/pre finds an (empty) folder instead of failing.
savedir = '/'.join((temp_directory,'pre'))
ensure_dir(savedir)
# Now parse the calibration coefficients
for dirpath, dirnames, filenames in os.walk('/'.join((temp_directory,'pre','data'))):
    for file in filenames:
        filepath = os.path.join(dirpath, file)
        # With the filepath for the given calibration retrieved, start a fresh
        # NUTNRCalibration instance and parse the coefficients.
        nutnr = NUTNRCalibration(uid)
        nutnr.load_cal(filepath)
        nutnr.write_csv(savedir)


Write CGINS-NUTNRB-01107__20171129.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/pre? [y/n]: y
Write CGINS-NUTNRB-01107__20181011.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/pre? [y/n]: y


**=======================================================================================================================**
Repeat the above process with the post-deployment files:

In [121]:
post_paths[uid]

['/media/andrew/OS/Users/areed/Documents/Project_Files/Records/Instrument_Records/NUTNR/NUTNR_Results/3305-00527-00033-A.zip']

In [122]:
# Not every UID has post-deployment records: treat both a missing key and a
# None entry as "no archives", matching how the pre-deployment cell tolerates
# a missing UID.
try:
    post_archives = post_paths[uid]
except KeyError:
    post_archives = None
if post_archives is None:
    post_archives = []

for path in post_archives:
    # Name the extraction folder after the archive, minus its extension.
    # str.strip('.zip') strips the characters '.', 'z', 'i', 'p' from both ends
    # of the name rather than removing the suffix, so splitext is used instead.
    exdir = os.path.splitext(os.path.basename(path))[0]
    expath = '/'.join((temp_directory,'post','data',exdir))
    with ZipFile(path) as zfile:
        # Keep only the .cal members.
        # NOTE(review): members whose name contains a capital 'Z' are skipped,
        # presumably to drop one particular file variant; confirm the intent.
        files = [name for name in zfile.namelist() if name.lower().endswith('.cal') and 'Z' not in name]
        if files:
            ensure_dir(expath)
        for file in files:
            zfile.extract(file,path=expath)

# Always create the output folder, even when there was nothing to extract, so
# the later listing of <temp>/post finds an (empty) folder instead of failing.
savedir = '/'.join((temp_directory,'post'))
ensure_dir(savedir)
# Now parse the calibration coefficients
for dirpath, dirnames, filenames in os.walk('/'.join((temp_directory,'post','data'))):
    for file in filenames:
        filepath = os.path.join(dirpath, file)
        # With the filepath for the given calibration retrieved, start a fresh
        # NUTNRCalibration instance and parse the coefficients.
        nutnr = NUTNRCalibration(uid)
        nutnr.load_cal(filepath)
        nutnr.write_csv(savedir)

Write CGINS-NUTNRB-01107__20181011.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/post? [y/n]: y
Write CGINS-NUTNRB-01107__20190215.csv to /home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/post? [y/n]: y


**=======================================================================================================================**
# Calibration Coefficient Comparison
We have now successfully parsed the calibration files from all the possible sources: the vendor calibration files, the pre-deployment files, and the post-deployment files. Furthermore, we have saved csvs in the UFrame format for all of these calibrations. Now, we want to load those csvs into pandas dataframes, which allow for easy element-by-element comparison of calibration coefficients.

First, load the names of the files into a pandas dataframe to compare between the different calibration dates. This will allow for checking of which calibrations should match up to the csv currently contained in asset management.

In [123]:
def get_file_date(x):
    """
    Extract the date portion of a calibration csv file name.

    The date is the text between the '__' separator and the first '.' that
    follows it, e.g. 'CGINS-NUTNRB-01107__20171128.csv' -> '20171128'.

    Args:
        x - the file name (or path); it is converted with str() first
    Returns:
        the text between '__' and the next '.', as a string
    Raises:
        ValueError - if x has no '__', or no '.' after the '__'
    """
    fname = str(x)
    start = fname.index('__') + len('__')
    # Look for the '.' only after the separator, so that a '.' earlier in the
    # string (e.g. a './' path prefix) cannot cut the date short
    stop = fname.index('.', start)
    return fname[start:stop]

In [124]:
# Asset-management csvs: one row per file, indexed by the date in its name
csv_listing = sorted(os.listdir(temp_directory+'/csv'))
csv_files = pd.DataFrame(csv_listing, columns=['csv'])
csv_files['cal date'] = csv_files['csv'].apply(get_file_date)
csv_files = csv_files.set_index('cal date')
csv_files

Unnamed: 0_level_0,csv
cal date,Unnamed: 1_level_1
20171128,CGINS-NUTNRB-01107__20171128.csv
20181011,CGINS-NUTNRB-01107__20181011.csv


In [125]:
# Vendor-calibration csvs: skip sub-folders (e.g. data), keep only the files
cal_csv_dir = temp_directory+'/cal'
files = sorted(
    entry for entry in os.listdir(cal_csv_dir)
    if not os.path.isdir(cal_csv_dir+'/'+entry)
)
cal_files = pd.DataFrame(files, columns=['cal'])
cal_files['cal date'] = cal_files['cal'].apply(get_file_date)
cal_files = cal_files.set_index('cal date')
cal_files

Unnamed: 0_level_0,cal
cal date,Unnamed: 1_level_1
20171128,CGINS-NUTNRB-01107__20171128.csv
20171129,CGINS-NUTNRB-01107__20171129.csv
20190313,CGINS-NUTNRB-01107__20190313.csv


In [126]:
# Pre-deployment csvs: skip sub-folders (e.g. data), keep only the files
pre_csv_dir = temp_directory+'/pre'
files = sorted(
    entry for entry in os.listdir(pre_csv_dir)
    if not os.path.isdir(pre_csv_dir+'/'+entry)
)
pre_files = pd.DataFrame(files, columns=['pre'])
pre_files['cal date'] = pre_files['pre'].apply(get_file_date)
pre_files = pre_files.set_index('cal date')
pre_files

Unnamed: 0_level_0,pre
cal date,Unnamed: 1_level_1
20171129,CGINS-NUTNRB-01107__20171129.csv
20181011,CGINS-NUTNRB-01107__20181011.csv


In [127]:
# Post-deployment csvs: skip sub-folders (e.g. data), keep only the files
post_csv_dir = temp_directory+'/post'
files = sorted(
    entry for entry in os.listdir(post_csv_dir)
    if not os.path.isdir(post_csv_dir+'/'+entry)
)
post_files = pd.DataFrame(files, columns=['post'])
post_files['cal date'] = post_files['post'].apply(get_file_date)
post_files = post_files.set_index('cal date')
post_files

Unnamed: 0_level_0,post
cal date,Unnamed: 1_level_1
20181011,CGINS-NUTNRB-01107__20181011.csv
20190215,CGINS-NUTNRB-01107__20190215.csv


In [128]:
# Line the four sources up by calibration date for a side-by-side view;
# '-999' marks a date for which a source has no file
df_files = (
    csv_files
    .join(cal_files, how='outer')
    .join(pre_files, how='outer')
    .join(post_files, how='outer')
    .fillna(value='-999')
)
df_files

Unnamed: 0_level_0,csv,cal,pre,post
cal date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20171128,CGINS-NUTNRB-01107__20171128.csv,CGINS-NUTNRB-01107__20171128.csv,-999,-999
20171129,-999,CGINS-NUTNRB-01107__20171129.csv,CGINS-NUTNRB-01107__20171129.csv,-999
20181011,CGINS-NUTNRB-01107__20181011.csv,-999,CGINS-NUTNRB-01107__20181011.csv,CGINS-NUTNRB-01107__20181011.csv
20190215,-999,-999,-999,CGINS-NUTNRB-01107__20190215.csv
20190313,-999,CGINS-NUTNRB-01107__20190313.csv,-999,-999


We can use the above dataframe to assess which files correspond to each other. If any of the csv files need to be renamed, now is the time to go ahead and do so. This will allow for direct comparison.

In [129]:
# Manual step for the UID printed above: rename the temp copy of the
# asset-management csv from 20171128 to 20171129 so that it lines up with the
# 20171129 files in the cal and pre columns of df_files. Only the copy under
# <working directory>/temp/csv is touched.
src = '/'.join((os.getcwd(),'temp','csv','CGINS-NUTNRB-01107__20171128.csv'))
dst = '/'.join((os.getcwd(),'temp','csv','CGINS-NUTNRB-01107__20171129.csv'))
shutil.move(src, dst)

'/home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/csv/CGINS-NUTNRB-01107__20171129.csv'

In [130]:
# Re-list the asset-management csvs so the table reflects the rename above
csv_listing = sorted(os.listdir(temp_directory+'/csv'))
csv_files = pd.DataFrame(csv_listing, columns=['csv'])
csv_files['cal date'] = csv_files['csv'].apply(get_file_date)
csv_files = csv_files.set_index('cal date')
csv_files

Unnamed: 0_level_0,csv
cal date,Unnamed: 1_level_1
20171129,CGINS-NUTNRB-01107__20171129.csv
20181011,CGINS-NUTNRB-01107__20181011.csv


In [131]:
# Rebuild the side-by-side view from the refreshed csv listing;
# '-999' marks a date for which a source has no file
df_files = (
    csv_files
    .join(cal_files, how='outer')
    .join(pre_files, how='outer')
    .join(post_files, how='outer')
    .fillna(value='-999')
)
df_files

Unnamed: 0_level_0,csv,cal,pre,post
cal date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20171128,-999,CGINS-NUTNRB-01107__20171128.csv,-999,-999
20171129,CGINS-NUTNRB-01107__20171129.csv,CGINS-NUTNRB-01107__20171129.csv,CGINS-NUTNRB-01107__20171129.csv,-999
20181011,CGINS-NUTNRB-01107__20181011.csv,-999,CGINS-NUTNRB-01107__20181011.csv,CGINS-NUTNRB-01107__20181011.csv
20190215,-999,-999,-999,CGINS-NUTNRB-01107__20190215.csv
20190313,-999,CGINS-NUTNRB-01107__20190313.csv,-999,-999


Now, we have renamed any csv files to their likely calibration source. Our next step is to do the actual coefficient comparisons.

In [148]:
load_directory = '/'.join((temp_directory,'csv'))
fname = 'CGINS-NUTNRB-01107__20181011.csv'
CSV = pd.read_csv(load_directory+'/'+fname)

In [149]:
CSV

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20,date in filename comes from latest caldate wit...
1,NTR-1107,CC_di,"[-6.75, -2.79166667, -1.95833333, -1.20833333,...",
2,NTR-1107,CC_eno3,"[-0.00567069, -0.00111814, -0.00169506, 0.0046...",
3,NTR-1107,CC_eswa,"[0.00063118, -0.00302337, -0.00054055, -0.0035...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,
6,NTR-1107,CC_wl,"[188.93, 189.74, 190.54, 191.35, 192.15, 192.9...",


In [150]:
load_directory = '/'.join((temp_directory,'pre'))
PRE = pd.read_csv(load_directory+'/'+fname)

In [151]:
PRE

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20.00,Source file: 3305-00327-00049-A > SNA1107D.CAL
1,NTR-1107,CC_di,"['-6.75000000', '-2.79166667', '-1.95833333', ...",
2,NTR-1107,CC_eno3,"['-0.00567069', '-0.00111814', '-0.00169506', ...",
3,NTR-1107,CC_eswa,"['0.00063118', '-0.00302337', '-0.00054055', '...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,217
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,240
6,NTR-1107,CC_wl,"['188.93', '189.74', '190.54', '191.35', '192....",


In [60]:
load_directory = '/'.join((temp_directory,'post'))
POST = pd.read_csv(load_directory+'/'+fname)

FileNotFoundError: [Errno 2] File b'/home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/post/CGINS-NUTNRB-01103__20171130.csv' does not exist: b'/home/andrew/Documents/OOI-CGSN/QAQC_Sandbox/Metadata_Review/temp/post/CGINS-NUTNRB-01103__20171130.csv'

In [None]:
POST

In [136]:
load_directory = '/'.join((temp_directory,'cal'))
CAL = pd.read_csv(load_directory+'/'+fname)

In [137]:
CAL

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20.00,Source file: NUTNR-B_SUNA_SN_NTR-1107_Calibra...
1,NTR-1107,CC_di,"['-1.70833333', '-0.75000000', '-1.41666667', ...",
2,NTR-1107,CC_eno3,"['-0.00567069', '-0.00111814', '-0.00169506', ...",
3,NTR-1107,CC_eswa,"['0.00063118', '-0.00302337', '-0.00054055', '...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,217
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,240
6,NTR-1107,CC_wl,"['188.93', '189.74', '190.54', '191.35', '192....",


In [152]:
def reformat_arrays(array):
    """
    Convert a coefficient value, as read back from a csv, into float(s).

    Args:
        array - the value to convert. Normally the string form of a number
            ("20.00") or of a list ("['1.0', '2.0']" or "[1.0, 2.0]"). Values
            that are already numbers or sequences of numbers are accepted too,
            so applying this function a second time (e.g. when a cell is
            re-run) is harmless.
    Returns:
        a single float if the value holds exactly one number, otherwise a
        list of floats
    Raises:
        ValueError - if a piece of the value cannot be converted to a float
    """
    if isinstance(array, str):
        # Strip the quoting/bracket characters left over from the list repr,
        # then split into the individual coefficients
        cleaned = array.replace("'", "").replace('[', '').replace(']', '')
        values = [float(num.strip()) for num in cleaned.split(',')]
    elif isinstance(array, (list, tuple, np.ndarray)):
        # Already split into a sequence; just make sure every entry is a float
        values = [float(num) for num in array]
    else:
        # A bare number (pandas hands these over when a column is all numeric)
        values = [float(array)]
    # A single coefficient is returned as a number rather than a 1-item list
    if len(values) == 1:
        return values[0]
    return values

In [153]:
CSV['value'] = CSV['value'].apply(lambda x: reformat_arrays(x))

In [154]:
PRE['value'] = PRE['value'].apply(lambda x: reformat_arrays(x))

In [141]:
CAL['value'] = CAL['value'].apply(lambda x: reformat_arrays(x))

In [None]:
POST['value'] = POST['value'].apply(lambda x: reformat_arrays(x))

In [155]:
CSV

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20,date in filename comes from latest caldate wit...
1,NTR-1107,CC_di,"[-6.75, -2.79166667, -1.95833333, -1.20833333,...",
2,NTR-1107,CC_eno3,"[-0.00567069, -0.00111814, -0.00169506, 0.0046...",
3,NTR-1107,CC_eswa,"[0.00063118, -0.00302337, -0.00054055, -0.0035...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,
6,NTR-1107,CC_wl,"[188.93, 189.74, 190.54, 191.35, 192.15, 192.9...",


In [156]:
PRE

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20,Source file: 3305-00327-00049-A > SNA1107D.CAL
1,NTR-1107,CC_di,"[-6.75, -2.79166667, -1.95833333, -1.20833333,...",
2,NTR-1107,CC_eno3,"[-0.00567069, -0.00111814, -0.00169506, 0.0046...",
3,NTR-1107,CC_eswa,"[0.00063118, -0.00302337, -0.00054055, -0.0035...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,217
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,240
6,NTR-1107,CC_wl,"[188.93, 189.74, 190.54, 191.35, 192.15, 192.9...",


In [None]:
POST

In [144]:
CAL

Unnamed: 0,serial,name,value,notes
0,NTR-1107,CC_cal_temp,20,Source file: NUTNR-B_SUNA_SN_NTR-1107_Calibra...
1,NTR-1107,CC_di,"[-1.70833333, -0.75, -1.41666667, 0.16666667, ...",
2,NTR-1107,CC_eno3,"[-0.00567069, -0.00111814, -0.00169506, 0.0046...",
3,NTR-1107,CC_eswa,"[0.00063118, -0.00302337, -0.00054055, -0.0035...",
4,NTR-1107,CC_lower_wavelength_limit_for_spectra_fit,217,217
5,NTR-1107,CC_upper_wavelength_limit_for_spectra_fit,240,240
6,NTR-1107,CC_wl,"[188.93, 189.74, 190.54, 191.35, 192.15, 192.9...",


In [157]:
print(PRE['notes'].iloc[0])

 Source file: 3305-00327-00049-A > SNA1107D.CAL


In [158]:
np.equal(CSV['value'],PRE['value'])

0    True
1    True
2    True
3    True
4    True
5    True
6    True
Name: value, dtype: bool

In [None]:
np.isclose(CSV['value'].iloc[1],PRE['value'].iloc[1],rtol=1e-8,atol=1e-11)

In [None]:
np.isclose(PRE['value'].iloc[1],CSV['value'].iloc[1],rtol=1e-8,atol=1e-11)

In [None]:
def _coeff_as_floats(value):
    """Return one coefficient value (number, sequence, or csv string) as a 1-D float array."""
    if isinstance(value, str):
        # String form as read back from a csv, e.g. "['1.0', '2.0']" or "20.00"
        tokens = value.replace("'", "").replace('[', '').replace(']', '').split(',')
        return np.array([float(token) for token in tokens], dtype=float)
    return np.atleast_1d(np.asarray(value, dtype=float))


def _coeff_rows_close(left, right):
    """Compare two equal-length coefficient columns row by row with np.isclose."""
    matches = []
    for a, b in zip(left, right):
        a, b = _coeff_as_floats(a), _coeff_as_floats(b)
        # Arrays of different length can never match; otherwise all elements must
        matches.append(a.shape == b.shape and bool(np.isclose(a, b).all()))
    return np.array(matches, dtype=bool)


def check_cal_coeffs(coeffs_dict):
    """
    Compare the calibration coefficients from several sources with each other.

    Each source is compared with the next one in the dictionary (the last one
    wraps around to the first), row by row on the 'value' entry. A row is True
    in the result only if every comparison that was made agreed on that row.

    Args:
        coeffs_dict - a dictionary mapping a source name to an object whose
            'value' entry holds that source's coefficients, in the same row
            order for every source. Each value may be a number, a sequence of
            numbers, or the string form of either as read back from a csv.
    Returns:
        mask - a boolean numpy array with one entry per row; True where the
            compared sources agree within the np.isclose default tolerances
    Raises:
        ValueError - if no two sources could be compared (coeffs_dict is
            empty, or every pair holds a different number of rows)
    """
    # Part 1: coeff by coeff comparison between each source and the next one
    keys = list(coeffs_dict.keys())
    comparison = {}
    for i, key in enumerate(keys):
        other = keys[(i + 1) % len(keys)]
        left = coeffs_dict.get(key)['value']
        right = coeffs_dict.get(other)['value']
        # Sources with a different number of rows cannot be lined up; skip them
        if len(left) == len(right):
            comparison[(key, other)] = _coeff_rows_close(left, right)

    if not comparison:
        raise ValueError('No comparable calibration sources: the dictionary is empty '
                         'or every pair of sources has a different number of coefficients.')

    # Part 2: a row passes only if it passed in every comparison from part 1
    masks = iter(comparison.values())
    mask = next(masks)
    for other_mask in masks:
        mask = np.logical_and(mask, other_mask)

    return mask

In [None]:
# For every calibration date in df_files: load each source's csv, compare the
# coefficients across the sources, and record which rows of the
# asset-management csv disagree.
result = {}
for cal_date in df_files.index:
    # Part 1, load all of the csv files
    coeffs_dict = {}
    for source,fname in df_files.loc[cal_date].items():
        # '-999' is the fill value df_files uses where a source has no file
        if fname != '-999':
            # Despite its name, load_directory is the full path to the csv file
            load_directory = '/'.join((os.getcwd(),'temp',source,fname))
            df_coeffs = pd.read_csv(load_directory)
            # Print the serial number(s) found in each file for a visual check
            for i in list(set(df_coeffs['serial'])):
                print(source + '-' + fname + ': ' + str(i))
            # Index and sort by coefficient name so rows line up across sources
            df_coeffs.set_index(keys='name',inplace=True)
            df_coeffs.sort_index(inplace=True)
            # NOTE(review): 'value' is left exactly as read_csv parsed it (no
            # reformat_arrays call here) -- confirm check_cal_coeffs can
            # compare it in that form.
            coeffs_dict.update({source:df_coeffs})
        else:
            pass
    
    # Part 2, now check the calibration coefficients
    mask = check_cal_coeffs(coeffs_dict)
    
    # Part 3: get the calibration coefficients that are wrong in the
    # asset-management csv and store them
    fname = df_files.loc[cal_date]['csv']
    if fname == '-999':
        incorrect = 'No csv file.'
    else:
        # Rows of the asset-management csv where the mask is False
        incorrect = coeffs_dict['csv'][mask == False]
    # NOTE(review): result is keyed by the csv file name, so every date without
    # a csv shares the key '-999' and only the last such date is kept.
    result.update({fname:incorrect})

In [None]:
coeffs_dict.keys()