<!-- Auto table of contents -->
<h1 class='tocIgnore'>AutoDS : Tests de validation</h1>
<p>(module <b>autods</b> d'interface python à MCDS.exe)</p>
<div style="overflow-y: auto">
  <h2 class='tocIgnore'>Table des matières</h2>
  <div id="toc"></div>
</div>

In [None]:
%%javascript
$.getScript('../ipython_notebook_toc.js')

In [None]:
%matplotlib inline

In [None]:
import sys
import os
import importlib as implib

import re

from collections import OrderedDict as odict, namedtuple as ntuple

import math
import numpy as np
import pandas as pd

from tqdm import tqdm

from IPython.display import HTML

import matplotlib.pyplot as plt

import plotly as ply
import plotly.graph_objs as plygo

In [None]:
import autods as ads

In [None]:
# Activate Warnings as Exception
#import warnings
#warnings.filterwarnings('error')

# MCDS : Analyses avec de vraies données

(pour comparaison à des analyses faites à la main avec Distance 7.3)

## 1. Construction des cas tests

In [None]:
# Reference results table (baseline for the comparison), with the 'Name' column renamed to 'Model'.
refFileName = 'ACDC2019-Papyrus-ALAARV-TURMER-resultats-distance-73.xlsx'
refFilePath = os.path.join('AutoDS', 'refout', refFileName)
dfRefRes = pd.read_excel(refFilePath).rename(columns={'Name': 'Model'})

# A sample is one distinct combination of these columns :
# number the samples (group numbers, groups left unsorted) in a new leading 'Sample' column.
sampleIdCols = ['Species', 'Periods', 'Precision', 'Duration']
sampleNums = dfRefRes.groupby(sampleIdCols, sort=False).ngroup()
dfRefRes.insert(0, column='Sample', value=sampleNums)

In [None]:
dfRefRes.head()

In [None]:
# Build the analysis test cases from the reference results table, using only its
# input columns (sample and model identification) : no cheating with reference outputs :-)
modelIdCols = ['Model', 'LTrunc', 'RTrunc', 'FitDistCuts', 'DiscrDistCuts']
caseIdCols = ['Sample'] + sampleIdCols + modelIdCols
dfAnlysCases = dfRefRes[caseIdCols].copy()


def _keyFnFromModel(model):
    """Key function name deduced from the model name prefix :
    'Unif...' => 'UNIFORM', 'Half...' => 'HNORMAL', anything else => 'HAZARD'."""
    if model.startswith('Unif'):
        return 'UNIFORM'
    if model.startswith('Half'):
        return 'HNORMAL'
    return 'HAZARD'


def _adjSerFromModel(model):
    """Adjustment series name deduced from the model name :
    ' Cos' inside => 'COSINE', ' SimPoly' inside => 'POLY', anything else => 'HERMITE'."""
    if model.find(' Cos') > 0:
        return 'COSINE'
    if model.find(' SimPoly') > 0:
        return 'POLY'
    return 'HERMITE'


def _inFileNameFromCase(sRow):
    """Name of the input data file for a test case, built from its sample description."""
    periods = 'AB' if 'A+B' in sRow.Periods else 'A' if 'A' in sRow.Periods else 'B'
    # The original test was the accidental chained comparison "'5' in D == '5 mn'"
    # (i.e. "'5' in D and D == '5 mn'") ; a plain prefix test is what was meant,
    # and mirrors the Precision test below.
    duration = 5 if sRow.Duration.strip().startswith('5') else 10
    decimals = 6 if sRow.Precision.startswith('6 déc') else 1
    return 'ACDC2019-Papyrus-{}-{}-{}mn-{}dec-dist.txt' \
           .format(sRow.Species, periods, duration, decimals)


dfAnlysCases['KeyFn'] = dfAnlysCases.Model.apply(_keyFnFromModel)
dfAnlysCases['AdjSer'] = dfAnlysCases.Model.apply(_adjSerFromModel)
dfAnlysCases['InFileName'] = dfAnlysCases.apply(_inFileNameFromCase, axis='columns')

dfAnlysCases

## 2. Préparation des analyses

In [None]:
# Names of the data set fields declared as decimal when loading each input file
# (passed as decimalFields to ads.DataSet in the analyses loop).
decimalFields = ['Point transect*Survey effort', 'Observation*Radial distance']

In [None]:
# Analysis engine : one MCDS engine shared by all the analyses,
# with AutoDS/mcds-out as its work folder (results files are also saved there, see below).
mcds = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'),
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial')

In [None]:
# Frozen analysis parameters (a choice here) :
# passed unchanged to every analysis as estimCriterion and cvInterval.
KEstimCriterion = 'AIC'
KCVInterval = 95

In [None]:
# Results set construction : its custom columns are the test case identification columns,
# as 3-level column labels ('sample' or 'model', column name, 'Value').
sampleCustCols = [('sample', col, 'Value') for col in ['Sample'] + sampleIdCols]
modelCustCols = [('model', col, 'Value') for col in modelIdCols]
custCols = sampleCustCols + modelCustCols
miCustCols = pd.MultiIndex.from_tuples(custCols)

# Translation table of the custom columns (English = the column names themselves, and French).
frCaseIdCols = ['Echantillon', 'Espèce', 'Périodes', 'Précision', 'Durée',
                'Modèle', 'TroncGche', 'TroncDrte', 'TranchDistMod', 'TranchDistDiscr']
dfCustColTrans = pd.DataFrame(index=miCustCols, data={'en': caseIdCols, 'fr': frCaseIdCols})

results = ads.MCDSResultsSet(miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans)

In [None]:
# Flag : have the analyses been run in this session ? Set to True at the end of the analyses
# execution cell ; while it is False, the reload cell reads the results back from the saved file.
computed = False

## 3a. Ou : Exécution des analyses

In [None]:
# Converters from reference table cells to analysis parameters
# (defined once here, rather than redefined at every loop iteration).
def nan2None(v):
    """Return None when v is null (NaN, None, ...), v itself otherwise."""
    return None if pd.isnull(v) else v

def distCutsFromSpecs(v):
    """Convert a distance cuts specification cell to an analysis parameter value.

    * null cell => None,
    * whole number (Python or NumPy integer, or float without fractional part) => int,
    * anything else is expected to be a string of comma-separated numbers => list of floats.
    """
    if pd.isnull(v):
        return None
    # NumPy integers are not instances of int, and spreadsheet readers may also
    # return whole numbers as floats : accept all of these, not only Python ints.
    if isinstance(v, (int, np.integer)):
        return int(v)
    if isinstance(v, (float, np.floating)) and float(v).is_integer():
        return int(v)
    return [float(x) for x in v.split(',')]

tsStart = pd.Timestamp.now()
print('Started at', tsStart)
print()

# Run all analyses
lastInFileName = None
for ind, sCase in dfAnlysCases.iterrows():

    # Analysis name = variable part of the input file name + "slugified" model name.
    name = sCase.InFileName[len('ACDC2019-Papyrus')+1:-len('-dist.txt')]
    name += '-' + sCase.Model.lower().translate(str.maketrans({c:'-' for c in ' ,.:;()/'}))
    print('#{:3d}'.format(ind+1), name, sCase.KeyFn, sCase.AdjSer, end='\n'*2)

    # Create data set if not already done (consecutive cases sharing the same input file reuse it).
    if lastInFileName != sCase.InFileName:
        ds = ads.DataSet(os.path.join('AutoDS', 'refin', sCase.InFileName), decimalFields=decimalFields)
        lastInFileName = sCase.InFileName

    # Run analysis
    analysis = ads.MCDSAnalysis(engine=mcds, dataSet=ds, name=name,
                                estimKeyFn=sCase.KeyFn, estimAdjustFn=sCase.AdjSer,
                                estimCriterion=KEstimCriterion, cvInterval=KCVInterval,
                                minDist=nan2None(sCase.LTrunc), maxDist=nan2None(sCase.RTrunc),
                                fitDistCuts=distCutsFromSpecs(sCase.FitDistCuts),
                                discrDistCuts=distCutsFromSpecs(sCase.DiscrDistCuts))
    sResult = analysis.run()

    # Save results, headed by the case identification values
    # (the first len(caseIdCols) items of the case row, in the order of the custom columns).
    sHead = pd.Series(data=[sCase[col] for col in sCase.index[:len(caseIdCols)]], index=miCustCols)

    results.append(sResult, sCustomHead=sHead)

tsEnd = pd.Timestamp.now()
print('Finished at', tsEnd, ': duration', str(tsEnd - tsStart).replace('0 days ', ''))

computed = True

In [None]:
# Save the results to a workbook in the engine work folder,
# so that they can be reloaded later rather than recomputed.
resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')

results.toExcel(resFileName, sheetName='AutoDSVal')

In [None]:
# Check translation
dfActTrRes = results.dfTransData('fr')

dfActTrRes.head()

## 3b. Ou : Rechargement des résultats d'analyses

(déjà faites ci-dessus)

In [None]:
# Reload the previously saved results, unless they have just been computed in this session.
if computed:

    print('Just computed, not reloading from backup !')

else:

    resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')

    results.fromExcel(resFileName, sheetName='AutoDSVal')

print(f'{len(results)} analyses to compare')

# Comparaison des résultats à la référence

(référence = analyses faites "à la main" avec Distance 7.3)

## 1. Extraction des données à comparer

In [None]:
# Analysis results
dfActRes = results.dfData

dfActRes.head()

In [None]:
# Columns of the automated analyses results selected for comparison, each mapped to the name
# of the matching column available in the reference table
# (keys: 3-level column labels of the actual results ; values: reference column names).
dCompCols = \
{
    ('sample', 'Sample', 'Value'):    'Sample',
    ('sample', 'Species', 'Value'):   'Species',
    ('sample', 'Periods', 'Value'):   'Periods',
    ('sample', 'Precision', 'Value'): 'Precision',
    ('sample', 'Duration', 'Value'):  'Duration',
    
    ('model',  'Model', 'Value'):         'Model',
    ('model',  'LTrunc', 'Value'):        'LTrunc',
    ('model',  'RTrunc', 'Value'):        'RTrunc',
    ('model',  'FitDistCuts', 'Value'):   'FitDistCuts',
    ('model',  'DiscrDistCuts', 'Value'): 'DiscrDistCuts',
    
    ('run output', 'run status', 'Value') : 'Status',
    
    ('detection probability', 'total number of parameters (m)', 'Value'): '# params',
    ('encounter rate', 'number of observations (n)', 'Value'): '# obs',
    
    ('detection probability', 'Delta AIC', 'Value'): 'Delta AIC',
    ('detection probability', 'AIC value', 'Value'): 'AIC',
    ('detection probability', 'chi-square test probability determined', 'Value')               : 'GOF Chi-p',
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value')                  : 'GOF K-S p',
    ('detection probability', 'Cramér-von Mises (uniform weighting) test probability', 'Value'): 'GOF CvM (unif) p',
    ('detection probability', 'Cramér-von Mises (cosine weighting) test probability', 'Value') : 'GOF CvM (cos) p',
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'): 'ESW/EDR',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl')  : 'ESW/EDR LCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl')  : 'ESW/EDR UCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Cv')   : 'ESW/EDR CV',
    
    ('density/abundance', 'density of animals', 'Value'): 'D',
    ('density/abundance', 'density of animals', 'Lcl')  : 'D LCL',
    ('density/abundance', 'density of animals', 'Ucl')  : 'D UCL',
    ('density/abundance', 'density of animals', 'Cv')   : 'D CV',
    
    ('detection probability', 'probability of detection (Pw)', 'Value'): 'P',
    ('detection probability', 'probability of detection (Pw)', 'Lcl')  : 'P LCL',
    ('detection probability', 'probability of detection (Pw)', 'Ucl')  : 'P UCL',
    ('detection probability', 'probability of detection (Pw)', 'Cv')   : 'P CV',
    ('detection probability', 'probability of detection (Pw)', 'Df')   : 'P DF',
}
len(dCompCols)

In [None]:
# Actual results for comparison : keep the selected columns only,
# and rename them like their reference counterparts.
dfActRes4c = dfActRes[list(dCompCols)].copy()
dfActRes4c.columns = list(dCompCols.values())

# Null case identifiers are replaced by -1 (for easier comparison), then used as the index.
dfActRes4c[caseIdCols] = dfActRes4c[caseIdCols].fillna(-1)
dfActRes4c = dfActRes4c.set_index(caseIdCols)

dfActRes4c

In [None]:
# Reference results for comparison : same preparation as for the actual results
# (null case identifiers replaced by -1 for easier comparison, then used as the index),
# minus the 'Run' column, which is not compared.
dfRefRes4c = dfRefRes.copy()
dfRefRes4c[caseIdCols] = dfRefRes4c[caseIdCols].fillna(-1)
dfRefRes4c = dfRefRes4c.set_index(caseIdCols).drop(columns=['Run'])

dfRefRes4c

## 2. Diagnostic automatique

In [None]:
# First checks : actual and reference results must have the same list of test cases (index)
# and the same list of column names (columns), whatever their order.
assert sorted(dfActRes4c.index)   == sorted(dfRefRes4c.index)
assert sorted(dfActRes4c.columns) == sorted(dfRefRes4c.columns)

In [None]:
# Actual / reference closeness measure : -round(log10(abs(actual - reference) / max(abs(actual), abs(reference))), 1)
# = Compute the orders of magnitude that separate the difference from the max. of the two values
def closeness(sRefAct):
    """Measure how close two values are, in orders of magnitude.

    Parameters:
    :param sRefAct: the 2 values to compare, as an object with a to_list() method
                    returning exactly 2 numbers (ex: a 2-item pd.Series)

    Return:
    * 0 if exactly one of the values is NaN, or if at least one is infinite ("all different"),
    * NaN if both values are NaN,
    * +inf if the values are strictly equal,
    * otherwise -log10(|x - y| / max(|x|, |y|)), rounded to 1 decimal
      (ex: 3 = the difference is 10**3 times smaller than the largest value).
    """

    x, y = sRefAct.to_list()

    # Special cases with NaN(s) : nothing to measure if both are, all different if only 1 is.
    xIsNan, yIsNan = np.isnan(x), np.isnan(y)
    if xIsNan and yIsNan:
        return np.nan
    if xIsNan or yIsNan:
        return 0 # All different

    # Special case with 1 or more inf => all different
    if np.isinf(x) or np.isinf(y):
        return 0 # All different

    # Strict equality : return +inf explicitly, rather than through log10(0),
    # which raises a "divide by zero" RuntimeWarning (an error if warnings are exceptions).
    absDiff = abs(x - y)
    if absDiff == 0:
        return np.inf

    # Normal case
    return round(-np.log10(absDiff / max(abs(x), abs(y))), 1)

In [None]:
# Actual / reference comparison : closeness measure, computed cell by cell
# => The larger it is, the smaller the relative difference between the 2
#    Ex: 3 = factor 10**3 between the difference and the absolute values
#        0 = bad, one of the 2 is zero and the other not at all
#        inf = perfect ref/act equality
# See unit tests below.
dfRelDif = dfRefRes4c.copy()
for col in dfRelDif.columns:
    # Temporary 'act' column : the actual values of the column, aligned on the reference index.
    dfRelDif['act'] = dfActRes4c[col]
    # Replace the reference values by the closeness of each (reference, actual) pair.
    dfRelDif[col] = dfRelDif[[col, 'act']].apply(closeness, axis='columns')
    dfRelDif.drop(columns='act', inplace=True)
    
dfRelDif

In [None]:
# Diagnosis : only keep what is not strictly equal (rows and columns).
# Start from a copy of the whole closeness table ; the next cells drop the "good" rows from it.
dfBadRelDif = dfRelDif.copy()
len(dfBadRelDif)

In [None]:
# 1. Drop rows : same Status and all the rest NaN (case of status = 0/3/4 : run-time error, or no run)
valCols = [col for col in dfRelDif.columns if col != 'Status']
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif.Status.abs() == np.inf) & dfBadRelDif[valCols].isnull().all(axis='columns')].index,
            axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# 2. Drop rows : Status and all other columns at inf (strict equality)
dfBadRelDif.drop(dfBadRelDif[dfBadRelDif.apply(np.isinf, axis='columns').all(axis='columns')].index,
            axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# 3. Drop rows : all columns (Status included) with a closeness of 4 or more (near equality)
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif >= 4).all(axis='columns')].index, axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# 4. Drop rows : same as 3 (closeness of 4 or more = near equality), but ignoring
#                the GOF KS and CvM columns, which are NaN (not computed) when distances are discretised.
# Columns checked here : all but the GOF ones, except for the Chi2 one, which is kept.
discrCols = [col for col in dfRelDif.columns if not col.startswith('GOF') or col.find('Chi') > 0]
# Only for cases with discretised distances (DiscrDistCuts index level not null, i.e. not -1).
df2Drop = (dfBadRelDif.index.get_level_values('DiscrDistCuts') != -1) & (dfBadRelDif[discrCols] >= 4).all(axis='columns')
dfBadRelDif.drop(dfBadRelDif[df2Drop].index, axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# The verdict (see Excel file refFileName, sheet "DiffAuto", for explanations of the Act/Ref differences)
dfBadRelDif.T

In [None]:
dfRefRes4c.loc[dfBadRelDif.index]

In [None]:
dfActRes4c.loc[dfBadRelDif.index]

In [None]:
# Summary : the test cases remaining in the "bad differences" table are the failed ones.
nFails = len(dfBadRelDif.index)
if nFails == 0:
    print('All test cases succeeded !')
else:
    print('Warning: {} test case(s) failed ;'.format(nFails))
    print(' ... see sheet "DiffAuto" of {} for possible explanations.'.format(refFileName))

## 3. Sauvegarde des résultats.

In [None]:
# Save all the comparison tables, one per sheet, to a single workbook in the engine work folder.
resCompFileName = os.path.join(mcds.workDir, 'autods-validation-rescomp.xlsx')

with pd.ExcelWriter(resCompFileName) as xlsxWriter:

    # Reference and actual results, then their closeness table ...
    dfRefRes.to_excel(xlsxWriter, sheet_name='RefResults', index=True)
    dfActRes4c.reset_index().to_excel(xlsxWriter, sheet_name='ActResults', index=False)
    dfRelDif.reset_index().to_excel(xlsxWriter, sheet_name='Diff2Ref', index=False)
    # ... then the failed test cases only : closeness, reference and actual results ...
    dfBadRelDif.reset_index().to_excel(xlsxWriter, sheet_name='BadDiff2Ref', index=False)
    dfRefRes4c.loc[dfBadRelDif.index].reset_index().to_excel(xlsxWriter, sheet_name='RefResWithDiff', index=False)
    dfActRes4c.loc[dfBadRelDif.index].reset_index().to_excel(xlsxWriter, sheet_name='ActResWithDiff', index=False)
    # ... and finally the raw actual results (all columns).
    dfActRes.to_excel(xlsxWriter, sheet_name='RawActResults', index=True)

# MCDS : Rapports d'analyses Excel et HTML

In [None]:
# Columns selected for the synthesis tables of the report
# (3-level column labels of the results set, in display order).
synthCols = \
[
    ('sample', 'Species', 'Value'),
    ('sample', 'Periods', 'Value'),
    ('sample', 'Precision', 'Value'),
    ('sample', 'Duration', 'Value'),
    
    ('model', 'Model', 'Value'),
    
    ('run output', 'run status', 'Value'),
    
    ('encounter rate', 'number of observations (n)', 'Value'),
    
    ('detection probability', 'Delta AIC', 'Value'),
    ('detection probability', 'AIC value', 'Value'),
    ('detection probability', 'chi-square test probability determined', 'Value'),
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value'),
    ('density/abundance', 'density of animals', 'Cv'),
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl'),
    
    ('density/abundance', 'density of animals', 'Value'),
    ('density/abundance', 'density of animals', 'Lcl'),
    ('density/abundance', 'density of animals', 'Ucl'),
    
    ('detection probability', 'probability of detection (Pw)', 'Value'),
    ('detection probability', 'probability of detection (Pw)', 'Lcl'),
    ('detection probability', 'probability of detection (Pw)', 'Ucl'),
    ('detection probability', 'probability of detection (Pw)', 'Df'),

    ('sample', 'Sample', 'Value'),
    ('run output', 'run folder', 'Value'),
]

In [None]:
# Specialized results reports class
class SpeMCDSResultsReport(ads.ResultsReport):
    """Results report specialised for this validation : custom translated notes,
    colour scales, and final formatting (sort, unit conversion, rounding, styling)
    of the translated data tables."""

    # Custom translations (language => { key: translated text }), passed to the base class.
    DCustTrans = \
        dict(en={ 'Note: Some figures rounded or converted': 
                     "<strong>Note</strong>: Densities are expressed per square km,"
                     " and most figures have been rounded for readability",
                  'Note: All figures untouched, as output by MCDS': 
                     "<strong>Note</strong>: All values have been left untouched,"
                     " as output by MCDS (no rounding, no conversion)" },
             fr={ 'Note: Some figures rounded or converted':
                      "<strong>N.B.</strong> Les densités sont exprimées par km carré, et presque toutes les valeurs"
                      " ont été arrondies pour la lisibilité",
                  'Note: All figures untouched, as output by MCDS':
                      "<strong>N.B.</strong> Aucune valeur n'a été convertie ou arrondie,"
                      " elles sont toutes telles que produites par MCDS" })
    
    def __init__(self, resultsSet, title, subTitle, anlysSubTitle, description, keywords,
                       synthCols=None, lang='en', attachedDir='.', tgtFolder='.', tgtPrefix='results'):
        """Same parameters as the base class constructor call below,
        minus the custom translations, always set to DCustTrans."""
    
        super().__init__(resultsSet, title, subTitle, anlysSubTitle, description, keywords,
                         self.DCustTrans, synthCols, lang, attachedDir, tgtFolder, tgtPrefix)
        
    # Styling colors
    cChrGray = '#869074'
    cBckGreen, cBckGray = '#e0ef8c', '#dae3cb'
    cSclGreen, cSclOrange, cSclRed = '#cbef8c', '#f9da56', '#fe835a'
    cChrInvis = '#e8efd1' # body background
    scaledColors = [cSclGreen, cSclOrange, cSclRed]
    scaledColorsRvd = list(reversed(scaledColors))
    
    # Background color for each execution code (1, 2, 3 => green, orange, red).
    dExCodeColors = dict(zip([1, 2, 3], scaledColors))
    
    @classmethod
    def colorExecCodes(cls, sCodes):
        """Return the background-color CSS for each execution code of sCodes
        (any code other than 1, 2 or 3 gets the color of code 3)."""
        return ['background-color: ' + cls.dExCodeColors.get(c, cls.dExCodeColors[3]) for c in sCodes]
    
    @classmethod
    def scaledColorV(cls, v, thresholds, colors): # len(thresholds) == len(colors) - 1
        """Return the color for value v : gray background color if v is null,
        otherwise the color at the index of the 1st threshold that v exceeds,
        or the last color if v exceeds none."""
        if pd.isnull(v):
            return cls.cBckGray
        for ind, thresh in enumerate(thresholds):
            if v > thresh:
                return colors[ind]
        return colors[-1]
    
    # Note: Now a real class method, as its 'cls' 1st parameter always meant
    #       (calls through an instance, as in finalFormatData, work as before).
    @classmethod
    def scaledColorS(cls, sValues, thresholds, colors):
        """Return the background-color CSS for each value of sValues (see scaledColorV)."""
        return ['background-color: ' + cls.scaledColorV(v, thresholds, colors) for v in sValues]
    
    def finalFormatData(self, dfTrData, sort=True, convert=True, round=True, style=True):
        """Final formatting of translated data tables, for HTML or SpreadSheet rendering
        (sort, convert units, round values, and style).

        Note: Use trEnColNames method to pass from EN-translated columns names to self.lang-ones

        Parameters:
        :param dfTrData: the translated data table ; warning: sorted and converted IN PLACE
        :param sort: if True, sort rows by sample and then Delta AIC (increasing order)
        :param convert: if True, convert densities from per ha to per km2,
                        and the density variation coefficient to a percentage
        :param round: if True, round values (1 or 2 decimals, according to the column)
        :param style: if True, add colors to the table

        Return a pd.DataFrame.Styler
        """
        
        # Sorting
        df = dfTrData
        if sort:
            df.sort_values(by=self.trEnColNames(['Sample', 'Delta AIC']), 
                           ascending=[True, True], inplace=True)
        
        # Converting to other units
        kVarDens = 1.0
        if convert:
            for col in self.trEnColNames(['Density', 'Min Density', 'Max Density']): # 'CoefVar Density', 
                df[col] *= 1000000 / 10000 # ha => km2
            kVarDens = 100.0
            df[self.trEnColNames('CoefVar Density')] *= kVarDens # [0, 1] => %
            
        # Reducing float precision
        if round:
            dColDecimals = { **{ col: 2 for col in ['Delta AIC', 'Chi2 P 3', 'KS P', # TODO: which Chi2 ????
                                                    'PDetec', 'Min PDetec', 'Max PDetec'] },
                             **{ col: 1 for col in ['AIC', 'EDR/ESW', 'Min EDR/ESW', 'Max EDR/ESW',
                                                    'Density', 'Min Density', 'Max Density', 'CoefVar Density'] } }
            df = df.round(decimals=self.trEnColNames(dColDecimals))
        
        # Styling
        dfs = df.style
        if style:
            
            # Green background for the rows with null Delta AIC, then scaled colors for the execution code,
            # density variation coefficient (the higher, the worse) and GOF test probabilities
            # (the higher, the better), and finally gray text for the rows with other execution codes than 1 or 2.
            dfs.set_properties(subset=pd.IndexSlice[df[df[self.trEnColNames('Delta AIC')] == 0].index, :],
                               **{'background-color': self.cBckGreen}) \
               .apply(self.colorExecCodes, subset=[self.trEnColNames('ExCod')], axis='columns') \
               .apply(self.scaledColorS, subset=[self.trEnColNames('CoefVar Density')], axis='columns',
                      thresholds=[v * kVarDens for v in [0.3, 0.2]], colors=self.scaledColorsRvd) \
               .apply(self.scaledColorS, subset=[self.trEnColNames('KS P')], axis='columns',
                      thresholds=[0.7, 0.2], colors=self.scaledColors) \
               .apply(self.scaledColorS, subset=[self.trEnColNames('Chi2 P 3')], axis='columns', # TODO: which Chi2 ????
                      thresholds=[0.7, 0.2], colors=self.scaledColors) \
               .set_properties(subset=pd.IndexSlice[df[~df[self.trEnColNames('ExCod')].isin([1, 2])].index, :],
                               **{'color': self.cChrGray})
            
            # Make the text of null cells invisible.
            # Note: Styler.where(cond, value), used here before, was deprecated in pandas 1.3 and is no longer
            #       available in recent versions ; its documented equivalent is the element-wise styling
            #       method (Styler.map in pandas >= 2.1, Styler.applymap before) with an explicit function.
            applyCellWise = getattr(dfs, 'map', None) or dfs.applymap
            applyCellWise(lambda v: 'color: transparent' if pd.isnull(v) else '')
            applyCellWise(lambda v: 'text-shadow: none' if pd.isnull(v) else '')
        
        return dfs

In [None]:
# Build the report (French version) on the results set, with the synthesis columns selected above ;
# the target folder (tgtFolder) is the engine work folder.
report = SpeMCDSResultsReport(resultsSet=results, synthCols=synthCols, title='Validation du module autods',
                              subTitle='Rapport d\'analyse global', anlysSubTitle='Rapport détaillé',
                              description='Qu\'ajouter de plus ?', keywords='autods, validation',
                              lang='fr', attachedDir='.', tgtFolder=mcds.workDir, tgtPrefix='autods-validation-report')

In [None]:
# Generate the HTML report, and display a link built from the value returned by toHtml.
htmlRep = report.toHtml()

HTML(f'Rapport HTML : <a href="{htmlRep}" target="blank">{htmlRep}</a>')

In [None]:
# Generate the Excel report, and display a link built from the value returned by toExcel.
xlsxRep = report.toExcel()

HTML(f'Rapport Excel : <a href="{xlsxRep}" target="blank">{xlsxRep}</a>')

In [None]:
_ = implib.reload(ads)

# Bac à sable

In [None]:
# Target fields holding decimal numbers.
DecimalFields = ['SMP_EFFORT', 'DISTANCE']

# For each required target field (in matching order),
# the regular expressions that a source field name may match (case ignored).
ImportFieldAliasREs = odict()
ImportFieldAliasREs['STR_LABEL'] = ['region', 'zone', 'strate', 'stratum']
ImportFieldAliasREs['STR_AREA'] = ['surface', 'area', 'ha', 'km2']
ImportFieldAliasREs['SMP_LABEL'] = ['point', 'lieu', 'location']
ImportFieldAliasREs['SMP_EFFORT'] = ['effort', 'passages', 'surveys', 'samplings']
ImportFieldAliasREs['DISTANCE'] = ['distance']

def matchDataFields(srcFields):
    """Match each required target field to the 1st source field that matches one of its patterns.

    The whole search is traced on the standard output.

    Parameters:
    :param srcFields: list of the source field names (data set columns)

    Return a 3-item tuple of source field name lists :
    * the matched ones, in the order of the target fields,
    * among them, those matched to a decimal target field,
    * the unmatched (extra) ones, in srcFields order.

    Raise an Exception as soon as a target field has no matching source field.
    """

    print('Matching required data columns:', end=' ')

    matched, matchedDecimal = [], []
    for target, patterns in ImportFieldAliasREs.items():

        print(target, end='=')

        # Look for the 1st source field matching any of the patterns of the target field.
        for candidate in srcFields:

            print(candidate, end=':')

            hit = False
            for pattern in patterns:
                print(pattern, end=';')
                if re.search(pattern, candidate, flags=re.IGNORECASE) is not None:
                    hit = True
                    break

            if hit:
                print(candidate, end=', ')
                matched.append(candidate)
                if target in DecimalFields:
                    matchedDecimal.append(candidate)
                break

        else:
            # Source fields exhausted without any match.
            raise Exception('Error: Failed to find a match for expected {} in dataset columns {}' \
                            .format(target, srcFields))

    # Extra fields = source fields left unmatched.
    extras = [field for field in srcFields if field not in matched]

    print('... success.')

    return matched, matchedDecimal, extras

In [None]:
matchDataFields(['Region*Label', 'Region*Area', 'Point transect*Label',
       'Point transect*Survey effort', 'Observation*Radial distance'])

In [None]:
mo = re.search('area', 'Region*Area', flags=re.IGNORECASE)
mo

In [None]:
def safeFloat2Str(val, prec=None, decPt='.'):
    """Convert a number to a string, safely for null values.

    Parameters:
    :param val: the value to convert ; null (None, NaN, ...) gives an empty string
    :param prec: number of decimals to output ; None for the default str() conversion
    :param decPt: decimal point character to use in the output, in place of '.'
    """
    if pd.isnull(val):
        text = ''
    elif prec is None:
        text = str(val)
    else:
        text = '{:.{prec}f}'.format(val, prec=prec)

    return text if decPt == '.' else text.replace('.', decPt)

In [None]:
safeFloat2Str(12.53, prec=None, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=1, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=4, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=None, decPt=',')