<!-- Auto table of contents -->
<h1 class='tocIgnore'>AutoDS : Tests</h1>
<div style="overflow-y: auto">
  <h2 class='tocIgnore'>Table des matières</h2>
  <div id="toc"></div>
</div>

In [None]:
%%javascript
$.getScript('../ipython_notebook_toc.js')

# Mise au point, tests unitaires et d'intégration du module autods

(interface python à MCDS.exe)

In [None]:
%matplotlib inline

In [None]:
import sys
import os
import importlib as implib
from packaging import version

import re

from collections import OrderedDict as odict

import math
import numpy as np
import pandas as pd

from tqdm import tqdm

from IPython.display import HTML

import matplotlib.pyplot as plt

import plotly as ply
import plotly.graph_objs as plygo

In [None]:
# Activate Warnings as Exception
#import warnings
#warnings.filterwarnings('error')

# Communs

In [None]:
# Actual / reference closeness measure : -round(log10(abs(actual - reference) / max(abs(actual), abs(reference))), 1)
# = Compute the orders of magnitude that separate the difference from the max. of the two values
def closeness(sRefAct):

    """Closeness of 2 numbers, as the number of orders of magnitude between their difference and the biggest one.

    Parameters:
    :param sRefAct: 2-item pd.Series (reference and actual values, in any order: the measure is symmetric)

    :return: * 0 when exactly one of the 2 values is NaN, or when at least one is infinite ("all different"),
             * NaN when both values are NaN,
             * +inf when the 2 values are strictly equal,
             * otherwise -log10(abs(x - y) / max(abs(x), abs(y))), rounded to 1 decimal
               (the bigger, the closer the 2 values).
    """

    x, y = sRefAct.to_list()

    # Special cases with NaNs: both => NaN ; only 1 => all different
    xIsNan, yIsNan = np.isnan(x), np.isnan(y)
    if xIsNan and yIsNan:
        return np.nan
    if xIsNan or yIsNan:
        return 0 # All different

    # Special cases with 1 or more inf => all different
    if np.isinf(x) or np.isinf(y):
        return 0 # All different

    # Strict equality: infinite closeness, returned explicitly, rather than through log10(0),
    # that emits a "divide by zero" RuntimeWarning (an exception when warnings are turned into errors).
    diff = abs(x - y)
    if diff == 0:
        return np.inf

    # Normal case: relative difference (the max can't be 0 here, as x != y)
    return round(-np.log10(diff / max(abs(x), abs(y))), 1)

# Tests unitaires et d'intégration module *autods*.

## 0. Détection de Distance

In [None]:
import autods as ads

## 1. Classe DataSet

In [None]:
# Excel source
ds = ads.DataSet(source=os.path.join('AutoDS', 'refin', 'ACDC2019-Papyrus-ALAARV-saisie-ttes-cols.xlsx'),
                 decimalFields=['EFFORT', 'DISTANCE', 'NOMBRE'])
ds.dfData.head()

In [None]:
# CSV source with ',' as decimal point
ds = ads.DataSet(source=os.path.join('AutoDS', 'refin', 'ACDC2019-Papyrus-TURMER-AB-5mn-1dec-dist.txt'),
                 decimalFields=['Point transect*Survey effort', 'Observation*Radial distance'])

assert not any(ds.dfData[col].dropna().apply(lambda v: isinstance(v, str)).any() for col in ds.decimalFields), \
       'Error: Some strings found in declared decimal fields ... any decimal format issue ?'

ds.dfData.head()

In [None]:
# CSV source with '.' as decimal point
ds = ads.DataSet(source=os.path.join('AutoDS', 'refin', 'ACDC2019-Papyrus-ALAARV-AB-10mn-1dotdec-dist.txt'),
                 decimalFields=['Point transect*Survey effort', 'Observation*Radial distance'])

assert not any(ds.dfData[col].dropna().apply(lambda v: isinstance(v, str)).any() for col in ds.decimalFields), \
       'Error: Some strings found in declared decimal fields ... any decimal format issue ?'

ds.dfData.head()

In [None]:
# DataFrame source.
dfData = pd.DataFrame(columns=['Date', 'TrucDec', 'Espece', 'Point', 'Effort', 'Distance'],
                      data=[('2019-05-13', 3.5, 'TURMER', 23, 2,   83),
                            ('2019-05-15', np.nan, 'TURMER', 23, 2,   27.355),
                            ('2019-05-13', 0, 'ALAARV', 29, 2,   56.85),
                            ('2019-04-03', 1.325, 'PRUMOD', 53, 1.3,  7.2),
                            ('2019-06-01', 2, 'PHICOL', 12, 1,  np.nan),
                            ('2019-06-19', np.nan, 'PHICOL', 17, 0.5, np.nan),
                           ])
dfData['Region'] = 'ACDC'
dfData['Surface'] = '2400'
dfData

In [None]:
ds = ads.DataSet(source=dfData, decimalFields=['Effort', 'Distance', 'TrucDec'])
ds.dfData

## 2. Classes XXEngine

### a. Instanciation et chargement des spécifs sur les stats en sortie

In [None]:
# The engine is expected to refuse this work folder through an AssertionError
# (presumably because of the space in 'test out' : see the message printed below).
try:
    eng = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'test out'))
except AssertionError as exc:
    print('Good forbidden chars detection:', exc)
else:
    # No exception at all : the check did not work.
    print('Error: Should have raised an AssertionError !')

In [None]:
eng = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'))

In [None]:
_ = eng.setupRunFolder(runPrefix='uni') # Unit tests

### b. Génération fichier de données en entrée de MCDS

In [None]:
_ = implib.reload(ads)

In [None]:
dataFileName = eng.buildDataFile(dataSet=ds)

### c. Génération fichier de "commandes"

In [None]:
cmdFileName = eng.buildCmdFile(estimKeyFn='HNORMAL', estimAdjustFn='COSINE',
                               estimCriterion='AIC', cvInterval=95)

### d. Execution en mode "debug"

(génération des fichiers cmd et data, mais pas d'appel à l'exécutable)

In [None]:
runCode, runTime, runDir = eng.run(ds, realRun=False, runPrefix='int',
                                   estimKeyFn='UNIFORM', estimAdjustFn='POLY',
                                   estimCriterion='AIC', cvInterval=95)
assert runCode == 0, 'Should have NOT run (run code = 0)'
dict(runCode=runCode, runDir=runDir, runTime=runTime)

### e. Exécution réelle

In [None]:
runCode, runTime, runDir = eng.run(ds, realRun=True, runPrefix='int',
                                   estimKeyFn='UNIFORM', estimAdjustFn='POLY',
                                   estimCriterion='AIC', cvInterval=95)
assert runCode == 2, 'Should have run with warnings (run code = 2)'
dict(runCode=runCode, runDir=runDir, runTime=runTime)

### f. Génération fichier de données en entrée pour Distance

(mode 'point transect' uniquement pour le moment)

In [None]:
os.makedirs(os.path.join(eng.workDir, 'distance-in'), exist_ok=True)

In [None]:
distDataFileName = \
    eng.buildDistanceDataFile(ds, tgtFilePathName=os.path.join(eng.workDir, 'distance-in', 'import-data-noextra.txt'))

In [None]:
distDataFileName = \
    eng.buildDistanceDataFile(ds, tgtFilePathName=os.path.join(eng.workDir, 'distance-in', 'import-data-withextra.txt'),
                              withExtraFields=True)

### g. classe ResultsSet

In [None]:
miCustCols = pd.MultiIndex.from_tuples([('id', 'index', 'Value'),
                                        ('sample', 'species', 'Value'),
                                        ('sample', 'periods', 'Value'),
                                        ('sample', 'duration', 'Value'),
                                        ('variant', 'precision', 'Value')])
dfCustColTrans = \
    pd.DataFrame(index=miCustCols,
                 data=dict(en=['index', 'species', 'periods', 'duration', 'precision'],
                           fr=['numéro', 'espèce', 'périodes', 'durée', 'précision']))

rs = ads.ResultsSet(analysisClass=ads.MCDSAnalysis, miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans)

In [None]:
assert rs.dfData.empty

In [None]:
sHead = pd.Series(index=miCustCols, data=list(range(len(miCustCols))))
miResCols = ads.MCDSAnalysis.MIRunColumns.append(ads.MCDSEngine.statModCols())
sResult = pd.Series(index=miResCols, data=list(range(len(miResCols))))
rs.append(sResult, sCustomHead=sHead)

In [None]:
dfRaw = rs.dfData
dfRaw

In [None]:
dfTrans = rs.dfTransData('fr')
dfTrans

In [None]:
assert len(dfRaw.columns) == len(dfTrans.columns)

# Tests de validation module autods

## 1. MCDSEngine : Génération de fichiers d'entrée pour Distance

* via un jeu de fichiers d'entrée bruts Excel, et leur export de référence, éprouvé dans Distance,
* et comparaison du produit de XXEngine.buildDistanceDataFile à cette référence.

In [None]:
dfDistCases = pd.DataFrame([dict(inFileName='ACDC2019-Papyrus-ALAARV-saisie-ttes-cols.xlsx',
                                 decimalFields=['EFFORT', 'DISTANCE', 'NOMBRE'],
                                 refOutFileName='ACDC2019-Papyrus-ALAARV-saisie-5-cols.txt', withExtraFields=False),
                            dict(inFileName='ACDC2019-Papyrus-ALAARV-saisie-ttes-cols.xlsx',
                                 decimalFields=['EFFORT', 'DISTANCE', 'NOMBRE'],
                                 refOutFileName='ACDC2019-Papyrus-ALAARV-saisie-ttes-cols.txt', withExtraFields=True)])
dfDistCases

In [None]:
eng = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'))

In [None]:
# For each test case, generate the Distance import data file from the raw input file,
# and compare it (full text equality) to the reference file of the same name in AutoDS/refout.
# Failures are counted and reported through printed messages only (no exception raised).
fails = 0
for ind, sCase in dfDistCases.iterrows():
    
    print('#', ind, ':', sCase.inFileName)

    # Create data set
    ds = ads.DataSet(source=os.path.join('AutoDS', 'refin', sCase.inFileName),
                     decimalFields=sCase.decimalFields)
    
    # Build distance import data file (named after the reference one, but in the engine work folder ;
    # the path actually returned by the engine is the one read just below)
    ofn = os.path.join(eng.workDir, 'distance-in', sCase.refOutFileName)
    ofn = eng.buildDistanceDataFile(dataSet=ds, tgtFilePathName=ofn, withExtraFields=sCase.withExtraFields)
    
    # Compare generated file to reference
    rfn = os.path.join('AutoDS', 'refout', sCase.refOutFileName)
    with open(ofn, 'r') as fOut, open(rfn, 'r') as fRef:
        if fOut.read() == fRef.read():
            print('Success : Conform to reference.')
        else:
            print('Error: Generated file differs from reference', rfn)
            fails += 1
            
    print()
    
# Final summary
print('All test cases succeeded !' if fails == 0 else 'Error: {} test case(s) failed.'.format(fails))

## 2. MCDSEngine : Exécution avec de vraies données

### A virer : tests MCDSAnalysis ci-dessous englobants 

In [None]:
ds = ads.DataSet(source=os.path.join('AutoDS', 'refin', 'ACDC2019-Papyrus-ALAARV-saisie-ttes-cols.xlsx'),
                 decimalFields=['EFFORT', 'DISTANCE', 'NOMBRE'])

eng = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'))

runCode, runTime, runDir = eng.run(ds, realRun=True, runPrefix='int',
                                   estimKeyFn='UNIFORM', estimAdjustFn='POLY',
                                   estimCriterion='AIC', cvInterval=95)
assert runCode == 2, 'Should have run with warnings (run code = 2)'
dict(runCode=runCode, runDir=runDir, runTime=runTime)

## 3. MCDSAnalysis : Analyse avec de vraies données

(et comparaison à des analyses faites à la main avec Distance 7.3)

### a. Construction des cas tests

In [None]:
# Load refout results table
dfRefRes = pd.read_excel(os.path.join('AutoDS', 'refout', 'ACDC2019-Papyrus-ALAARV-TURMER-resultats-distance-73.xlsx'))
dfRefRes.rename(columns=dict(Name='Model'), inplace=True)

In [None]:
dfRefRes.head()

In [None]:
# Generate test cases definition code from refout results file (don't cheat : only input columns :-)
caseIdCols = ['Species', 'Sample', 'Precision', 'Duration', 'Model']
dfAnlysCases = dfRefRes[caseIdCols].copy()

#dfAnlysCases['Status'] = \
#    dfAnlysCases.Status.apply(lambda s: 1 if s == 'OK' else 2 if s == 'Warnings' else 3)

# Key function and adjustment series of each case, deduced from the start and the end of the model name.
dfAnlysCases['KeyFn'] = \
    dfAnlysCases.Model.apply(lambda s: 'UNIFORM' if s.startswith('Unif')
                                                 else 'HNORMAL' if s.startswith('Half') else 'HAZARD')
dfAnlysCases['AdjSer'] = \
    dfAnlysCases.Model.apply(lambda s: 'COSINE' if s.endswith('Cos')
                                                else 'POLY' if s.endswith('SimPoly') else 'HERMITE')

# Input data file name of each case, built from its species, sample, duration and precision.
# Note: The duration test was an accidental chained comparison ('5' in Duration == '5 mn',
#       i.e. '5' in Duration and Duration == '5 mn') ; it is now 1 explicit test,
#       giving the same results for '5 mn' and '10 mn', and also working for variants like '5mn'.
dfAnlysCases['InFileName'] = \
    dfAnlysCases.apply(lambda sRow: 'ACDC2019-Papyrus-{}-{}-{}mn-{}dec-dist.txt'
                                    .format(sRow.Species,
                                            'AB' if 'A+B' in sRow.Sample else 'A' if 'A' in sRow.Sample else 'B',
                                            5 if sRow.Duration.strip().startswith('5') else 10,
                                            6 if sRow.Precision.startswith('6 déc') else 1),
                       axis='columns')

dfAnlysCases

### b. Préparation des analyses

In [None]:
decimalFields = ['Point transect*Survey effort', 'Observation*Radial distance']

In [None]:
# Analysis engine
mcds = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'),
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial')

In [None]:
# Frozen analysis parameters (a choice here)
KEstimCriterion = 'AIC'
KCVInterval = 95

In [None]:
# Result object construction
miCustCols = pd.MultiIndex.from_tuples([('sample', col, 'Value') for col in caseIdCols])
dfCustColTrans = \
    pd.DataFrame(index=miCustCols,
                 data=dict(en=caseIdCols, fr=['Espèce', 'Echantillon', 'Précision', 'Durée', 'Modèle']))

results = ads.ResultsSet(analysisClass=ads.MCDSAnalysis, miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans)

### c. Ou : Exécution des analyses

In [None]:
# Run 1 MCDS analysis per test case (row of dfAnlysCases), and store each result, with its case id. columns
# as a custom head, in the `results` set ; start / end times and total duration are printed.
tsStart = pd.Timestamp.now()
print('Started at', tsStart)
print()

# Run all analyses
lastInFileName = ''
for ind, sCase in dfAnlysCases.iterrows():
    
    # Analysis name prefix = input file name without its constant 'ACDC2019-Papyrus-' head and '-dist.txt' tail.
    prefix = sCase.InFileName[len('ACDC2019-Papyrus')+1:-len('-dist.txt')]
    print('#{:3d}'.format(ind+1), prefix, sCase.KeyFn, sCase.AdjSer, end='\n'*2)
    
    # Create data set if not already done.
    # (only checked against the previous case : consecutive cases sharing the same input file reuse the data set)
    if lastInFileName != sCase.InFileName:
        ds = ads.DataSet(os.path.join('AutoDS', 'refin', sCase.InFileName), decimalFields=decimalFields)
        lastInFileName = sCase.InFileName
        
    # Run analysis
    analysis = ads.MCDSAnalysis(engine=mcds, dataSet=ds, namePrefix=prefix,
                                estimKeyFn=sCase.KeyFn, estimAdjustFn=sCase.AdjSer,
                                estimCriterion=KEstimCriterion, cvInterval=KCVInterval)
    sResult = analysis.run()

    # Save results
    # (custom head = the first len(caseIdCols) values of the case row, taken by position)
    sHead = pd.Series(data=[sCase[col] for col in sCase.index[:len(caseIdCols)]], index=miCustCols)

    results.append(sResult, sCustomHead=sHead)
    
tsEnd = pd.Timestamp.now()
print('Finished at', tsEnd, ': duration', str(tsEnd - tsStart).replace('0 days ', ''))

In [None]:
# Analysis results
dfActRes = results.dfData

dfActRes.head()

In [None]:
# Check translation
dfActTrRes = results.dfTransData('fr')

dfActTrRes.head()

In [None]:
# Save results in case need for not recomputing them
resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')

results.toExcel(resFileName, sheetName='AutoDSVal')

### c. Ou : Rechargement des résultats d'analyses

(déjà faites ci-dessus)

In [None]:
resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')

results.fromExcel(resFileName, sheetName='AutoDSVal')

### d. Comparaison des résultats à la référence

(référence = analyses faites "à la main" avec distance)

In [None]:
# Sélection des colonnes des résultats autos et association aux disponibles dans la référence, pour comparaison.
dCompCols = \
{
    ('sample', 'Species', 'Value'):   'Species',
    ('sample', 'Sample', 'Value'):    'Sample',
    ('sample', 'Precision', 'Value'): 'Precision',
    ('sample', 'Duration', 'Value'):  'Duration',
    ('sample', 'Model', 'Value'):     'Model',
    
    ('run output', 'run status', 'Value') : 'Status',
    
    ('detection probability', 'total number of parameters (m)', 'Value'): '# params',
    ('encounter rate', 'number of observations (n)', 'Value'): '# obs',
    
    ('detection probability', 'AIC value', 'Value'): 'AIC',
    ('detection probability', 'chi-square test probability (distance set 3)', 'Value')         : 'GOF Chi-p',
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value')                  : 'GOF K-S p',
    ('detection probability', 'Cramér-von Mises (uniform weighting) test probability', 'Value'): 'GOF CvM (unif) p',
    ('detection probability', 'Cramér-von Mises (cosine weighting) test probability', 'Value') : 'GOF CvM (cos) p',
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'): 'ESW/EDR',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl')  : 'ESW/EDR LCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl')  : 'ESW/EDR UCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Cv')   : 'ESW/EDR CV',
    
    ('density/abundance', 'density of animals', 'Value'): 'D',
    ('density/abundance', 'density of animals', 'Lcl')  : 'D LCL',
    ('density/abundance', 'density of animals', 'Ucl')  : 'D UCL',
    ('density/abundance', 'density of animals', 'Cv')   : 'D CV',
    
    ('detection probability', 'probability of detection (Pw)', 'Value'): 'P',
    ('detection probability', 'probability of detection (Pw)', 'Lcl')  : 'P LCL',
    ('detection probability', 'probability of detection (Pw)', 'Ucl')  : 'P UCL',
    ('detection probability', 'probability of detection (Pw)', 'Cv')   : 'P CV',
    ('detection probability', 'probability of detection (Pw)', 'Df')   : 'P DF',
}
len(dCompCols)

In [None]:
# Sélection des colonnes de résultats, et renommage comme la référence, pour comparaison
dfActRes4c = dfActRes[list(dCompCols.keys())].copy()
dfActRes4c.columns = [dCompCols[col] for col in dCompCols]
dfActRes4c.set_index(caseIdCols, inplace=True)

dfActRes4c

In [None]:
# Sélection des colonnes utiles de la référence pour comparaison
dfRefRes4c = dfRefRes.copy()
dfRefRes4c.set_index(caseIdCols, inplace=True)
dfRefRes4c.drop(columns=['Run', 'Delta AIC'], inplace=True)

dfRefRes4c

In [None]:
# First checks : actual and reference results must have the same list of test cases (index)
# and the same list of column names (columns), whatever their order.
assert sorted(dfActRes4c.index)   == sorted(dfRefRes4c.index)
assert sorted(dfActRes4c.columns) == sorted(dfRefRes4c.columns)

In [None]:
# Actual / reference comparison : closeness measure, computed cell by cell through closeness()
# => The bigger, the smaller the relative difference between the 2
#    Ex: 3 = factor 10**3 between the difference and the absolute values
#        0 = bad: one of the 2 is null, the other not at all
#        inf = perfect ref/act equality (NO difference at all)
# See unit tests further below.
dfRelDif = dfRefRes4c.copy()
for col in dfRelDif.columns:
    # Temporarily put the actual values next to the reference ones, to compare them row by row.
    dfRelDif['act'] = dfActRes4c[col]
    dfRelDif[col] = dfRelDif[[col, 'act']].apply(closeness, axis='columns')
    dfRelDif.drop(columns='act', inplace=True)
    
dfRelDif

### e. Sauvegarde des résultats.

In [None]:
resCompFileName = os.path.join(mcds.workDir, 'autods-validation-rescomp.xlsx')

with pd.ExcelWriter(resCompFileName) as xlsxWriter:

    dfRefRes.to_excel(xlsxWriter, sheet_name='RefResults', index=True)
    dfActRes4c.reset_index().to_excel(xlsxWriter, sheet_name='ActResults', index=True)
    dfRelDif.reset_index().to_excel(xlsxWriter, sheet_name='Diff2Ref', index=True)
    dfActRes.to_excel(xlsxWriter, sheet_name='RawActResults', index=True)

### f. Diagnostic automatique

In [None]:
# Diagnosis : only keep what is not strictly equal (rows and columns).
# Start from a full copy of the closeness table ; the number of rows is displayed.
dfBadRelDif = dfRelDif.copy()
len(dfBadRelDif)

In [None]:
# 1. Drop rows : same Status (infinite closeness) and all other columns NaN
#    (case of status = 0/3/4 : execution error or no execution)
valCols = [col for col in dfRelDif.columns if col != 'Status']
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif.Status.abs() == np.inf) & dfBadRelDif[valCols].isnull().all(axis='columns')].index,
            axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# 2. Drop rows : Status and all other columns infinite (strict equality)
dfBadRelDif.drop(dfBadRelDif[dfBadRelDif.apply(np.isinf, axis='columns').all(axis='columns')].index,
            axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
# 3. Drop rows : all columns (Status included) with a closeness of at least 4 (near equality)
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif >= 4).all(axis='columns')].index,
            axis='index', inplace=True)
len(dfBadRelDif)

In [None]:
dfBadRelDif

In [None]:
dfRefRes4c.loc[dfBadRelDif.index]

In [None]:
dfActRes4c.loc[dfBadRelDif.index]

In [None]:
#print('All test cases succeeded !' if fails == 0 else 'Error: {} test case(s) failed.'.format(fails))

## 4. MCDSAnalysis : Rapport d'analyses Excel et HTML

In [None]:
# Sélection des colonnes pour les tableaux de synthèse du rapport
synthCols = \
[
    ('sample', 'Species', 'Value'),
    ('sample', 'Sample', 'Value'),
    ('sample', 'Precision', 'Value'),
    ('sample', 'Duration', 'Value'),
    ('sample', 'Model', 'Value'),
    
    ('run output', 'run status', 'Value'),
    
    ('encounter rate', 'number of observations (n)', 'Value'),
    
    ('detection probability', 'AIC value', 'Value'),
    ('detection probability', 'chi-square test probability (distance set 3)', 'Value'),
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value'),
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl'),
    
    ('density/abundance', 'density of animals', 'Value'),
    ('density/abundance', 'density of animals', 'Lcl'),
    ('density/abundance', 'density of animals', 'Ucl'),
    ('density/abundance', 'density of animals', 'Cv'),
    
    ('detection probability', 'probability of detection (Pw)', 'Value'),
    ('detection probability', 'probability of detection (Pw)', 'Lcl'),
    ('detection probability', 'probability of detection (Pw)', 'Ucl'),
    ('detection probability', 'probability of detection (Pw)', 'Df'),

    ('run output', 'run folder', 'Value'),
]

In [None]:
_ = implib.reload(ads)

In [None]:
report = ads.ResultsReport(resultsSet=results, synthCols=synthCols, title='Validation du module autods',
                           subTitle='Rapport d\'analyse global', anlysSubTitle='Rapport détaillé',
                           description='Qu\'ajouter de plus ?', keywords='autods, validation',
                           lang='fr', attachedDir='.', tgtFolder=mcds.workDir, tgtPrefix='autods-validation-report')

In [None]:
# Generate the HTML report, and display a link to it
# (assumes toHtml() returns the path / URL of the generated file, as it is used as the link target).
htmlRep = report.toHtml()

# Note: "_blank" is the HTML keyword for opening the link in a new tab / window
#       ("blank" would only name a browsing context, shared by all links using that same name).
HTML(f'Rapport HTML : <a href="{htmlRep}" target="_blank">{htmlRep}</a>')

In [None]:
# Generate the Excel report, and display a link to it
# (assumes toExcel() returns the path / URL of the generated file, as it is used as the link target).
xlsxRep = report.toExcel()

# Note: "_blank" is the HTML keyword for opening the link in a new tab / window
#       ("blank" would only name a browsing context, shared by all links using that same name).
HTML(f'Rapport Excel : <a href="{xlsxRep}" target="_blank">{xlsxRep}</a>')

# Decode MCDS plots file

In [None]:
srcFileName = os.path.join('AutoDS', 'mcds-out', 'TURMER-AB-10mn-1dec-hno-cos-01qn02hg', 'plots.txt')

In [None]:
# Read all the lines of the plots file, stripped from their leading and trailing blanks
# (the file is opened through a context manager, to get it closed as soon as read).
with open(srcFileName, 'r') as srcFile:
    lines = [line.strip() for line in srcFile]
len(lines)

In [None]:
lines[:10]

In [None]:
# Decode the plots file lines, as a sequence of "chapters", each made of, in this order :
# * 4 text lines : title, sub-title, X axis label, Y axis label,
# * 1 line with 4 numbers : X min and max, then Y min and max,
# * 1 line with the number N of data rows,
# * N lines of blank-separated numbers (the data rows).
chapters = []
itLines = iter(lines)
for title in itLines:
    subTitle, xLabel, yLabel = next(itLines), next(itLines), next(itLines)
    xMin, xMax, yMin, yMax = map(float, next(itLines).split())
    nDataRows = int(next(itLines))
    dataRows = [[float(field) for field in next(itLines).split()] for _ in range(nDataRows)]
    chapters.append({'title': title, 'subTitle': subTitle, 'dataRows': dataRows,
                     'xLabel': xLabel, 'yLabel': yLabel,
                     'xMin': xMin, 'xMax': xMax, 'yMin': yMin, 'yMax': yMax})

# Number of decoded chapters, and the first one as a sample.
len(chapters), chapters[0]

In [None]:
## QQ-plot
chapter = chapters[0]
chapter

In [None]:
n = len(chapter['dataRows'])
dfQqData = pd.DataFrame(data=chapter['dataRows'], columns=['If the fit was perfect ...', 'Real observations'],
                        index=np.linspace(0.5/n, 1.0-0.5/n, n))
dfQqData

In [None]:
axes = dfQqData.plot(figsize=(16, 6), color=['blue', 'red'], grid=True,
                     xlim=(chapter['xMin'], chapter['xMax']), ylim=(chapter['yMin'], chapter['yMax']))
axes.legend(['If the fit was perfect ...', 'Real observations'], fontsize=12)
axes.set_facecolor('#f9fbf3')
axes.figure.patch.set_facecolor('#f9fbf3')
axes.set_title(label=chapter['title'] + ' : ' + chapter['subTitle'], fontdict=dict(fontsize=16), pad=20)
axes.set_xlabel(chapter['xLabel'], fontsize=12)
_ = axes.set_ylabel(chapter['yLabel'], fontsize=12)

In [None]:
# Save the QQ-plot figure as JPEG and (transparent background) PNG files in the 'tmp' folder,
# cropped to the drawn area : the right keyword for this is "bbox_inches" (not "box_inches").
os.makedirs('tmp', exist_ok=True) # Make sure the target folder exists.
axes.figure.savefig(os.path.join('tmp', 'mlb-qqplot.jpg'), bbox_inches='tight')
axes.figure.savefig(os.path.join('tmp', 'mlb-qqplot.png'), bbox_inches='tight', transparent=True)

In [None]:
plt.close(axes.figure)

In [None]:
# Plotly 4
fig = plygo.Figure()

fig.add_trace(plygo.Scatter(x=dfQqData.index, y=dfQqData['If the fit was perfect ...'],
                            name='If the fit was perfect ...', line=dict(color='blue', width=2), opacity=0.7))
fig.add_trace(plygo.Scatter(x=dfQqData.index, y=dfQqData['Real observations'],
                            name='Real observations', line=dict(color='red', width=2)))

fig.update_layout(title=chapter['title'] + ' : ' + chapter['subTitle'],
                  xaxis=dict(title=chapter['xLabel'], range=(chapter['xMin'], chapter['xMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  yaxis=dict(title=chapter['yLabel'], range=(chapter['yMin'], chapter['yMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  legend=plygo.layout.Legend(x=0.09, y=0.90, bordercolor='black', borderwidth=1),
                  shapes=[plygo.layout.Shape(type='line', x0=chapter['xMax'], y0=chapter['yMin'],
                                                          x1=chapter['xMax'], y1=chapter['yMax']),
                          plygo.layout.Shape(type='line', x0=chapter['xMin'], y0=chapter['yMax'],
                                                          x1=chapter['xMax'], y1=chapter['yMax'])],
                  template='none')

fig.show()

In [None]:
# Export the plotly figure as SVG and PNG files in the 'tmp' folder.
# Wow ... VERY slooooooooow !
os.makedirs('tmp', exist_ok=True) # Make sure the target folder exists (write_image is not expected to create it).
fig.write_image("tmp/ply-qqplot.svg")
fig.write_image("tmp/ply-qqplot.png")

In [None]:
# Detection probability
chapter = chapters[1]
chapter

In [None]:
dfDetProbData = pd.DataFrame(data=chapter['dataRows'], 
                             columns=[chapter['xLabel'], chapter['yLabel'] + ' (sampled)', chapter['yLabel'] + ' (fitted)'])
dfDetProbData.set_index(chapter['xLabel'], inplace=True)
dfDetProbData

In [None]:
axes = dfDetProbData.plot(figsize=(16, 6), color=['blue', 'red'], grid=True,
                          xlim=(chapter['xMin'], chapter['xMax']), ylim=(chapter['yMin'], chapter['yMax']))

axes.set_title(label=chapter['title'] + ' : ' + chapter['subTitle'], fontdict=dict(fontsize=16), pad=20)
axes.legend(dfDetProbData.columns, fontsize=12)
axes.set_xlabel(chapter['xLabel'], fontsize=12)
_ = axes.set_ylabel(chapter['yLabel'], fontsize=12)

In [None]:
# Plotly 4
fig = plygo.Figure()

fig.add_trace(plygo.Scatter(x=dfDetProbData.index, y=dfDetProbData[chapter['yLabel'] + ' (sampled)'],
                            name=chapter['yLabel'] + ' (sampled)', line=dict(color='blue', width=2), opacity=0.7))
fig.add_trace(plygo.Scatter(x=dfDetProbData.index, y=dfDetProbData[chapter['yLabel'] + ' (fitted)'],
                            name=chapter['yLabel'] + ' (fitted)', line=dict(color='red', width=2)))

fig.update_layout(title=chapter['title'] + ' : ' + chapter['subTitle'],
                  xaxis=dict(title=chapter['xLabel'], range=(chapter['xMin'], chapter['xMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  yaxis=dict(title=chapter['yLabel'], range=(chapter['yMin'], chapter['yMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  legend=plygo.layout.Legend(x=0.65, y=0.85*chapter['yMax'], bordercolor='black', borderwidth=1),
                  shapes=[plygo.layout.Shape(type='line', x0=chapter['xMax'], y0=chapter['yMin'],
                                                          x1=chapter['xMax'], y1=chapter['yMax']),
                          plygo.layout.Shape(type='line', x0=chapter['xMin'], y0=chapter['yMax'],
                                                          x1=chapter['xMax'], y1=chapter['yMax'])],
                  template='none')

fig.show()

In [None]:
# Third chapter of the plots file.
# NOTE(review): the comment here used to be "Detection probability", like for chapters[1] ; going by the
#               dfProdDensData name used for this chapter's data below, it is presumably
#               the probability density plot - to be confirmed.
chapter = chapters[2]
chapter

In [None]:
dfProdDensData = pd.DataFrame(data=chapter['dataRows'], 
                              columns=[chapter['xLabel'], chapter['yLabel'] + ' (sampled)', chapter['yLabel'] + ' (fitted)'])
dfProdDensData.set_index(chapter['xLabel'], inplace=True)
dfProdDensData

In [None]:
axes = dfProdDensData.plot(figsize=(16, 6), color=['blue', 'red'],
                           xlim=(chapter['xMin'], chapter['xMax']), ylim=(chapter['yMin'], chapter['yMax']))
axes.set_title(label=chapter['title'] + ' : ' + chapter['subTitle'], fontdict=dict(fontsize=16), pad=20)
axes.legend(dfProdDensData.columns, fontsize=12)
axes.set_xlabel(chapter['xLabel'], fontsize=12)
_ = axes.set_ylabel(chapter['yLabel'], fontsize=12)

In [None]:
# Plotly 4
fig = plygo.Figure()

fig.add_trace(plygo.Scatter(x=dfProdDensData.index, y=dfProdDensData[chapter['yLabel'] + ' (sampled)'],
                            name=chapter['yLabel'] + ' (sampled)', line=dict(color='blue', width=2), opacity=0.7))
fig.add_trace(plygo.Scatter(x=dfProdDensData.index, y=dfProdDensData[chapter['yLabel'] + ' (fitted)'],
                            name=chapter['yLabel'] + ' (fitted)', line=dict(color='red', width=2)))

fig.update_layout(title=chapter['title'] + ' : ' + chapter['subTitle'],
                  xaxis=dict(title=chapter['xLabel'], range=(chapter['xMin'], chapter['xMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  yaxis=dict(title=chapter['yLabel'], range=(chapter['yMin'], chapter['yMax']),
                             zeroline=True, linewidth=1, linecolor='black'),
                  legend=plygo.layout.Legend(xanchor='right', yanchor='top', bordercolor='black', borderwidth=1),
                  #margin=plygo.layout.Margin(l=40, r=40, b=40, t=40, pad=0),
                  shapes=[plygo.layout.Shape(type='line', x0=chapter['xMax'], y0=chapter['yMin'],
                                                          x1=chapter['xMax'], y1=chapter['yMax']),
                          plygo.layout.Shape(type='line', x0=chapter['xMin'], y0=chapter['yMax'],
                                                          x1=chapter['xMax'], y1=chapter['yMax'])],
                  template='none')

fig.show()

# Extract results from MCDS work folders

In [None]:
_ = implib.reload(ads)

In [None]:
# Results set to store results into.
miCustCols = pd.MultiIndex.from_tuples([('id', 'ExecCase', 'Value')])
dfCustColTrans = \
    pd.DataFrame(index=miCustCols, data=dict(en=['ExecCase'], fr=['CasExec']))

results = ads.ResultsSet(analysisClass=ads.MCDSAnalysis, miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans)

In [None]:
# Analysis engine
mcds = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'dist-order-sens'),
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial')

In [None]:
# Process folders in engine work folder : decode the stats of each MCDS run folder found there,
# and store them in the results set, with the folder name as the custom head.
for folder in os.listdir(mcds.workDir):
    
    # Skip folders that are not MCDS run ones
    # (silently for plain files ; with a message for folders with an extension-like suffix in their name,
    #  or with no stats.txt file inside).
    folderPath = os.path.join(mcds.workDir, folder)
    if not os.path.isdir(folderPath):
        continue
    if os.path.splitext(folder)[1] or 'stats.txt' not in os.listdir(folderPath):
        print(f'Skipping {folderPath}, not an MCDS.exe run folder with a stats.txt file')
        continue
        
    # Tell the engine where it has run (even if it does not remember it ;-)
    _ = mcds.setupRunFolder(forceSubFolder=folder)
    
    # Decode results.
    sRes = mcds.decodeStats()
    print()
    
    # Store them for later.
    sHead = pd.Series(data=[folder], index=miCustCols)
    results.append(sRes, sCustomHead=sHead)

# Tadaaaaaaa ! (results table, with column names translated to french)
results.dfTransData('fr')

In [None]:
results.dfTransData('en').to_excel(mcds.workDir + '.auto.xlsx', index=False)

# Unit tests for reference / actual results comparison

In [None]:
values = [np.nan, -np.inf, -1.0e12, -1.0e5, -1.0-1e-5, -1.0, -1.0+1e-5, -1.0e-8, 0.0, 1.0e-8, 1.0, 1.0e5, 1.0e12, np.inf]

In [None]:
# Closeness matrix for every (row value, column value) pair of the test values.
# Note: The matrix is pre-filled with NaN, as np.ndarray(shape=...) leaves memory uninitialised :
#       a cell whose computation failed (exception printed below) would otherwise keep a garbage value.
aClose = np.full(shape=(len(values), len(values)), fill_value=np.nan)
for r in range(len(values)):
    for c in range(len(values)):
        try:
            aClose[r, c] = closeness(pd.Series([values[r], values[c]]))
        except Exception as exc:
            # Report the failing pair, and go on with the other ones.
            print(exc, r, c, values[r], values[c])
pd.DataFrame(data=aClose, index=values, columns=values)

In [None]:
# Indexes of the test values that are at most 1e-5 away from -1
# (computed here, for this cell not to fail with a NameError when the notebook is run top-down on a fresh kernel).
whereClose = [i for i in range(len(values)) if abs(values[i] + 1) <= 1.0e-5]
whereClose

In [None]:
# Infinite closeness on the diagonal (except for nan and +/-inf)
assert all(np.isnan(values[i]) or np.isinf(values[i]) or np.isinf(aClose[i, i]) for i in range(len(values))), \
       'Error: Inequality on the diagonal'

# No infinite closeness elsewhere
assert all(r == c or not np.isinf(aClose[r, c]) for r in range(len(values)) for c in range(len(values))), \
       'Error: No equality should be found outside the diagonal'

# Good closeness (> 4) between all the values that are at most 1e-5 away from -1
whereClose = [i for i in range(len(values)) if abs(values[i] + 1) <= 1.0e-5]
assert all(aClose[r, c] > 4 for r in whereClose for c in whereClose), 'Error: Unexpectedly bad closeness around -1'

In [None]:
# Ancienne méthode qui ne marche pas.
# Comparaison actual / reference : -round(log10((actual - reference) / max(abs(actual), abs(reference))), 1)
# => Plus c'est grand, plus petite est la différence relative entre les 2
#    Ex: 3 = facteur 10**3 entre différence et valeurs absolues ; +inf = AUCUNE différence
#        0 = pas bon, l'un des 2 est nul n'autre pas du tout
# Cf. tests unitaires plus bas.
#dfRelDif = pd.DataFrame(index=dfRefRes4c.index)
#for col in dfRefRes4c.columns:
#    dfRelDif['NormalCases'] = ~((dfActRes4c[col].isnull() & dfRefRes4c.notnull()) \
#                                | (dfActRes4c[col].notnull() & dfRefRes4c.isnull()) \
#                                | dfActRes4c[col].notnull() | dfRefRes4c.isnull())
#    dfRelDif[col] = abs(dfActRes4c[col] - dfRefRes4c[col])
#    dfRelDif[col].where(dfRelDif[col].isnull() | dfRelDif[col] == 0,
#                        dfRelDif[col] / pd.DataFrame(dict(act=dfActRes4c[col], ref=dfRefRes4c[col])).abs().max(axis='columns'),
#                        inplace=True)
#    dfRelDif[col].where(dfRelDif['NormalCases'], 1, inplace=True) # Force special case to "all different"
#    dfRelDif.drop(columns=['NormalCases'], inplace=True)
#    dfRelDif[col] = np.round(-np.log10(dfRelDif[col]), 1)
#    
#dfRelDif

# Generate stats columns translation file

(from documentation stats & modules specs)

In [None]:
tgtTransFileName = os.path.join('AutoDS', 'mcds-stat-mod-trans.txt')

In [None]:
class Translator(object):
    """Callable translating strings through per-language dictionaries, with English as a fallback.

    Calling the instance with a string returns its translation in the current language,
    or else its English translation, or else the string itself.
    """

    def __init__(self, dTrans, lang='en'):
        """Constructor

        Parameters:
        :param dTrans: dict of translation dicts, keyed by language code ; must have an 'en' entry
        :param lang: initial language code (case-insensitive ; see setLang)

        :raise AssertionError: if dTrans has no 'en' entry, or if lang is not supported
        """
        assert 'en' in dTrans, 'At least "en" translation must be defined'
        self.dTrans = dTrans
        self.setLang(lang)

    def setLang(self, lang):
        """Change the current language

        Parameters:
        :param lang: language code, 'en' or 'fr' (case-insensitive)

        :raise AssertionError: if lang is not supported ; the current language is then left unchanged
        """
        # Validate before storing, so that a refused language does not corrupt the instance state.
        newLang = lang.lower()
        assert newLang in ['en', 'fr'], 'No support for "{}" language'.format(lang)
        self.lang = newLang

    def __call__(self, s):
        """Translate s to the current language (fallback: English translation, then s itself)"""
        dEnTrans = self.dTrans['en']
        # A language with no dict at all in dTrans is handled as if it were English.
        return self.dTrans.get(self.lang, dEnTrans).get(s, dEnTrans.get(s, s))

In [None]:
# Short names of the "figures" (Value, Cv, Lcl, Ucl, Df), for each supported language
# (the empty string for Value means : no prefix in the final column name).
DFigureTrans = {
    'en': {'Value': '', 'Cv': 'ConfInd', 'Lcl': 'Min', 'Ucl': 'Max', 'Df': 'DoF'},
    'fr': {'Value': '', 'Cv': 'IndConf', 'Lcl': 'Min', 'Ucl': 'Max', 'Df': 'DegLib'},
}

figtr = Translator(DFigureTrans, lang='en')

In [None]:
# Short names of the statistics, keyed by their full description, for each supported language.
# The 'fr' dict only lists the statistics whose short name differs in French :
# for the other ones, Translator.__call__ falls back to the 'en' short name.
DStatisticTrans = \
    dict(en={ 'number of observations (n)': 'NObs',
              'number of samples (k)': 'NSamp',
              'effort (L or K or T)': 'Effort',
              'encounter rate (n/L or n/K or n/T)': 'EncRate',
              'left truncation distance': 'LeftTruncDist',
              'right truncation distance (w)': 'RightTruncDist',
              'total number of parameters (m)': 'TotNumPars',
              'AIC value': 'AIC',
              'chi-square test probability (distance set 1)': 'Chi2 P 1',
              'chi-square test probability (distance set 2)': 'Chi2 P 2',
              'chi-square test probability (distance set 3)': 'Chi2 P 3',
              'f(0) or h(0)': 'f/h(0)',
              'probability of detection (Pw)': 'PDetec',
              'effective strip width (ESW) or effective detection radius (EDR)': 'EDR/ESW',
              'AICc': 'AICc',
              'BIC': 'BIC',
              'Log likelihood': 'LogLhood',
              'Kolmogorov-Smirnov test probability': 'KS P',
              'Cramér-von Mises (uniform weighting) test probability': 'CvM Uw P',
              'Cramér-von Mises (cosine weighting) test probability': 'CvM Cw P',
              'key function type': 'KeyFn',
              'adjustment series type': 'AdjSer',
              'number of key function parameters (NKP)': 'NumKFnPars',
              'number of adjustment term parameters (NAP)': 'NumASerPars',
              'number of covariate parameters (NCP)': 'NumCovars',
              'estimated value of A(1) adjustment term parameter': 'EstA(1)',
              'estimated value of A(2) adjustment term parameter': 'EstA(2)',
              'estimated value of A(3) adjustment term parameter': 'EstA(3)',
              'estimated value of A(4) adjustment term parameter': 'EstA(4)',
              'estimated value of A(5) adjustment term parameter': 'EstA(5)',
              'estimated value of A(6) adjustment term parameter': 'EstA(6)',
              'estimated value of A(7) adjustment term parameter': 'EstA(7)',
              'estimated value of A(8) adjustment term parameter': 'EstA(8)',
              'estimated value of A(9) adjustment term parameter': 'EstA(9)',
              'estimated value of A(10) adjustment term parameter': 'EstA(10)',
              'average cluster size': 'AvgClustSz',
              'size-bias regression correlation (r)': 'SzBias RegCorr',
              'p-value for correlation significance (r-p)': 'CorSignPVal',
              'estimate of expected cluster size corrected for size bias': 'EstExpFixedCluSz',
              'density of clusters (or animal density if non-clustered)': 'DensClu',
              'density of animals': 'Density',
              'number of animals, if survey area is specified': 'Number',
              'bootstrap density of clusters': 'BootsDensClu',
              'bootstrap density of animals': 'BootDensity',
              'bootstrap number of animals': 'BootNumber' },
         fr={ 'number of samples (k)': 'NEchant',
              'encounter rate (n/L or n/K or n/T)': 'TxContact',
              'left truncation distance': 'DistTroncGche',
              'right truncation distance (w)': 'DistTroncDte',
              'total number of parameters (m)': 'NbTotPars',
              'Log likelihood': 'LogProba',
              'key function type': 'FnClé',
              'adjustment series type': 'SérAjust',
              'number of key function parameters (NKP)': 'NbParsFnClé',
              'number of adjustment term parameters (NAP)': 'NbParsSérAjust',
              'number of covariate parameters (NCP)': 'NbCovars',
              'average cluster size': 'TailMoyClust',
              'size-bias regression correlation (r)': 'CorrReg BiaisTail',
              'p-value for correlation significance (r-p)': 'PVal SignifCorr',
              'estimate of expected cluster size corrected for size bias': 'TailCorrCluAttEst',
              'density of animals': 'Densité',
              'number of animals, if survey area is specified': 'Nombre',
              'bootstrap density of clusters': 'BootsDensClu',
              'bootstrap density of animals': 'DensitéBoot',
              'bootstrap number of animals': 'NombreBoot' })

# Translator of the statistic descriptions to their short names (English to start with).
statr = Translator(DStatisticTrans, lang='en')

In [None]:
# 1 row per stats column of the MCDS engine, described by its Module, Statistic and Figure.
dfStatModTrans = ads.MCDSEngine.MIStatModColumns.to_frame() \
                    .reset_index(drop=True) \
                    .rename(columns={ 0: 'Module', 1: 'Statistic', 2: 'Figure' })

# 1 more column per language : "<figure short name> <statistic short name>"
# (stripped, as the figure short name may be empty).
for lang in ['en', 'fr']:
    figtr.setLang(lang)
    statr.setLang(lang)
    dfStatModTrans[lang] = \
        dfStatModTrans.apply(lambda sStat: '{} {}'.format(figtr(sStat.Figure), statr(sStat.Statistic)).strip(),
                             axis='columns')

In [None]:
dfStatModTrans

In [None]:
dfStatModTrans.to_csv(tgtTransFileName, sep='\t', index=False)
tgtTransFileName

In [None]:
pd.DataFrame(index=analysis.MIRunColumns,
             data=dict(en=['ModKeyFn', 'ModAdjSer', 'ModChcCrit', 'ConfInter', 'RunCode', 'RunFolder'],
                       fr=['FnCléMod', 'SérAjustMod', 'CritChxMod', 'InterConf', 'CodeExec', 'DossierExec']))


In [None]:
dfStatModTransExt = pd.read_csv(tgtTransFileName, sep='\t')
dfStatModTransExt

In [None]:
# Reload the translation table from the file, and show it indexed by (Module, Statistic, Figure).
dfStatModTransExt = pd.read_csv(tgtTransFileName, sep='\t')
dfStatModTransExt.set_index(['Module', 'Statistic', 'Figure'])

In [None]:
# Translate the columns of the results table to the chosen language.
lang = 'fr'
# Mapping : (Module, Statistic, Figure) tuple => translated column name.
dTrans = dfStatModTransExt.set_index(['Module', 'Statistic', 'Figure'])[lang].to_dict()
# Columns with no entry in the mapping are kept as is.
# NOTE(review): `resultats` is not defined in the visible cells (only `results` is) - confirm where it comes from.
resultats.dfData.columns = [dTrans.get(col, col) for col in resultats.dfData.columns]
resultats.dfData

In [None]:
x = resultats.dfData.columns


In [None]:
x.to_list()

In [None]:
dfStatModTransExt.set_index(['Module', 'Statistic', 'Figure'])[lang].to_dict()

# Test case class

(not actually needed: pd.DataFrame already does the job!)

In [None]:
# Base class for test cases
class TestCase(object):
    """Simple attribute holder, with a per-class frozen set of attribute names.

    The keyword names passed to the 1st instance built freeze the set (stored as the
    `AttributeNames` attribute of the class) : any later instance must be built
    with exactly the same keyword names.
    """

    def __init__(self, **attrs):
        """Constructor

        Parameters:
        :param attrs: attribute names and values to set on the instance

        :raise AssertionError: if the names differ from the already frozen set
        """
        cls = type(self)
        names = set(attrs)
        if hasattr(cls, 'AttributeNames'):
            assert names == self.AttributeNames, \
                   'Some attribute name not in frozen set {{{}}}'.format(','.join(self.AttributeNames))
        else:
            # 1st instance : freeze the set of names.
            cls.AttributeNames = names
        for name, value in attrs.items():
            setattr(self, name, value)

    def __repr__(self):
        """<class name>(<name>:<value>,...) with the instance attributes, in setting order"""
        shownAttrs = ','.join('{}:{}'.format(name, value) for name, value in vars(self).items())
        return '{}({})'.format(type(self).__name__, shownAttrs)

In [None]:
# Test this super-class.
class TCTest(TestCase):
    pass

tstTestCases = list()
tstTestCases.append(TCTest(x=1, y='a')) # Define attributes
tstTestCases.append(TCTest(x=2, y='b')) # Check attributes
try:
    tstTestCases.append(TCTest(x=2, z=None)) # Refuse new attributes
except AssertionError as exc:
    print('Good refuse of new attributes:', exc)
else:
    # Must fail outside of the try body : inside, this AssertionError would be caught
    # by the except clause above and reported as a success.
    assert False, 'Error: New attributes should be refused'
    
[str(tc) for tc in tstTestCases]

# Mise au point décodage sorties de MCDS : fichier de stats

TODO: Add French translation of variables / parameters names and descriptions

## 1. Nom et description des colonnes du tableau de stats

In [None]:
fileName = 'mcds-stat-row-specs.txt'

fStatRowSpecs = open(fileName, mode='r', encoding='utf8')

In [None]:
# Read the spec lines (but the '#' comment ones), and close the file as soon as done.
with fStatRowSpecs:
    statRowSpecLines = [line.rstrip('\n') for line in fStatRowSpecs.readlines() if not line.startswith('#')]

# The specs come as groups of 3 lines : name, description, and a 3rd line that is ignored.
# Stop at len-1 (not len-2), so that the last group is not lost when its 3rd line is missing at end of file.
statRowSpecs =  [(statRowSpecLines[i].strip(), statRowSpecLines[i+1].strip()) \
                 for i in range(0, len(statRowSpecLines)-1, 3)]
dfStatRowSpecs = pd.DataFrame(columns=['Name', 'Description'], data=statRowSpecs).set_index('Name')

dfStatRowSpecs

In [None]:
dfStatRowSpecs.index

## 2. Numéro et description des modules et statistiques associées

(colonnes Module et Statistic du tableau)

In [None]:
fileName = 'mcds-stat-mod-specs.txt'

fStatModSpecs = open(fileName, mode='r', encoding='utf8')

In [None]:
# Max. number of A(i) adjustment term parameters to generate a statistic for.
nMaxAdjParams = 10

# Read the spec lines (but the '#' comment ones), and close the file as soon as done.
with fStatModSpecs:
    statModSpecLines = [line.rstrip('\n') for line in fStatModSpecs.readlines() if not line.startswith('#')]

# "<number> – <description>" lines, for modules as well as for statistics.
reModSpecNumName = re.compile('(.+) – (.+)')
statModSpecs = list()
moModule = None # Match object of the current module line (None when the next matching line is a module one)
for line in statModSpecLines:
    if not line:
        continue
    if moModule is None:
        moModule = reModSpecNumName.match(line.strip())
        continue
    if line == ' ':
        # End of the statistics of the current module.
        moModule = None
        continue
    moStatistic = reModSpecNumName.match(line.strip())
    modNum, modDesc, statNum, statDescNotes = \
        moModule.group(1), moModule.group(2), moStatistic.group(1), moStatistic.group(2)
    # Split the statistic description from its trailing note numbers (digits, spaces and commas) :
    # the description ends at the last character that is none of these.
    # Default = no description at all, only notes (prevents reusing the values of the previous line).
    statDesc, statNotes = '', statDescNotes.replace(' ', '')
    for i in range(len(statDescNotes)-1, -1, -1):
        if not re.match(r'[\d ,]', statDescNotes[i]):
            statDesc = statDescNotes[:i+1]
            statNotes = statDescNotes[i+1:].replace(' ', '')
            break
    modNum = int(modNum)
    if statNum.startswith('101 '):
        for num in range(nMaxAdjParams): # Assume no more than that ... a bit hacky !
            statModSpecs.append((modNum, modDesc, 101+num, # Make statDesc unique for later indexing
                                 statDesc.replace('each', 'A({})'.format(num+1)), statNotes))
    else:
        statNum = int(statNum)
        if modNum == 2 and statNum == 3: # Actually, there are 0 or 3 of these ...
            for num in range(3):
                statModSpecs.append((modNum, modDesc, num+201,
                                     # Change statNum & Make statDesc unique for later indexing
                                     statDesc+' (distance set {})'.format(num+1), statNotes))
        else:
            statModSpecs.append((modNum, modDesc, statNum, statDesc, statNotes))
dfStatModSpecs = pd.DataFrame(columns=['modNum', 'modDesc', 'statNum', 'statDesc', 'statNotes'],
                              data=statModSpecs).set_index(['modNum', 'statNum'])

dfStatModSpecs

In [None]:
# Modules
dfStatModSpecs.modDesc.unique()

## 3. Notes sur les statistiques des modules

(infos supplémentaires indiquant comment utiliser ou pas les 5 dernières colonnes Value, Cv, Lcl, Ucl, Df)

In [None]:
fileName = 'mcds-stat-mod-notes.txt'

fStatModNotes = open(fileName, mode='r', encoding='utf8')

In [None]:
# Read the note lines (but the '#' comment ones), and close the file as soon as done.
with fStatModNotes:
    statModNoteLines = [line.rstrip('\n') for line in fStatModNotes.readlines() if not line.startswith('#')]

# Each non-empty line : the note number in the 1st 2 characters, then the note text.
statModNotes =  [(int(line[:2]), line[2:].strip()) for line in statModNoteLines if line]

dfStatModNotes = pd.DataFrame(data=statModNotes, columns=['Note', 'Text']).set_index('Note')

dfStatModNotes

## 4. Lecture du tableau

In [None]:
eng = mcds

In [None]:
eng.statsFileName

In [None]:
# Load the stats table : fields separated by 1 or more spaces (regular expression separator,
# hence the python parsing engine), columns named after the row specs index.
dfStatRows = pd.read_csv(eng.statsFileName, sep=' +', engine='python', names=dfStatRowSpecs.index)
dfStatRows

## 5. Décodage du tableau

Attention: On suppose 1 seule strate '0' (Stratum), 1 seul échantillon '0' (Sample) et 1 seul estimateur '1' (Estimator).

### a. Suppression des colonnes Stratum, Sample et Estimator

(puisqu'on se limite ici aux cas où il n'y a qu'1 de chaque)

In [None]:
# Get rid of the 3 columns that are assumed constant here.
dfStatRows = dfStatRows.drop(columns=['Stratum', 'Sample', 'Estimator'])
dfStatRows

### b. Nettoyage des données sans objets

(selon les notes descriptives des statistiques)

In [None]:
# Stack the "figures" (Value, Cv, Lcl, Ucl, Df) of each module / statistic : 1 row per figure.
dfStats = (dfStatRows.set_index(['Module', 'Statistic'], append=True)
                     .stack()
                     .reset_index()
                     .rename(columns={'level_0': 'id', 'level_3': 'Figure', 0: 'Value'}))
dfStats.head(10)

In [None]:
# 4. Give the repeated Module=2 & Statistic=3 rows distinct statistic numbers : 201, 202, ...
#    (must be done before joining with the module / statistic specs).
newStatNum = 200
for lbl in dfStats[(dfStats.Module == 2) & (dfStats.Statistic == 3)].index:
    # Each 'Value' figure row starts a new statistic.
    if dfStats.loc[lbl, 'Figure'] == 'Value':
        newStatNum += 1
    dfStats.loc[lbl, 'Statistic'] = newStatNum
dfStats[dfStats.Module == 2]

In [None]:
# Add the module and statistic description / naming columns,
# by joining on the (modNum, statNum) index of the specs table.
dfStats = dfStats.join(dfStatModSpecs, on=['Module', 'Statistic'])
dfStats.tail(10)

In [None]:
#dfStats[(dfStats.Module == 2) & (dfStats.Statistic > 200)]

In [None]:
# Check that the supposedly meaningless figures really are (all 0.0 ?)
# Warning: There must be a bug in MCDS with Module 2 / Statistic 10x : some Cv are not null ...
# Statistics whose notes do not contain '1' : only the Value figure is to be kept.
sKeepOnlyValueFig = ~dfStats.statNotes.str.contains('1')
sFigs2Drop = (dfStats.Figure != 'Value') & sKeepOnlyValueFig
# `not` rather than `~` : on a plain Python bool, `~` gives -1 or -2, both truthy,
# which would make this assertion impossible to fail.
assert not dfStats[sFigs2Drop & ((dfStats.Module != 2) | (dfStats.Statistic < 100))].Value.any(), \
       'Attention: Des chiffres supposés "sans objet" on des valeurs non nulles !'

In [None]:
# 2nd check, visual this time : the figures to drop that are actually not null, biggest first.
# Parentheses needed around the comparison : `&` binds tighter than `!=`.
dfStats[sFigs2Drop & (dfStats.Value != 0)].sort_values(by='Value', ascending=False)

In [None]:
# Remove the rows of the meaningless figures.
dfStats.drop(dfStats[sFigs2Drop].index, inplace=True)
dfStats

In [None]:
dfStats.head()

In [None]:
# Keep only the description columns, the figure name and its value,
# then index the values by (module description, statistic description, figure).
dfStats = dfStats.reindex(columns=['modDesc', 'statDesc', 'Figure', 'Value'])
dfStats.set_index(['modDesc', 'statDesc', 'Figure'], inplace=True)
dfStats

In [None]:
dfStats.T.iloc[0]

# Bac à sable

In [None]:
# Target fields holding decimal numbers.
DecimalFields = ['SMP_EFFORT', 'DISTANCE']

# For each target field (in matching order), the patterns to search for
# (ignoring case) in the source field names.
ImportFieldAliasREs = odict()
ImportFieldAliasREs['STR_LABEL'] = ['region', 'zone', 'strate', 'stratum']
ImportFieldAliasREs['STR_AREA'] = ['surface', 'area', 'ha', 'km2']
ImportFieldAliasREs['SMP_LABEL'] = ['point', 'lieu', 'location']
ImportFieldAliasREs['SMP_EFFORT'] = ['effort', 'passages', 'surveys', 'samplings']
ImportFieldAliasREs['DISTANCE'] = ['distance']

def matchDataFields(srcFields):
    """Match each target field of ImportFieldAliasREs to the 1st source field
    whose name contains one of its patterns (case ignored).

    The whole search is traced on the standard output.

    Parameters:
    :param srcFields: list of the source field names

    :return: tuple (matched source fields, 1 per target field, in target fields order,
                    those of them matched to a target field listed in DecimalFields,
                    source fields that were not matched, in srcFields order)
    :raise Exception: as soon as a target field matches no source field
    """

    print('Matching required data columns:', end=' ')

    # Look for 1 source field for each target field.
    matFields = list()
    matDecFields = list()
    for target, patterns in ImportFieldAliasREs.items():
        print(target, end='=')
        matched = None
        for candidate in srcFields:
            print(candidate, end=':')
            for pattern in patterns:
                print(pattern, end=';')
                if re.search(pattern, candidate, flags=re.IGNORECASE) is not None:
                    matched = candidate
                    break
            if matched is not None:
                break
        if matched is None:
            raise Exception('Error: Failed to find a match for expected {} in dataset columns {}' \
                            .format(target, srcFields))
        print(matched, end=', ')
        matFields.append(matched)
        if target in DecimalFields:
            matDecFields.append(matched)

    # What's left : the extra fields.
    extFields = [candidate for candidate in srcFields if candidate not in matFields]

    print('... success.')

    return matFields, matDecFields, extFields

In [None]:
matchDataFields(['Region*Label', 'Region*Area', 'Point transect*Label',
       'Point transect*Survey effort', 'Observation*Radial distance'])

In [None]:
mo = re.search('area', 'Region*Area', flags=re.IGNORECASE)
mo

In [None]:
def safeFloat2Str(val, prec=None, decPt='.'):
    """Convert a number to a string, null values giving an empty string.

    Parameters:
    :param val: value to convert
    :param prec: number of decimals of the fixed-point output ; None => plain str(val)
    :param decPt: character to use as the decimal point in the output

    :return: the resulting string ('' if val is null, as of pd.isnull)
    """
    if pd.isnull(val):
        text = ''
    elif prec is None:
        text = str(val)
    else:
        text = '{:.{prec}f}'.format(val, prec=prec)
    return text if decPt == '.' else text.replace('.', decPt)

In [None]:
safeFloat2Str(12.53, prec=None, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=1, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=4, decPt='.')

In [None]:
safeFloat2Str(12.53, prec=None, decPt=',')

In [None]:
_ = implib.reload(ads)

In [None]:
cmdTxt = ads.MCDSEngine.CmdTxt.format(output='output.txt', log='log.txt',
                            stats='stats.txt', plots='plots.txt',
                            survType='Point', distType='Radial',
                            distUnit='m', areaUnit='ha',
                            dataFields=', '.join(['a', 'b', 'c']), dataFileName='data.txt',
                            estKeyFn='HNORMAL', estAdjustFn='COSINE',
                            estCriterion='AIC', cvInterv=95)
cmdTxt

In [None]:
ads.MCDSEngine.CmdTxt

## Appending series to series ... index order

In [None]:
s = pd.Series(index=pd.MultiIndex.from_tuples([('B', 'b'), ('B', 'a'), ('A', 'c')]), data=[1, 2, 3], name=0)
s

In [None]:
s.append(pd.Series(index=[('A', 'b'), ('A', 'a'), ('B', 'c')], data=[1, 2, 3], name=0))

## Appending series to DataFrame ... columns order

### a. Append

In [None]:
df = pd.DataFrame()

In [None]:
# NOTE(review): DataFrame.append is no longer available in recent pandas versions (removed in 2.0) -
# confirm the pandas version this sandbox is meant to run with.
s = pd.Series(index=pd.MultiIndex.from_tuples([('B', 'b'), ('B', 'a'), ('A', 'c')]), data=[1, 2, 3], name=0)
#df = df.append(s, ignore_index=False) # => df.columns is not a MultiIndex !
df = df.append([s], ignore_index=False)
df

In [None]:
s = pd.Series(index=[('A', 'c'), ('B', 'b'), ('B', 'a')], data=[4, 5, 6], name=1)  # Same columns : append does not re-sort
#s = pd.Series(index=[('A', 'a'), ('A', 'b'), ('B', 'c')], data=[4, 5, 6], name=1)  # New column : append re-sorts
df = df.append([s], ignore_index=True)
df

In [None]:
s = pd.Series(index=[('A', 'a'), ('B', 'c')], data=[7, 8])
df = df.append(s, ignore_index=True)
df

In [None]:
s = pd.Series(index=[], data=[])
df = df.append([s], ignore_index=True)
df

In [None]:
s = pd.Series(index=[('C', 'd')], data=[9])
df = df.append([s], ignore_index=True)
df

In [None]:
s = pd.Series(index=[('d',)], data=[10])
df = df.append(s, ignore_index=True)
df

In [None]:
df

### b. Concat

In [None]:
df = pd.DataFrame()

In [None]:
s = pd.Series(index=pd.MultiIndex.from_tuples([('B', 'b'), ('B', 'a'), ('A', 'c')]), data=[1, 2, 3], name=0)
df = pd.concat([df, s], axis='columns')
df

In [None]:
s = pd.Series(index=[('B', 'b'), ('B', 'a'), ('A', 'c')], data=[4, 5, 6], name=1) # Same columns : concat does not re-sort
#s = pd.Series(index=[('A', 'a'), ('A', 'b'), ('B', 'c')], data=[4, 5, 6], name=1) # New column : concat re-sorts
df = pd.concat([df, s], axis='columns')
df

### c. Restore desired columns

* desired order,
* desired list of columns : new ones, and / or ignored ones.

In [None]:
df

In [None]:
# Add new A/b, D/a and remove B/c and C/d
i = pd.MultiIndex.from_tuples([('A', 'c'), ('A', 'b'), ('A', 'a'), ('B', 'b'), ('B', 'a'), ('D', 'a')])
i

In [None]:
# Keep added columns (with no data inside)
df2 = df.reindex(i, axis='columns')
df2

In [None]:
# Remove added columns (with no data inside)
df2 .dropna(how='all', axis='columns')