<!-- Table des matières automatique -->
<h1 class='tocIgnore'>Sensitivity tests (old and unmaintained)</h1>

**pyaudisam**: Automation of Distance Sampling analyses with [Distance software](http://distancesampling.org/)

Copyright (C) 2021 Jean-Philippe Meuret

This program is free software: you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program.
If not, see https://www.gnu.org/licenses/.

<div style="overflow-y: auto">
  <h1>Table des matières</h1>
  <div id="toc"></div>
</div>

In [None]:
%%javascript
// Load and run the 'ipython_notebook_toc.js' script through jQuery
// (presumably the one that fills the "toc" div of the header cell above — TODO confirm).
$.getScript('ipython_notebook_toc.js')

# Tests de sensibilité de MCDS.exe à diverses choses

In [None]:
import sys
import os
import pathlib as pl
from packaging import version

import re

from collections import OrderedDict as odict

import math
import numpy as np
import pandas as pd

In [None]:
# Put the parent folder first in the module search path
# (presumably to import pyaudisam from the local source tree — TODO confirm).
sys.path.insert(0, '..')

In [None]:
import pyaudisam as ads

# Display the `runtime` attribute of the module
# (presumably a summary of the run-time environment — TODO confirm).
ads.runtime

In [None]:
# Create temporary directory if not yet done (exist_ok => no error if it is already there).
tmpDir = pl.Path('tmp')
tmpDir.mkdir(exist_ok=True)

In [None]:
# Activate Warnings as Exception
#import warnings
#warnings.filterwarnings('error')

# Communs

In [None]:
# Actual / reference closeness measure :
#   round(-log10(abs(actual - reference) / max(abs(actual), abs(reference))), 1)
# = Compute the orders of magnitude that separate the difference from the max. of the two values
def closeness(sRefAct):
    """Closeness of 2 numbers, as a number of orders of magnitude.

    Parameters:
    :param sRefAct: the 2 numbers to compare (reference and actual values),
                    as a 2-item object with a to_list() method (e.g. a pandas Series).

    :return: * 0 if exactly one of the 2 values is NaN, or if any of them is infinite
               ("all different"),
             * NaN if both values are NaN,
             * +inf if the 2 values are equal,
             * otherwise, the opposite of the base 10 logarithm of the absolute difference,
               relative to the biggest absolute value of the two, rounded to 1 decimal.
    """

    x, y = sRefAct.to_list()

    # Special cases with 1 NaN, or 1 or more inf => all different
    xIsNaN = np.isnan(x)
    if xIsNaN != np.isnan(y):
        return 0  # All different

    # Two NaNs : no closeness can be computed.
    if xIsNaN:
        return np.nan

    if np.isinf(x) or np.isinf(y):
        return 0  # All different

    # Equal values : infinitely close.
    # Note: Returned explicitly, because log10(0) issues a "divide by zero" RuntimeWarning,
    #       which turns into an exception when warnings are configured as errors.
    absDiff = abs(x - y)
    if absDiff == 0:
        return np.inf

    # Normal case (no possible division by 0 here, as x != y).
    return round(-np.log10(absDiff / max(abs(x), abs(y))), 1)

In [None]:
# Compare multiple variants to one of them, using closeness fn above
# Aimed at being used as the fn to groupby(...).apply(fn) (see examples below).
def variantCloseness(dfg, indexCols, refLabelInd=0):
    """Closeness of each row of a group of variants to the reference row of this group.

    Parameters:
    :param dfg: the group of variants to compare, 1 row per variant (left untouched)
    :param indexCols: list of the "indexing" columns of dfg : no closeness computed for them,
                      they are simply copied to the result
    :param refLabelInd: position in dfg (not label) of the reference row (default: the first one)

    :return: a data frame with the same index as dfg : the indexing columns,
             joined to the closeness to the reference row for every other column
    """

    # Set apart the "indexing" columns : we don't check closeness on them
    # (not in place : functions passed to groupby(...).apply must not mutate the group).
    dfgi = dfg[indexCols].copy()
    dfgv = dfg.drop(columns=dfgi.columns.to_list())

    # Compute closeness of each row to the reference one (itself included).
    dfgd = pd.DataFrame(columns=dfgv.columns)
    refLbl = dfgv.index[refLabelInd]  # Label of the reference row.
    for lbl in dfgv.index:
        try:
            # closeness gets called once per column, on the (reference, current) pair of values.
            dfgd.loc[lbl] = dfgv.loc[[refLbl, lbl]].apply(closeness)
        except Exception:
            # Show the faulty pair of rows before giving up.
            print(lbl, refLbl)
            print(dfgv.loc[[refLbl, lbl]])
            raise

    # Restore "indexing" columns : done.
    return dfgi.join(dfgd)

# Sensibilité de MCDS.exe à l'ordre des données

## a. Construction des variantes d'analyse

In [None]:
# Analysis cases definition : every combination of species, sample, model and data order
# (only input columns here : no cheating by peeking at the reference results :-)
caseIdCols = ['Species', 'Sample', 'Model', 'DataOrder']

caseSpecies = ['SYLATR', 'TURMER', 'LUSMEG', 'ALAARV', 'COLPAL',
               'PHYCOL', 'EMBCIT', 'EMBCIR', 'ANTTRI', 'MILCAL']
caseSamples = ['AB-10mn-ttdec']
caseKeyFns = ['HNo', 'Uni', 'Haz']
caseAdjSeries = ['Cos', 'Pol']
caseDataOrders = ['pcdc',  # Sort by point, and increasing distances
                  'pcdd',  # Sort by point, and decreasing distances
                  'pc',    # Sort by point, but distance order untouched
                  'dc']    # Sort by increasing distances

# 1 row per case : the species varies the slowest, the data order the fastest ;
# the model is the key function name followed by the adjustment series one.
dfAnlysCases = pd.DataFrame(columns=caseIdCols,
                            data=[(spec, samp, keyFn + adjSer, dataOrd)
                                  for spec in caseSpecies
                                  for samp in caseSamples
                                  for keyFn in caseKeyFns
                                  for adjSer in caseAdjSeries
                                  for dataOrd in caseDataOrders])

# Name of the input data file for each case : depends only on the species and the sample.
dfAnlysCases['InFileName'] = ['ACDC2019-Papyrus-{}-{}-dist.txt'.format(spec, samp)
                              for spec, samp in zip(dfAnlysCases.Species, dfAnlysCases.Sample)]

dfAnlysCases

In [None]:
# Check that the input data file of every analysis case is present in the 'refin' folder.
inFilesFound = [pl.Path('refin', inFileName).exists() for inFileName in dfAnlysCases.InFileName]
assert all(inFilesFound), 'Oh, oh ... Some missing file(s) !'

In [None]:
#dfAnlysCases = dfAnlysCases[:5]

In [None]:
# Number of analysis cases to run.
len(dfAnlysCases)

## b. Exécution des analyses

In [None]:
# Input data columns passed as `decCols` to SampleDataSet.csv2df and as `decimalFields` to SampleDataSet
# in the analysis loop (presumably the columns to parse as decimal numbers — TODO confirm).
decimalFields = ['Point transect*Survey effort', 'Observation*Radial distance']

In [None]:
# Analysis engine, working in the 'tmp/mcds-sens' folder, and configured for
# point surveys with radial distances in meters, areas in hectares, and no clustering.
mcds = ads.MCDSEngine(workDir='tmp/mcds-sens',
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial', clustering=False)

In [None]:
# Frozen analysis parameters (a choice here) : the same values are passed to every analysis of the loop.
KEstimCriterion = 'AIC'  # Passed as `estimCriterion` to MCDSAnalysis
KCVInterval = 95  # Passed as `cvInterval` to MCDSAnalysis (presumably a percentage — TODO confirm)

In [None]:
tsStart = pd.Timestamp.now()
print('Started at', tsStart)
print()

# Run all analyses

# Custom columns that head each row of results : the case identification ones,
# as a 3-level multi-index, with their English and French translations.
miCustCols = pd.MultiIndex.from_tuples([('sample', col, 'Value') for col in caseIdCols])
dfCustColTrans = \
    pd.DataFrame(index=miCustCols,
                 data=dict(en=caseIdCols, fr=['Espèce', 'Echantillon', 'Modèle', 'OrdreDonnées']))

# Results container (same units and survey options as for the engine).
results = ads.MCDSAnalysisResultsSet(miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans,
                                     distanceUnit='Meter', areaUnit='Hectare',
                                     surveyType='Point', distanceType='Radial', clustering=False)

lastInFileName = ''  # NOTE(review): never used in this cell — presumably a leftover; confirm before removing.
for ind, sCase in dfAnlysCases.iterrows():

    prefix = '{}-{}-{}'.format(sCase.Species, sCase.Sample, sCase.DataOrder)
    print('#{:3d} {} {}'.format(ind+1, prefix, sCase.Model), end='\n'*2)

    # Create data set.
    dfInData = ads.SampleDataSet.csv2df(os.path.join('refin', sCase.InFileName), decCols=decimalFields)

    # a. Decode the data order : a sequence of 2-letter sort keys, where
    #    - the 1st letter gives the column to sort by : 'p' (point label) or 'd' (radial distance),
    #    - the 2nd letter gives the direction : 'c' (increasing) or 'd' (decreasing).
    sortCols = list()
    sortAscg = list()
    for srt in [sCase.DataOrder[i:i+2] for i in range(0, len(sCase.DataOrder), 2)]:
        assert len(srt) == 2 and srt[0] in 'pd' and srt[1] in 'cd'
        if srt[0] == 'p':
            sortCols.append('Point transect*Label')
        else:  # 'd'
            sortCols.append('Observation*Radial distance')
        sortAscg.append(srt[1] == 'c')

    # b. Sort the data accordingly.
    #    A stable algorithm is needed for the rows with equal keys to stay in their original order
    #    (ex: 'pc' = sort by point, but distance order untouched) : the default one (quicksort) is not
    #    (note: `kind` is only used by pandas when sorting on a single column).
    dfInData.sort_values(by=sortCols, ascending=sortAscg, kind='mergesort', inplace=True)
    sds = ads.SampleDataSet(dfInData, decimalFields=decimalFields)

    # Run analysis : the model name is the 3-letter key function name,
    # followed by the adjustment series one (both passed upper-cased).
    analysis = ads.MCDSAnalysis(engine=mcds, sampleDataSet=sds, name=prefix,
                                estimKeyFn=sCase.Model[:3].upper(), estimAdjustFn=sCase.Model[3:].upper(),
                                estimCriterion=KEstimCriterion, cvInterval=KCVInterval)
    sResult = analysis.submit().getResults()

    # Save results, headed by the values of the case identification columns.
    sHead = pd.Series(data=[sCase[col] for col in caseIdCols], index=miCustCols)

    results.append(sResult, sCustomHead=sHead)

tsEnd = pd.Timestamp.now()
print('Finished at', tsEnd, ': duration', str(tsEnd - tsStart).replace('0 days ', ''))

In [None]:
# Get the table of all the analysis results from the results set, and show its first rows
# (nothing gets written to disk in this cell).
dfRes = results.dfData

dfRes.head()

## c. Comparaison des résultats à la référence

(pour chaque groupe { espèce, échantillon, modèle }, la 1ère variante de tri)

In [None]:
# Results to compare : a copy of the raw ones, without the columns that are useless for the comparison
# (run time and folder, key function and adjustment series types).
uselessCols = [('run output', 'run time', 'Value'),
               ('run output', 'run folder', 'Value'),
               ('detection probability', 'key function type', 'Value'),
               ('detection probability', 'adjustment series type', 'Value')]
dfRes4c = dfRes.drop(columns=uselessCols)

In [None]:
# Compare data order variant results :
# a. Group by all the case identification columns but the data order one ...
miSampGroupCols = \
    pd.MultiIndex.from_tuples([('sample', col, 'Value') for col in caseIdCols if col != 'DataOrder'])

# b. ... and by all the 'parameters' columns.
miParamGroupCols = \
    pd.MultiIndex.from_tuples([('parameters', col, 'Value')
                               for col in dfRes['parameters'].columns.get_level_values(0)])

miGroupCols = miSampGroupCols.append(miParamGroupCols)
groupCols = miGroupCols.to_list()

# c. In each group, compute the closeness of each variant to the first one ;
#    the grouping columns and the data order are not compared, only kept as is in the results.
indexCols = groupCols + [('sample', 'DataOrder', 'Value')]
dfRelDif = dfRes4c.groupby(groupCols).apply(variantCloseness, indexCols=indexCols, refLabelInd=0)

dfRelDif

## d. Sauvegarde des résultats.

In [None]:
# Target workbook, in the work folder of the analysis engine.
resFileName = os.path.join(mcds.workDir, 'ACDC2019-Papyrus-auto-sens-data-order-results.xlsx')

# 1 sheet for the raw results, and 1 for their closeness to the reference variant.
with pd.ExcelWriter(resFileName) as xlsxWriter:
    for sheetName, dfSheet in [('RawResults', dfRes), ('Diff2Ref', dfRelDif)]:
        dfSheet.to_excel(xlsxWriter, sheet_name=sheetName, index=True)

# Sensibilité de MCDS.exe à l'ordre des données (bis)

Construction semi-manuelle d'un exemple de taille réduite, soumis à Eric Rexstad :
  Cf. refout/dist-order-sens-min/dist-order-sens.odt

# Ordre des données générées par Distance 7 pour MCDS.exe

In [None]:
# Load the tab-separated data file (no header line inside => explicit column names).
dist7DataCols = ['region', 'area', 'point', 'effort', 'distance']
df = pd.read_csv('refout/dist-order-sens-min/cmd-win7-dist-order/data.txt',
                 names=dist7DataCols, sep='\t')
df.head(20)

In [None]:
# Point number = the 2nd space-separated item of the point label, as an integer.
df['npoint'] = [int(pointLabel.split(' ')[1]) for pointLabel in df.point]

In [None]:
# Change the order : sort by point number, then by increasing distance (in place).
df.sort_values(by=['npoint', 'distance'], inplace=True)
df.head(20)

In [None]:
# Save the re-ordered data next to the original one, in the same format
# (tab-separated, no header line, no index, and without the extra 'npoint' column).
sortedOrderDir = pl.Path('refout/dist-order-sens-min/cmd-win7-sorted-order')
sortedOrderDir.mkdir(exist_ok=True)

dfSortedData = df[['region', 'area', 'point', 'effort', 'distance']]
dfSortedData.to_csv('refout/dist-order-sens-min/cmd-win7-sorted-order/data.txt',
                    sep='\t', header=False, index=False)

# Ordre des données en entrée de Distance

In [None]:
# Normally, exactly the same file as refout/dist-order-sens-min/import-data-set.txt.
# (tab-separated, column names read from the first line).
df = pd.read_csv('refin/ACDC2019-Papyrus-TURMER-AB-10mn-1dec-dist.txt', sep='\t', header=0)
df.head(20)

In [None]:
# Change the order : sort by increasing distances, in alphabetical order, yes, really (and ignoring the points).
# Goal: See whether Distance automatically sorts the data again by point.
df.sort_values(by=['Observation*Radial distance'], inplace=True)
df.head(20)

In [None]:
# Save the data as a tab-separated file in the 'tmp' folder, without the index.
df.to_csv('tmp/ACDC2019-Papyrus-TURMER-AB-10mn-1dec-trialpha-dist.txt', sep='\t', index=False)

# Bac à sable