<!-- Auto table of contents -->
<h1 class='tocIgnore'>AutoDS : Validation tests</h1>
<p>(for the <b>autods</b> module, a python interface to MCDS.exe, http://distancesampling.org/)</p>
<div style="overflow-y: auto">
  <h2 class='tocIgnore'>Table of contents</h2>
  <div id="toc"></div>
</div>

In [None]:
%%javascript
$.getScript('ipython_notebook_toc.js')

In [None]:
%matplotlib inline

In [None]:
import sys
import os
import importlib as implib

import re

from collections import OrderedDict as odict, namedtuple as ntuple

import math
import numpy as np
import pandas as pd

from tqdm import tqdm

from IPython.display import HTML

import matplotlib.pyplot as plt

import plotly as ply
import plotly.graph_objs as plygo

In [None]:
import autods as ads

In [None]:
# Activate Warnings as Exception
import warnings

if False:
    
    warnings.filterwarnings(action='error')

    # pd.read_excel
    warnings.filterwarnings(action='default', module='etree')
    warnings.filterwarnings(action='default', module='xlrd')
    warnings.filterwarnings(action='default', module='defusedxml')

# Analyses with real life field data

(for comparison with analyses run manually in Distance 7.3)

## 0. Load analyses set specifications

In [None]:
# Load refout results table
refFileName = 'ACDC2019-Papyrus-ALAARV-TURMER-resultats-distance-73.xlsx'
dfRefRes = pd.read_excel(os.path.join('AutoDS', 'refout', refFileName))
dfRefRes.rename(columns=dict(Name='Model'), inplace=True)

dfRefRes.head()

## 1. Build test cases

In [None]:
# Generate test cases definition code from refout results file (don't cheat : only input columns :-)
modelIdCols = ['Model'] #, 'LTrunc', 'RTrunc', 'FitDistCuts', 'DiscrDistCuts']
modelParamCols = ['LTrunc', 'RTrunc', 'FitDistCuts', 'DiscrDistCuts']
sampleIdCols = ['Species', 'Periods', 'Prec.', 'Duration']
caseIdCols = sampleIdCols + modelIdCols
dfAnlysCases = dfRefRes[caseIdCols + modelParamCols].copy()

dfAnlysCases['KeyFn'] = \
    dfAnlysCases.Model.apply(lambda s: 'UNIFORM' if s.startswith('Unif') \
                                                 else 'HNORMAL' if s.startswith('Half') else 'HAZARD')
dfAnlysCases['AdjSer'] = \
    dfAnlysCases.Model.apply(lambda s: 'COSINE' if s.find(' Cos') > 0 \
                                                else 'POLY' if s.find(' SimPoly') > 0 else 'HERMITE')
dfAnlysCases['InFileName'] = \
    dfAnlysCases.apply(lambda sRow: 'ACDC2019-Papyrus-{}-{}-{}mn-{}dec-dist.txt' \
                                    .format(sRow.Species,
                                            'AB' if 'A+B' in sRow.Periods else 'A' if 'A' in sRow.Periods else 'B',
                                            sRow.Duration.split(' ')[0], sRow['Prec.'].split(' ')[0]),
                       axis='columns')
dfAnlysCases

## 2. Prepare analyses

In [None]:
decimalFields = ['Point transect*Survey effort', 'Observation*Radial distance']

In [None]:
# Analysis engine
mcds = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-out'),
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial')

In [None]:
# Frozen analysis parameters (a choice here)
KEstimCriterion = 'AIC'
KCVInterval = 95

In [None]:
# Results object construction
sampCols = [('sample', col, 'Value') for col in sampleIdCols]
miSampCols = pd.MultiIndex.from_tuples(sampCols)

custCols = sampCols + [('model', 'Model', 'Value')] # + [('model', col, 'Value') for col in modelIdCols]
miCustCols = pd.MultiIndex.from_tuples(custCols)

dfCustColTrans = \
    pd.DataFrame(index=miCustCols,
                 data=dict(en=caseIdCols, 
                           fr=['Espèce', 'Périodes', 'Préc.', 'Durée', # 'Echant.', 
                               'Modèle'])) #, 'TroncGche', 'TroncDrte', 'TranchDistMod', 'TranchDistDiscr']))

results = ads.MCDSResultsSet(miCustomCols=miCustCols, miSampleCols=miSampCols, dfCustomColTrans=dfCustColTrans)

In [None]:
computed = False

## 3a. Or : Really run analyses

In [None]:
# Optionally restrict the list of test cases and of reference results, in order to run faster.
# Warning: If samples are not kept whole, the reference / actual comparisons will fail on Delta AIC.
#selCaseInds = [0, 5, 7, 22, 31] # Some random cases, with uncomplete samples.
#selCaseInds = dfAnlysCases[dfAnlysCases.Sample.isin([3, 4])].index # A shorter selection, with complete samples.
selCaseInds = range(len(dfAnlysCases)) # All of them.

# Remember the original number of cases, only for the report message below.
nOrigAnlysCases = len(dfAnlysCases)
# Same label-based selection on both tables, so that cases and reference results remain aligned
# (assumes both still have the default 0..n-1 integer index — TODO confirm).
dfAnlysCases = dfAnlysCases.loc[selCaseInds]
dfRefRes = dfRefRes.loc[selCaseInds]

print('Retained {} out of {}.'.format(len(selCaseInds), nOrigAnlysCases))

In [None]:
def nan2None(v):
    """Return None when v is null (as per pd.isnull), otherwise v itself."""
    return None if pd.isnull(v) else v


def distCutsFromSpecs(v):
    """Convert a distance cuts specification cell to the value passed to MCDSAnalysis.

    Parameters:
        v: null value, integer number, or string of comma-separated numbers.

    Returns:
        * None if v is null,
        * a Python int if v is an integer number (built-in or numpy integer,
          or an integer-valued float, as pandas may produce when reading numeric Excel cells),
        * otherwise the list of floats parsed from the comma-separated string.
    """
    if pd.isnull(v):
        return None
    if isinstance(v, (int, np.integer)):
        return int(v)
    if isinstance(v, (float, np.floating)) and float(v).is_integer():
        return int(v)
    return [float(x) for x in v.split(',')]


tsStart = pd.Timestamp.now()
print('Started at', tsStart)
print()

# Translation table used to turn model names into file-name friendly strings (loop invariant).
modelNameTrans = str.maketrans({c:'-' for c in ' ,.:;()/'})

# Run all analyses
lastInFileName = None
for nCase, sCase in dfAnlysCases.iterrows():

    # Analysis name = sample part of the input file name + cleaned-up, lower-case model name.
    name = sCase.InFileName[len('ACDC2019-Papyrus')+1:-len('-dist.txt')]
    name += '-' + sCase.Model.lower().translate(modelNameTrans)
    print('#{:3d}'.format(nCase+1), name, sCase.KeyFn, sCase.AdjSer, end='\n'*2)

    # Create data set if not already done (consecutive cases often share the same input file).
    if lastInFileName != sCase.InFileName:
        ds = ads.DataSet(os.path.join('AutoDS', 'refin', sCase.InFileName), decimalFields=decimalFields)
        lastInFileName = sCase.InFileName

    # Run analysis
    analysis = ads.MCDSAnalysis(engine=mcds, dataSet=ds, name=name,
                                estimKeyFn=sCase.KeyFn, estimAdjustFn=sCase.AdjSer,
                                estimCriterion=KEstimCriterion, cvInterval=KCVInterval,
                                minDist=nan2None(sCase.LTrunc), maxDist=nan2None(sCase.RTrunc),
                                fitDistCuts=distCutsFromSpecs(sCase.FitDistCuts),
                                discrDistCuts=distCutsFromSpecs(sCase.DiscrDistCuts))
    sResult = analysis.run()

    # Save results, headed by the case identification columns
    # (picked by name, in the same order as miCustCols, rather than by position in the row).
    sHead = pd.Series(data=[sCase[col] for col in caseIdCols], index=miCustCols)

    results.append(sResult, sCustomHead=sHead)

tsEnd = pd.Timestamp.now()
print('Finished at', tsEnd, ': duration', str(tsEnd - tsStart).replace('0 days ', ''))

computed = True

In [None]:
# Save results in case need for not recomputing them
resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')

results.toExcel(resFileName, sheetName='AutoDSVal')

In [None]:
# Check translation
dfActTrRes = results.dfTransData('fr')

dfActTrRes.head().T.iloc[:30] #.at['TroncGche', 0]

## 3b. Or : Load analyses from a previous run

(already run and saved above)

In [None]:
if not computed:
    
    resFileName = os.path.join(mcds.workDir, 'autods-validation-results.xlsx')
    print('Loading results from {} ...'.format(resFileName))

    results.fromExcel(resFileName, sheetName='AutoDSVal')
    
else:
    
    print('Just computed, not reloading ...')
    
print('... {} analyses to compare'.format(len(results)))

# Compare actual results to reference

(reference = manually run analyses with Distance software)

## 1. Extract actual results to compare

In [None]:
# Analysis results
dfActRes = results.dfData

dfActRes.head().T[:30]

In [None]:
# Select the columns of the automated results, and map them to the available reference columns, for comparison.
dCompCols = \
{
    ('sample', 'Species', 'Value'):   'Species',
    ('sample', 'Periods', 'Value'):   'Periods',
    ('sample', 'Prec.', 'Value'):     'Prec.',
    ('sample', 'Duration', 'Value'):  'Duration',
    
    ('model',  'Model', 'Value'):         'Model',
    ('parameters', 'left truncation distance', 'Value'):           'LTrunc',
    ('parameters', 'right truncation distance', 'Value'):          'RTrunc',
    ('parameters', 'model fitting distance cut points', 'Value'):  'FitDistCuts',
    ('parameters', 'distance discretisation cut points', 'Value'): 'DiscrDistCuts',
    
    ('run output', 'run status', 'Value') : 'Status',
    
    ('detection probability', 'total number of parameters (m)', 'Value'): '# params',
    ('encounter rate', 'number of observations (n)', 'Value'): '# obs',
    
    ('detection probability', 'Delta AIC', 'Value'): 'Delta AIC',
    ('detection probability', 'AIC value', 'Value'): 'AIC',
    ('detection probability', 'chi-square test probability determined', 'Value')               : 'GOF Chi-p',
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value')                  : 'GOF K-S p',
    ('detection probability', 'Cramér-von Mises (uniform weighting) test probability', 'Value'): 'GOF CvM (unif) p',
    ('detection probability', 'Cramér-von Mises (cosine weighting) test probability', 'Value') : 'GOF CvM (cos) p',
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'): 'ESW/EDR',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl')  : 'ESW/EDR LCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl')  : 'ESW/EDR UCL',
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Cv')   : 'ESW/EDR CV',
    
    ('density/abundance', 'density of animals', 'Value'): 'D',
    ('density/abundance', 'density of animals', 'Lcl')  : 'D LCL',
    ('density/abundance', 'density of animals', 'Ucl')  : 'D UCL',
    ('density/abundance', 'density of animals', 'Cv')   : 'D CV',
    
    ('detection probability', 'probability of detection (Pw)', 'Value'): 'P',
    ('detection probability', 'probability of detection (Pw)', 'Lcl')  : 'P LCL',
    ('detection probability', 'probability of detection (Pw)', 'Ucl')  : 'P UCL',
    ('detection probability', 'probability of detection (Pw)', 'Cv')   : 'P CV',
    ('detection probability', 'probability of detection (Pw)', 'Df')   : 'P DF',
}
len(dCompCols)

In [None]:
# Warning: Unused columns (full of NaNs) have been automatically removed
# (see last line of ResultsSet.dfData getter)
dCompCols = { k: v for k, v in dCompCols.items() if k in dfActRes.columns }
len(dCompCols)

In [None]:
len(modelParamCols)

In [None]:
# So we need to cleanup modelParamCols too
modelParamCols = [id_ for id_ in modelParamCols if id_ in dCompCols.values()]
len(modelParamCols)

In [None]:
# Safe stringification of model params
# * needed for use in indexes (hashability)
# * needed to cope with the inconsistent handling of None by to_excel/read_excel
def modelParam2Str(par):
    """Return a safe (hashable, Excel round-trip proof) string form of a model parameter.

    Parameters:
        par: list of numbers, null value, string, or any other scalar.

    Returns:
        * list => str() of the list of its items converted to float,
        * null value (as per pd.isnull) => 'None',
        * string containing ',' (assumed an already somewhat stringified list)
          => same normalised form as for a real list,
        * any other string => the string itself,
        * anything else => str(par).
    """
    if isinstance(par, list):
        return str([float(v) for v in par])
    if pd.isnull(par):
        return 'None'
    if isinstance(par, str) and ',' in par:
        # Assumed already somewhat stringified list
        return str([float(v) for v in par.strip('[]').split(',')])
    # Other strings (no ',') and other scalars: plain stringification.
    return str(par)

In [None]:
# Select results columns and rename them as reference, for easier comparison
dfActRes4c = dfActRes[list(dCompCols.keys())].copy()
dfActRes4c.columns = [dCompCols[col] for col in dCompCols]
dfActRes4c[modelParamCols] = dfActRes4c[modelParamCols].applymap(modelParam2Str) # Hashable mandatory for indexing
dfActRes4c.set_index(caseIdCols + modelParamCols, inplace=True)

dfActRes4c

In [None]:
# Select useful reference columns for comparison
dfRefRes4c = dfRefRes.copy()
dfRefRes4c[modelParamCols] = dfRefRes4c[modelParamCols].applymap(modelParam2Str) # Hashable mandatory for indexing
dfRefRes4c.set_index(caseIdCols + modelParamCols, inplace=True)
dfRefRes4c = dfRefRes4c.reindex(columns=dfActRes4c.columns)

dfRefRes4c

## 2. Automatic diagnosis

In [None]:
# First checks : equality of test case lists (index) and of column names (columns)
assert sorted(dfActRes4c.index)   == sorted(dfRefRes4c.index)
assert sorted(dfActRes4c.columns) == sorted(dfRefRes4c.columns)

In [None]:
# Actual / reference closeness measure :
#   round(-log10(abs(actual - reference) / max(abs(actual), abs(reference))), 1)
# = the order of magnitude that separates the absolute difference from the absolute max. of the two values.
# The greater it is, the lower the relative difference.
#    Ex: 3 = 10**3 ratio between the absolute max. of the two values and their absolute difference,
#        +inf = NO difference at all,
#        0 = bad: one of the two is 0 and the other is not
#            (also returned when exactly one value is NaN, or when at least one is infinite).
# See unit test below.
def closeness(sRefAct):
    """Return the closeness indicator of a pair of (reference, actual) values.

    Parameters:
        sRefAct: series (or any object with a to_list() method) holding exactly 2 numbers.

    Returns:
        * 0 when exactly one of the 2 values is NaN, or when at least one is infinite,
        * +inf when the 2 values are equal,
        * NaN when both values are NaN,
        * otherwise round(-log10(abs(difference) / max of the 2 absolute values), 1).
    """
    ref, act = sRefAct.to_list()

    # Exactly one NaN among the two => all different.
    refIsNan, actIsNan = np.isnan(ref), np.isnan(act)
    if refIsNan != actIsNan:
        return 0

    # At least one infinite value => all different.
    if any(np.isinf(val) for val in (ref, act)):
        return 0

    # Normal case: absolute gap, made relative to the biggest absolute value
    # (a NaN gap, obtained when both values are NaN, is left as is).
    gap = abs(ref - act)
    if gap != 0 and not np.isnan(gap):
        gap = gap / max(abs(ref), abs(act))

    if gap == 0:
        return np.inf
    return round(-np.log10(gap), 1)

In [None]:
# Actual / reference comparison : compute closeness indicator
# Start from a copy of the reference table: each of its columns gets replaced below
# by the closeness indicator between the reference and the actual values.
dfRelDif = dfRefRes4c.copy()
for col in dfRelDif.columns:
    # Temporary 'act' column = actual values of the current column
    # (pandas aligns them on the index of dfRelDif when assigning).
    dfRelDif['act'] = dfActRes4c[col]
    # Row by row, closeness() gets the (reference, actual) pair, in this order.
    dfRelDif[col] = dfRelDif[[col, 'act']].apply(closeness, axis='columns')
    dfRelDif.drop(columns='act', inplace=True)
    
dfRelDif

In [None]:
# Diagnosis : we only keep lines and columns with some relevant differences.
dfBadRelDif = dfRelDif.copy()
len(dfBadRelDif)

In [None]:
# 1. Drop rows : identical Status and everything else NaN
#    (case of status = 0/3/4 : run error, or no run at all)
# Value columns = all compared columns but Status.
valCols = [col for col in dfRelDif.columns if col != 'Status']
# Dropped rows: infinite Status closeness (same status on both sides) and no closeness value anywhere else.
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif.Status.abs() == np.inf) & dfBadRelDif[valCols].isnull().all(axis='columns')].index,
                 axis='index', inplace=True)
# Hard-coded expected number of remaining rows
# (presumably only valid for the full reference data set — TODO confirm).
assert len(dfBadRelDif) == 29, len(dfBadRelDif)
len(dfBadRelDif)

In [None]:
# 2. Drop rows : Status and all other columns at inf (strict equality)
#    NB. Some very small differences are observed, depending on whether results have just been computed
#        or have been loaded from a previously saved Excel file (closeness values above 15, i.e. 10**15 ratio) ;
#        this is why the expected number of remaining rows below depends on the 'computed' flag.
dfBadRelDif.drop(dfBadRelDif[dfBadRelDif.apply(np.isinf, axis='columns').all(axis='columns')].index,
                 axis='index', inplace=True)
# Hard-coded expected numbers of remaining rows (one for each of the 2 cases).
assert (computed and len(dfBadRelDif) == 26) or (not computed and len(dfBadRelDif) == 19), len(dfBadRelDif)
len(dfBadRelDif)

In [None]:
# 3. Drop rows : Status and all other columns at 15 or above (nearly strict equality)
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif >= 15).all(axis='columns')].index, axis='index', inplace=True)
# Hard-coded expected number of remaining rows.
assert len(dfBadRelDif) == 9, len(dfBadRelDif)
len(dfBadRelDif)

In [None]:
# 4. Drop rows : Status and all other columns at 4 or above (near equality)
dfBadRelDif.drop(dfBadRelDif[(dfBadRelDif >= 4).all(axis='columns')].index, axis='index', inplace=True)
# Hard-coded expected number of remaining rows.
assert len(dfBadRelDif) == 4, len(dfBadRelDif)
len(dfBadRelDif)

In [None]:
# 5. Drop rows : Status and all other columns at 4 or above (near equality),
#                except for the GOF KS and CvM columns, that are NaN
#                (not computed when distances are discretised).
# Only possible when DiscrDistCuts is one of the index levels (it is removed when never used).
if 'DiscrDistCuts' in dfBadRelDif.index.names:
    # Columns to check : the non-GOF ones, plus the GOF Chi2 one.
    discrCols = [col for col in dfRelDif.columns if not col.startswith('GOF') or col.find('Chi') > 0]
    # Boolean mask of the rows to drop.
    # NOTE(review): the model parameter index levels were stringified through modelParam2Str
    # before set_index, so this "!= -1" test looks always true — confirm whether "!= 'None'" was intended.
    df2Drop = (dfBadRelDif.index.get_level_values('DiscrDistCuts') != -1) & (dfBadRelDif[discrCols] >= 4).all(axis='columns')
    dfBadRelDif.drop(dfBadRelDif[df2Drop].index, axis='index', inplace=True)
# Hard-coded expected number of remaining rows.
assert len(dfBadRelDif) == 2, len(dfBadRelDif)
len(dfBadRelDif)

In [None]:
# The verdict (see Excel file refFileName, sheet "DiffAuto", for explanations about the 2 Act/Ref differences)
dfBadRelDif.T

In [None]:
dfRefRes4c.loc[dfBadRelDif.index]

In [None]:
dfActRes4c.loc[dfBadRelDif.index]

In [None]:
nFails = len(dfBadRelDif.index)
if nFails > 0:
    print('Warning: {} test case(s) failed ;'.format(nFails))
    print(' ... see sheet "DiffAuto" of {} for possible explanations.'.format(refFileName))
else:
    print('All test cases succeeded !')

## 3. Save results

In [None]:
resCompFileName = os.path.join(mcds.workDir, 'autods-validation-rescomp.xlsx')

with pd.ExcelWriter(resCompFileName) as xlsxWriter:

    dfRefRes.to_excel(xlsxWriter, sheet_name='RefResults', index=True)
    dfActRes4c.reset_index().to_excel(xlsxWriter, sheet_name='ActResults', index=False)
    dfRelDif.reset_index().to_excel(xlsxWriter, sheet_name='Diff2Ref', index=False)
    dfBadRelDif.reset_index().to_excel(xlsxWriter, sheet_name='BadDiff2Ref', index=False)
    dfRefRes4c.loc[dfBadRelDif.index].reset_index().to_excel(xlsxWriter, sheet_name='RefResWithDiff', index=False)
    dfActRes4c.loc[dfBadRelDif.index].reset_index().to_excel(xlsxWriter, sheet_name='ActResWithDiff', index=False)
    dfActRes.to_excel(xlsxWriter, sheet_name='RawActResults', index=True)

In [None]:
dfActRes.head()

# Excel and HTML analysis reports

In [None]:
# Select the columns for the synthesis tables of the report
synthCols = \
[
    ('sample', 'Species', 'Value'),
    ('sample', 'Periods', 'Value'),
    ('sample', 'Prec.', 'Value'),
    ('sample', 'Duration', 'Value'),
    
    ('model', 'Model', 'Value'),
    ('parameters', 'left truncation distance', 'Value'),
    ('parameters', 'right truncation distance', 'Value'),
    ('parameters', 'model fitting distance cut points', 'Value'),
    ('parameters', 'distance discretisation cut points', 'Value'),
    
    ('run output', 'run status', 'Value'),
    
    ('encounter rate', 'number of observations (n)', 'Value'),
    
    ('detection probability', 'Delta AIC', 'Value'),
    ('detection probability', 'AIC value', 'Value'),
    ('detection probability', 'chi-square test probability determined', 'Value'),
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value'),
    ('density/abundance', 'density of animals', 'Cv'),
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl'),
    
    ('density/abundance', 'density of animals', 'Value'),
    ('density/abundance', 'density of animals', 'Lcl'),
    ('density/abundance', 'density of animals', 'Ucl'),
    
    ('detection probability', 'probability of detection (Pw)', 'Value'),
    ('detection probability', 'probability of detection (Pw)', 'Lcl'),
    ('detection probability', 'probability of detection (Pw)', 'Ucl'),
    ('detection probability', 'probability of detection (Pw)', 'Df'),

    ('run output', 'run folder', 'Value'),
]

In [None]:
report = ads.MCDSResultsFullReport(resultsSet=results, synthCols=synthCols, title='Validation du module autods',
                                   subTitle='Rapport d\'analyse global', anlysSubTitle='Rapport détaillé',
                                   description='Qu\'ajouter de plus ?', keywords='autods, validation',
                                   lang='fr', attachedDir='.', tgtFolder=mcds.workDir, tgtPrefix='autods-validation-report')

In [None]:
htmlRep = report.toHtml()

HTML(f'Rapport HTML : <a href="{htmlRep}" target="blank">{htmlRep}</a>')

In [None]:
xlsxRep = report.toExcel()

HTML(f'Rapport Excel : <a href="{xlsxRep}" target="blank">{xlsxRep}</a>')

# Auto-run and report pre-analyses

(to help users set up the full analysis plan : first run a simple analysis on each sample, and show the resulting detection probability plots and a few results)

## 0. Determine samples from input data

* in real life, we'd simply load field collected data, and deduce individual "samples" from it ;
* but here, for testing, it's easier to deduce samples from the manual analysis specification file.

In [None]:
# Create sample table from refout results table
refFileName = 'ACDC2019-Papyrus-ALAARV-TURMER-resultats-distance-73.xlsx'

sampleIdCols = ['Species', 'Periods', 'Prec.', 'Duration']

# Only the sample identification columns are read (usecols), so there is no 'Name' column
# to rename to 'Model' here ; 1 row per distinct sample is then kept, with a fresh 0..n-1 index.
dfSamples = pd.read_excel(os.path.join('AutoDS', 'refout', refFileName), usecols=sampleIdCols)
dfSamples = dfSamples.drop_duplicates().reset_index(drop=True)

dfSamples

## 1. Prepare analyses

In [None]:
decimalFields = ['Point transect*Survey effort', 'Observation*Radial distance']

In [None]:
# Analysis engine
mcds = ads.MCDSEngine(workDir=os.path.join('AutoDS', 'mcds-pout'),
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial')

In [None]:
# Results object construction
custCols = [('sample', col, 'Value') for col in sampleIdCols]
miCustCols = pd.MultiIndex.from_tuples(custCols)
dfCustColTrans = \
    pd.DataFrame(index=miCustCols,
                 data=dict(en=sampleIdCols, 
                           fr=['Espèce', 'Périodes', 'Préc.', 'Durée']))

results = ads.MCDSResultsSet(miCustomCols=miCustCols, dfCustomColTrans=dfCustColTrans)

In [None]:
computed = False

## 2. Or : Really run pre-analyses

In [None]:
# Remember start time, to report the total duration at the end.
tsStart = pd.Timestamp.now()
print('Started at', tsStart)
print()

# Run all analyses : 1 pre-analysis per sample, always with the same model
# (half-normal key function, cosine adjustment series, AIC criterion, 95% CV interval).
lastInFileName = None
for nSamp, sSamp in dfSamples.iterrows():
    
    # Sample id = <species>-<period code : AB, A or B>-<duration>mn-<precision>dec,
    # with duration and precision = first space-separated token of the matching columns.
    sampId = '{}-{}-{}mn-{}dec' \
             .format(sSamp.Species,
                     'AB' if 'A+B' in sSamp.Periods else 'A' if 'A' in sSamp.Periods else 'B',
                     sSamp.Duration.split(' ')[0], sSamp['Prec.'].split(' ')[0])
    print('#{:3d}'.format(nSamp+1), sampId, end='\n'*2)
    
    # Create data set if not already done.
    inFileName = 'ACDC2019-Papyrus-{}-dist.txt'.format(sampId)
    if lastInFileName != inFileName:
        ds = ads.DataSet(os.path.join('AutoDS', 'refin', inFileName), decimalFields=decimalFields)
        lastInFileName = inFileName
        
    # Run analysis
    analysis = ads.MCDSAnalysis(engine=mcds, dataSet=ds, name=sampId + '-' + 'hnor-cos',
                                estimKeyFn='HNORMAL', estimAdjustFn='COSINE',
                                estimCriterion='AIC', cvInterval=95)
    sResult = analysis.run()

    # Save results, headed by the sample identification columns
    # (re-indexed with the custom columns multi-index of the results set).
    sHead = sSamp.copy()
    sHead.index = miCustCols

    results.append(sResult, sCustomHead=sHead)
    
tsEnd = pd.Timestamp.now()
print('Finished at', tsEnd, ': duration', str(tsEnd - tsStart).replace('0 days ', ''))

# Flag checked by the "load from a previous run" cell, to skip reloading.
computed = True

In [None]:
# Look at results
results.dfTransData('fr')[['Espèce', 'Périodes', 'Préc.', 'Durée', 'Fn Clé',
                           'Sér Ajust', 'CodEx', 'NObs', 'AIC', 'Chi2 P', 'KS P', 
                           'Densité', 'CoefVar Densité', 'Min Densité', 'Max Densité']]

In [None]:
# Save results in case need for not recomputing them
resFileName = os.path.join(mcds.workDir, 'autods-validation-preresults.xlsx')

results.toExcel(resFileName, sheetName='AutoDSVal')

## 3b. Or : Load analyses from a previous run

(already run and saved above)

In [None]:
if not computed:
    
    resFileName = os.path.join(mcds.workDir, 'autods-validation-preresults.xlsx')
    print('Loading pre-results from {} ...'.format(resFileName))

    results.fromExcel(resFileName, sheetName='AutoDSVal')
    
else:
    
    print('Just computed, not reloading ...')
    
print('... {} pre-analyses loaded'.format(len(results)))

In [None]:
# Look at results
results.dfTransData('fr')[['Espèce', 'Périodes', 'Préc.', 'Durée', 'Fn Clé',
                           'Sér Ajust', 'CodEx', 'NObs', 'AIC', 'Chi2 P', 'KS P', 
                           'Densité', 'CoefVar Densité', 'Min Densité', 'Max Densité']]

## 4. Generate HTML and Excel reports

In [None]:
# Select the columns for the synthesis tables of the report
synthCols = \
[
    ('sample', 'Species', 'Value'),
    ('sample', 'Periods', 'Value'),
    ('sample', 'Prec.', 'Value'),
    ('sample', 'Duration', 'Value'),
    
    ('parameters', 'estimator key function', 'Value'),
    ('parameters', 'estimator adjustment series', 'Value'),
    ('parameters', 'CV interval', 'Value'),
    ('parameters', 'left truncation distance', 'Value'),
    ('parameters', 'right truncation distance', 'Value'),
    ('parameters', 'model fitting distance cut points', 'Value'),
    ('parameters', 'distance discretisation cut points', 'Value'),
    
    ('run output', 'run status', 'Value'),
    
    ('encounter rate', 'number of observations (n)', 'Value'),
    
    ('detection probability', 'AIC value', 'Value'),
    ('detection probability', 'chi-square test probability determined', 'Value'),
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value'),
    ('density/abundance', 'density of animals', 'Cv'),
    
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Lcl'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Ucl'),
    
    ('density/abundance', 'density of animals', 'Value'),
    ('density/abundance', 'density of animals', 'Lcl'),
    ('density/abundance', 'density of animals', 'Ucl'),
    
    ('detection probability', 'probability of detection (Pw)', 'Value'),
    ('detection probability', 'probability of detection (Pw)', 'Lcl'),
    ('detection probability', 'probability of detection (Pw)', 'Ucl'),
    ('detection probability', 'probability of detection (Pw)', 'Df'),

    ('run output', 'run folder', 'Value'),
]

In [None]:
# Select analysis results columns for the 3 textual columns of the synthesis pre-report
sampleCols = \
[
    ('sample', 'Species', 'Value'),
    ('sample', 'Periods', 'Value'),
    ('sample', 'Prec.', 'Value'),
    ('sample', 'Duration', 'Value')
]

paramCols = \
[
    ('parameters', 'estimator key function', 'Value'),
    ('parameters', 'estimator adjustment series', 'Value'),
    ('parameters', 'CV interval', 'Value')
]
    
resultCols = \
[
    ('run output', 'run status', 'Value'),
    
    ('encounter rate', 'number of observations (n)', 'Value'),
    
    ('detection probability', 'AIC value', 'Value'),
    ('detection probability', 'chi-square test probability determined', 'Value'),
    ('detection probability', 'Kolmogorov-Smirnov test probability', 'Value'),
    ('detection probability', 'probability of detection (Pw)', 'Value'),
    ('detection probability', 'effective strip width (ESW) or effective detection radius (EDR)', 'Value'),

    ('density/abundance', 'density of animals', 'Cv'),
    ('density/abundance', 'density of animals', 'Value'),
    ('density/abundance', 'density of animals', 'Lcl'),
    ('density/abundance', 'density of animals', 'Ucl'),
]

In [None]:
report = ads.MCDSResultsPreReport(resultsSet=results,
                                  title='Validation du module autods', subTitle='Rapport de pré-analyse',
                                  anlysSubTitle='Détail des pré-analyses', description='Qu\'ajouter de plus ?',
                                  keywords='autods, validation', pdfPlotHeight=384, lang='fr',
                                  sampleCols=sampleCols, paramCols=paramCols, resultCols=resultCols, anlysSynthCols=synthCols,
                                  attachedDir='.', tgtFolder=mcds.workDir, tgtPrefix='autods-validation-prereport')

In [None]:
htmlRep = report.toHtml()

HTML(f'Pré-rapport HTML : <a href="{htmlRep}" target="blank">{htmlRep}</a>')

In [None]:
xlsxRep = report.toExcel()

HTML(f'Rapport Excel : <a href="{xlsxRep}" target="blank">{xlsxRep}</a>')

# Sandbox

In [None]:
df = results.dfTransData('fr')
df['x'] = ['C', 'F', 'B', 'E', 'A']
df.index = [3, 2, 4, 1, 5]
df[['x']]

In [None]:
sCurrUrl = df['x']
dfUrls = pd.DataFrame(dict(current=sCurrUrl, previous=np.roll(sCurrUrl, 1), next=np.roll(sCurrUrl, -1)))
dfUrls

In [None]:
pd.options.display.float_format

In [None]:
s = pd.Series(dict(a=1, b=2))

In [None]:
dfRes = results.dfTransData('fr')
dfRes

In [None]:
s = dfRes.loc[0]

In [None]:
df = s.to_frame(name='toto')

In [None]:
x = df.to_html(header=False, border=0, classes='layout-only')

In [None]:
re.sub('\\\n *', '', x)

In [None]:
dPlots = mcds.decodePlots('AutoDS/mcds-out/ALAARV-AB-10mn-6dec-hazard-simpoly-tgd-fitint-z1rrb_m3')

In [None]:
print('\n'.join(dPlots.keys()))

In [None]:
df = pd.DataFrame(data=[dict(x='toto', 
                             y="""<a href="./autods-validation-report.xlsx" target="_blank"><img height="72" style="margin-right: 16px" src="./fa-file-excel.svg"
                                       onmouseover="this.src='./fa-file-excel-hover.svg';"
                                       onmouseout="this.src='./fa-file-excel.svg';"
                                       title="Download Excel" alt="Dnld" /></a>""", z=2)])

In [None]:
df.to_html('AutoDS/mcds-out/test.html', escape=False)

In [None]:
dfDet = results.dfTransData('fr')

In [None]:
[results.dfCustomColTrans['fr'][col] for col in results.miCustomCols]

In [None]:
s = dfDet.loc[0, [results.dfCustomColTrans['fr'][col] for col in results.miCustomCols]]
s

In [None]:
# Series.items() rather than the deprecated Series.iteritems() (removed in pandas 2.0).
', '.join(['{}={}'.format(k, v) for k, v in s.items()])

In [None]:
# Series.items() rather than the deprecated Series.iteritems() (removed in pandas 2.0).
print('#{}'.format(0), ' '.join(['{}={}'.format(k, v) for k, v in s.items()]))

In [None]:
results.dfCustomColTrans['fr'].to_list()

In [None]:
dfSyn = results.dfTransData('fr', subset=synthCols)

In [None]:
sort=True
convert=True
round=True
style=True

In [None]:
self = report

In [None]:
def isNull(o):
    """Tell whether o is null : never for a list, otherwise whatever pd.isnull(o) says."""
    if isinstance(o, list):
        return False
    return pd.isnull(o)

In [None]:
isNull(pd.NaT)

In [None]:
dfTrSynRes = results.dfTransData('fr', subset=synthCols)
dfTrSynRes

In [None]:
# Colors : text, backgrounds, and the 3-step green / orange / red scale.
cChrGray = '#869074'
cBckGreen = '#e0ef8c'
cBckGray = '#dae3cb'
cSclGreen = '#cbef8c'
cSclOrange = '#f9da56'
cSclRed = '#fe835a' #'#f25e2d'
scaledColors = [cSclGreen, cSclOrange, cSclRed]
scaledColorsRvd = scaledColors[::-1]

# Background color of each execution code (1, 2, 3 => 1st, 2nd, 3rd color of the scale).
dExCodeColors = {code: color for code, color in enumerate(scaledColors, start=1)}

def colorExecCodes(sCodes):
    """Return the list of 'background-color' CSS properties for the given execution codes.

    Codes missing from dExCodeColors get the color of code 3.
    """
    prefix = 'background-color: '
    return [prefix + dExCodeColors.get(code, dExCodeColors[3]) for code in sCodes]

def scaledColorV(v, thresholds, colors): # len(thresholds) == len(colors) - 1
    """Return the color for value v : cBckGray if v is null, otherwise colors[i] for the first
    thresholds[i] that v is strictly greater than, or the last color if there's none."""
    if pd.isnull(v):
        return cBckGray
    for rank, limit in enumerate(thresholds):
        if v > limit:
            break
    else:
        rank = -1
    return colors[rank]

def scaledColorS(sValues, thresholds, colors):
    """Return the list of 'background-color' CSS properties for the given values (see scaledColorV)."""
    prefix = 'background-color: '
    return [prefix + scaledColorV(val, thresholds, colors) for val in sValues]

densCVThresholds = [0.4, 0.1]

In [None]:
# Styled synthesis table : rows sorted by species, precision and duration, with
# * a background color for the execution code column (CodEx),
# * a background color for the density variation coefficient column, from the color scale,
# * gray text for the rows whose execution code is neither 1 nor 2.
# NOTE(review): Styler.set_precision and Styler.where were deprecated in pandas 1.3
#               and removed in pandas 2.0 — confirm the targeted pandas version.
# NOTE(review): with decreasing thresholds and scaledColors ordered green, orange, red,
#               the highest CV values get the green color ; scaledColorsRvd may have been intended — confirm.
dfs = dfTrSynRes \
        .sort_values(by=['Espèce', 'Préc.', 'Durée']) \
        .style \
        .set_precision(3) \
        .apply(colorExecCodes, subset=['CodEx'], axis='columns') \
        .apply(scaledColorS, subset=['CoefVar Densité'], axis='columns',
               thresholds=densCVThresholds, colors=scaledColors) \
        .set_properties(subset=pd.IndexSlice[dfTrSynRes[~dfTrSynRes.CodEx.isin([1, 2])].index, :],
                         **{'color': cChrGray}) \
        .where(pd.isnull, 'color: transparent')

        # Alternatives tried (kept for the record) :

        #.set_properties(subset=pd.IndexSlice[dfTrSynRes[dfTrSynRes['Delta AIC'] == 0].index, :],
        #                **{'background-color': cBckGreen}) \

    #.format(lambda v: v if not pd.isnull(v) else '') # Breaks part of the rounding, increases precision ???

    #.set_precision(3) # Not really usable, as only for the whole frame

    #.apply(lambda s: ['color: grey']*len(s), subset=pd.IndexSlice[dfTrSynRes[~dfTrSynRes.CodEx.isin([1, 2])].index, :],
    #       axis='index') # OK
    
    #.apply(lambda s: ['color: grey']*len(s), subset=dfTrSynRes[~dfTrSynRes.CodEx.isin([1, 2])].index,
    #       axis='index') # KO
    
# Export the styled table to Excel (into the 'tmp' folder).
dfs.to_excel('tmp/styled-results.xlsx')

dfs

In [None]:
df2 = dfTrSynRes.iloc[0].to_frame()
df2

In [None]:
df2.index

In [None]:
df2.loc['CodEx']

In [None]:
pd.IndexSlice[df2.loc['CodEx'].index, :]

In [None]:
dfs2 = df2.style

In [None]:
dfs2.apply(colorExecCodes, subset=['CodEx'], axis='index')

In [None]:
df2.style \
        .set_precision(3) \
        .apply(colorExecCodes, subset=['CodEx'], axis='index')

#        .apply(scaledColorS, subset=['CoefVar Densité'], axis='index',
#               thresholds=densCVThresholds, colors=scaledColors)
#
#        .set_properties(subset=[] if df2.loc['CodEx'].isin([1, 2]) else df2.index,
#                         **{'color': cChrGray})

In [None]:
from matplotlib.ticker import MultipleLocator
#, FormatStrFormatter, AutoMinorLocator)

tMax = 600

# Damped sine wave, sampled every 0.1 from 0 to tMax (excluded).
t = np.arange(0.0, tMax, 0.1)
s = np.sin(0.1 * np.pi * t) * np.exp(-t * 0.01)

fig, ax = plt.subplots(figsize=(16, 5))
ax.plot(t, s)
# Grid lines for both major and minor ticks.
ax.grid(True)
ax.grid(True, which='minor')

# Major ticks : left to matplotlib defaults
# (explicit locator and '%d' formatter tried below, but disabled).
#ax.xaxis.set_major_locator(MultipleLocator(25))
#ax.xaxis.set_major_formatter(FormatStrFormatter('%d'))

# Minor ticks : 5 intervals between 2 consecutive major ticks, whatever the major step
# (computed from the first 2 major tick positions) ; no label (default NullFormatter).
aTicks = ax.get_xticks()

ax.xaxis.set_minor_locator(MultipleLocator((aTicks[1]-aTicks[0])/5))

# Lighter, dash-dotted grid lines for minor ticks.
ax.tick_params(which='minor', grid_linestyle='-.', grid_alpha=0.6) #length=4, color='r')

plt.show()

In [None]:
x = ax.xaxis.get_major_ticks()[0]

In [None]:
ax.get_xticks()