<!-- Auto table of contents -->
<h1 class='tocIgnore'>Visionature : Préparation données points d'écoute printaniers pour le DS</h1>
<ul>
  <li>à partir des traces GPS,</li>
  <li>présentes dans les formulaires à partir de Naturalist V0.128 (ou beta mai 2019),</li>
  <li>à condition de cocher la case "Enregistrer ma trace" en début de formulaire,</li>
  <li>via l'export Excel exclusivement
      (pas encore d'API pour ça, et absent des exports XML, JSON, KML, CSV en décembre 2019),</li>
  <li>avec la colonne "trace" sélectionnée dans l'export,</li>
  <li>uniquement via Faune-France (pas dispo. via les sites régionaux).</li>
</ul>
<div style="overflow-y: auto">
  <h2 class='tocIgnore'>Table des matières</h2>
  <div id="toc"></div>
</div>

In [None]:
%%javascript
$.getScript('ipython_notebook_toc.js')

# Imports

In [None]:
import sys
import os
import re

import pathlib as pl
import datetime as dt
from lxml import etree
import requests

import pandas as pd
import numpy as np
import math

import folium
import folium.plugins

import pyproj
from shapely import geometry

#import overpy
import geojson
import osm2geojson as o2g

import matplotlib.pyplot as plt

from collections import OrderedDict as odict
import json

from IPython.display import HTML

In [None]:
import autods as ads
import visionat as vsn

# Communs

In [None]:
# Logging configuration.
ads.logger('ads', level=ads.INFO, handlers=[sys.stdout, 'tmp/vndon.log'], verbose=True)
#ads.logger('ads.eng', level=ads.INFO)
#ads.logger('ads.opn', level=ads.DEBUG)
#ads.logger('ads.opr', level=ads.DEBUG)

ads.logger('visionat', level=ads.INFO, handlers=[sys.stdout, 'tmp/vndon.log'], verbose=True)

logger = ads.logger('vndon', level=ads.DEBUG, handlers=[sys.stdout, 'tmp/vndon.log'], verbose=True)

# I. Chargement des données (issues d'exports de Faune-France)

## 1. Paramètres d'import / filtrage (fichier, observateur, commentaire liste, ...)

In [None]:
# Default import / filtering parameters (a study-specific cell may override any of them).
# They are later passed to vsn.VisionatureDataSet (sheet, skipRows, dRenameCols, keepCols,
# listExtraCols, dComputeCols, listsHaveTrace, listTraceLengthCol).
feuille = 0
source = 'FA' # Faune-Auvergne ('FF' = Faune-France is the other accepted value)
ignorerLignes = []
renommerCols = None
titreCarte = None

colLongTrace = None
avecTraces = False
garderCols = ['ID liste', 'Liste complète ?', 'Commentaire de la liste',
              'Date', 'Ref', 'Horaire', 'Lieu-dit', 'Commune', 'Nom latin',
              'Estimation', 'Nombre', 'Détails', 'Code atlas',
              'Lat (WGS84)', 'Lon (WGS84)', 'UTM X [m]', 'UTM Y [m]', 'Remarque', 'Trace']
garderAutreCols = []
calculerCols = dict()

# Passed as the `clustering` option of the MCDS engine, and selects how individuals are counted.
groupage = False

In [None]:
# ZPS Crêtes du Cantal 2020 (Mathis Vérité)
dossier = 'donnees/cretes-cantal'
ficSrcVN = [dossier+'/Export PE Monts du Cantal - nettoyé.xlsx']
source = 'FF' # Faune-France
nomEtude = 'CretesPlombCantalZPS2020'
titreCarte = 'ZPS Crêtes et Plomb du Cantal 2020'
ignorerLignes = [0]
avecTraces = True

def numeroPoint(sObs):
    """Extract the listening point number from the list comment of a sighting.

    The comment must start with 'PE', whitespace, the point number, then whitespace
    (e.g. 'PE 12 ...').

    :param sObs: the sighting, as a mapping / Series with a 'Commentaire de la liste' entry
    :return: the point number, as an int
    :raises ValueError: if the comment does not start with a 'PE <number> ' prefix
    """
    comment = sObs['Commentaire de la liste']
    # Raw string needed: '\s' and '\d' are invalid escape sequences in a plain literal.
    mo = re.match(r'PE\s+(\d+)\s+', comment)
    if mo is None:
        raise ValueError(f"No 'PE <number> ' prefix in list comment: {comment!r}")
    return int(mo.group(1))
calculerCols = {'Num Point': numeroPoint}
garderAutreCols = ['Num Point']

dZoneEtude = dict(Zone='ZPS Cretes Plomb Cantal', Surface=6416) # ha

## 2. Chargement

In [None]:
assert source in ['FA', 'FF']

In [None]:
# Sighting counts parser, later passed to the VisionatureDataSet:
# 'EBCC' atlas codex for Faune-France exports, empty codex otherwise.
# (the comma between the 2 keyword arguments was missing => syntax error)
sightCountsParser = vsn.SexCatSightingCountsParser(atlasCodex='EBCC' if source == 'FF' else '',
                                                   categoryCols=['nMalAd', 'nAutAd', 'nJuv', 'nVol'])

In [None]:
# Column renaming map: start from the configured one, or from a fresh dict if none / empty ...
renommerCols = renommerCols or dict()

# ... and add the renaming needed for Faune-France exports.
if source == 'FF':
    renommerCols.update({'Nom scientifique': 'Nom latin'})

In [None]:
vnds = vsn.VisionatureDataSet(ficSrcVN, sheet=feuille, skipRows=ignorerLignes,
                              dRenameCols=renommerCols, dComputeCols=calculerCols, 
                              keepCols=garderCols, listExtraCols=garderAutreCols,
                              listsHaveTrace=avecTraces, listTraceLengthCol=colLongTrace,
                              sightCountsParser=sightCountsParser,
                              sightDistComperClass=vsn.PointTransectSightingDistanceComputer)

In [None]:
vnds.columns

In [None]:
vnds.dfData.tail()

In [None]:
#vnds.toExcel('tmp/sample.xlsx', subset=['Lat (WGS84)', 'Lon (WGS84)'])

# II. Filtrage des données

# III. Examen / correction des traces

## 1. ZPS Crêtes et Plomb du Cantal

(traces Naturalist + 3 manquantes lues dans KML)

#### a. Les traces Naturalist

In [None]:
vnds.lists()

In [None]:
dfTraces = vnds.listTraces()
dfTraces

In [None]:
dfTraces['ID liste'].unique(), dfTraces['ID liste'].nunique()

In [None]:
print('{} traces manquantes ... mais voir ci-dessous !'.format(len(vnds.lists()) - dfTraces['ID liste'].nunique()))

#### b. Les points sans trace (par oubli), via KML complémentaire

In [None]:
kmlRoot = etree.ElementTree().parse('donnees/cretes-cantal/Traces manquantes.kml')

In [None]:
def iterPlacemarks(kmlDoc, kmlNameSpaces):
    """Iterate over the point placemarks of a KML document.

    :param kmlDoc: root element of the KML document, searched for Document/Folder/Placemark elements
    :param kmlNameSpaces: XML namespace map, defining the 'kml' prefix
    :return: generator of dicts with keys 'Num Point' (int, from the first SimpleData of the placemark),
             'lon' and 'lat' (floats, from the Point coordinates)
    """
    for pm in kmlDoc.findall('kml:Document/kml:Folder/kml:Placemark', namespaces=kmlNameSpaces):
        name = pm.find('kml:ExtendedData/kml:SchemaData/kml:SimpleData', namespaces=kmlNameSpaces).text
        coords = pm.find('kml:Point/kml:coordinates', namespaces=kmlNameSpaces).text
        # KML coordinates are 'lon,lat[,alt]', possibly surrounded by whitespace:
        # keep the first 2 values only, so that an altitude does not break the unpacking.
        lon, lat = coords.strip().split(',')[:2]
        yield {'Num Point': int(name), 'lon': float(lon), 'lat': float(lat)}

In [None]:
dfPointsManq = pd.DataFrame(data=list(iterPlacemarks(kmlRoot, vsn.KKmlNameSpaces)))
dfPointsManq.set_index('Num Point', inplace=True)
dfPointsManq

#### c. Les 'ID liste' associés aux numéros de point

In [None]:
dfNumPoint2IdList = vnds.lists()[['ID liste', 'Num Point']].drop_duplicates().set_index('Num Point')
dfNumPoint2IdList

In [None]:
dfPointsManq = dfPointsManq.join(dfNumPoint2IdList)
dfPointsManq['ptIndx'] = 0
dfPointsManq

#### d. Remplacer les traces manquantes avec ces points complémentaires

In [None]:
vnds.setListTraces(dfPointsManq)
dfTraces = vnds.listTraces()
dfTraces

In [None]:
dfTraces['ID liste'].unique(), dfTraces['ID liste'].nunique()

#### e. Amélioration des traces : réduction au point moyen (outliers exclus)

In [None]:
vnds.improveTraces(keepPtsPct=70, maxMltStd=1.0)

vnds.listTraces()

#### f. Les points prévus à l'origine (théoriques), via KML

In [None]:
kmlRoot = etree.ElementTree().parse('donnees/cretes-cantal/POINTS_ECOUTE.kml')

In [None]:
dfPoints = pd.DataFrame(data=list(iterPlacemarks(kmlRoot, vsn.KKmlNameSpaces)))
dfPoints.set_index('Num Point', inplace=True)
dfPoints = dfPoints.join(dfNumPoint2IdList)
dfPoints['ptIndx'] = 0
dfPoints

# IV. Limites géographiques à superposer sur la carte

## 1. ZPS Crêtes et Plomb du Cantal

(polygones des limites de la zone d'étude)

In [None]:
kmlRoot = etree.ElementTree().parse('donnees/cretes-cantal/LIMITES_ZPS_ETENDUE.kml')

In [None]:
def iterPolygons(kmlDoc, kmlNameSpaces):
    """Iterate over the outer rings of the polygons of a KML document.

    :param kmlDoc: root element of the KML document, searched for
                   Document/Folder/Placemark/MultiGeometry/Polygon outer boundary coordinates
    :param kmlNameSpaces: XML namespace map, defining the 'kml' prefix
    :return: generator of DataFrames (1 per polygon), with columns 'long' and 'lat', 1 row per ring point
    """
    for elCoords in kmlDoc.findall('kml:Document/kml:Folder/kml:Placemark/kml:MultiGeometry/'
                                   'kml:Polygon/kml:outerBoundaryIs/kml:LinearRing/kml:coordinates',
                                   namespaces=kmlNameSpaces):
        # split() without argument copes with any whitespace (newlines, leading / trailing blanks)
        # between the 'lon,lat[,alt]' tuples; [:2] ignores the altitude when present.
        points = [[float(v) for v in point.split(',')[:2]] for point in elCoords.text.split()]
        yield pd.DataFrame(data=points, columns=['long', 'lat'])

# Study zone as a GeoJSON multi-polygon: 1 polygon per KML polygon,
# each one reduced to a single ring, given as a list of plain (long, lat) tuples.
gjZone = geojson.MultiPolygon([(list(dfPoly.itertuples(index=False, name=None)),)
                               for dfPoly in iterPolygons(kmlRoot, vsn.KKmlNameSpaces)])

In [None]:
geometry.MultiPolygon()

# V. Décodage des effectifs comptés

In [None]:
vnds.computeSightingCounts()

vnds.columns

In [None]:
vnds.dfData[['ID liste', 'Horaire', 'Date', 'Nom latin',
             'Nombre', 'Détails', 'Code atlas', 'nMalAd', 'nAutAd', 'nJuv', 'nVol']].head(30)

In [None]:
dfObs = vnds.dfData

In [None]:
# Comparaison globale Détails et Nombre * Code Atlas
dfObs[['nMalAd', 'nAutAd', 'nJuv', 'nVol']].sum().sum(), dfObs['Nombre'].sum()

In [None]:
# Les données avec Détails et Nombre * Code Atlas incohérents
dfObs.loc[dfObs['Nombre'] != dfObs[['nMalAd', 'nAutAd', 'nJuv', 'nVol']].sum(axis='columns'),
          ['ID liste', 'Horaire', 'Date', 'Nom latin',
           'Nombre', 'Détails', 'Code atlas', 'nMalAd', 'nAutAd', 'nJuv', 'nVol']]

In [None]:
vnds.toExcel('tmp/after.xlsx') # => Vérifier à l'oeil les effectifs de détail

In [None]:
vnds.dfData['Détails'].fillna('').str.contains('vol').sum()

In [None]:
vnds.dfData.loc[vnds.dfData['Détails'].fillna('').str.contains('vol'),
                ['ID liste', 'Horaire', 'Date', 'Nom latin',
                 'Nombre', 'Détails', 'Code atlas', 'nMalAd', 'nAutAd', 'nJuv', 'nVol']]

In [None]:
vnds.dfData.loc[vnds.dfData['Code atlas'] == 2,
                ['ID liste', 'Horaire', 'Date', 'Nom latin',
                 'Nombre', 'Détails', 'Code atlas', 'nMalAd', 'nAutAd', 'nJuv', 'nVol']]

# VI. Calcul des distances observateur - oiseau

In [None]:
vnds.computeTraceSightDistances(distanceCol='Distance')

vnds.dfData[['ID liste', 'Horaire', 'Date', 'Nom latin',
             'Nombre', 'Détails', 'Code atlas', 'nMalAd', 'nAutAd', 'nJuv', 'nVol', 'Distance']].head(20)

In [None]:
np.histogram(vnds.dfData.Distance, bins=10)[0]

In [None]:
vnds.dfData.Distance.hist(figsize=(16, 4), bins=40)

# VII. Cartographie des données

In [None]:
def sight2String(sSight, fmt='{date} {heure}<br>{espece} {nombre} (code {atlas}) {distance} {obseur}<br>{comment}'):

    """Build the (HTML) description string of a sighting, e.g. for a map popup.

    Any field whose source column is absent from the sighting, or null, is rendered as an empty string
    (0 for the atlas code).

    :param sSight: the sighting, as a Series; columns used when present: 'Nom latin', 'Date', 'Horaire',
                   'Code atlas', 'Distance', 'Observateur', 'Commentaires', and for the counts,
                   either 'nMalAd' / 'nAutAd' / 'nJuv' / 'nVol', or 'M' / 'F' / 'Juv',
                   or 'Nombre' (+ 'Détails')
    :param fmt: format string, with optional fields {date}, {heure}, {espece}, {nombre}, {atlas},
                {distance}, {obseur} and {comment}
    :return: the formatted string, stripped, and with double spaces replaced by single ones
    """

    def hasValue(col):
        # True if the column is present in the sighting, with a non-null value.
        return col in sSight.index and not pd.isnull(sSight[col])

    # Species abbreviation: first 4 letters of each word of the latin name, title-cased.
    esp = ''.join(w[:4].title() for w in sSight['Nom latin'].split()) if hasValue('Nom latin') else ''

    # Counts: per category if available, otherwise the raw number (+ details).
    if 'nMalAd' in sSight.index:
        nbre = ', '.join(f'{col}={int(sSight[col])}'
                         for col in ['nMalAd', 'nAutAd', 'nJuv', 'nVol']
                         if hasValue(col) and sSight[col] > 0)
    elif 'M' in sSight.index:
        nbres = dict()
        for col in ['M', 'F', 'Juv']:
            if hasValue(col):
                val = sSight[col]
                nbres[col] = int(val) if isinstance(val, float) else val
        nbre = ', '.join(f'{col}={val}' for col, val in nbres.items())
    else:
        nbre = str(int(sSight['Nombre'])) if hasValue('Nombre') else ''
        if hasValue('Détails'):
            nbre += f"[{sSight['Détails']}]"

    # Distance: not computable for some sightings (null), then simply not displayed.
    dist = 'd={}m'.format(int(sSight['Distance'])) if hasValue('Distance') else ''

    codAtlas = int(sSight['Code atlas']) if hasValue('Code atlas') else 0

    date = sSight['Date'].date().isoformat() if hasValue('Date') else ''

    heure = sSight['Horaire'] if hasValue('Horaire') else ''

    obseur = sSight['Observateur'] if hasValue('Observateur') else ''

    comment = sSight['Commentaires'] if hasValue('Commentaires') else ''

    return fmt.format(date=date, heure=heure, espece=esp, nombre=nbre, atlas=codAtlas,
                      distance=dist, obseur=obseur, comment=comment).strip().replace('  ', ' ')

In [None]:
# Tile servers and map layers for folium / Leaflet.
# Each dict is meant to be passed as keyword arguments to folium.TileLayer;
# 'photo' tells whether the layer is a (dark) photo background, for choosing marker colours.
# All tile URLs use HTTPS: plain HTTP tiles get blocked as mixed content when the map is served over HTTPS.
mdOSM = dict(tiles='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',
             attr='Open Street Map',
             name='Open Street Map', max_zoom=22, photo=False)

mdOTM = dict(tiles='https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png',
             attr='<a href="https://opentopomap.org/">OpenTopoMap</a> '
                  '(<a href="https://creativecommons.org/licenses/by-sa/3.0/">CC-BY-SA</a>)',
             name='Open Topo Map', max_zoom=22, photo=False)
mdThOut = dict(tiles='https://{s}.tile.thunderforest.com/outdoors/{z}/{x}/{y}.png',
               attr='Thunderforest Outdoors',
               name='Thunderforest Outdoors', max_zoom=22, photo=False)

mdSatArcGis = dict(tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
                   attr='Tiles &copy; Esri &mdash; Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid,'
                        ' IGN, IGP, UPR-EGP, and the GIS User Community',
                   name='ArcGIS Satellite', max_zoom=22, photo=True)

# NOTE(review): check that the 'wxs.ign.fr/pratique' WMTS end-point below is still served by IGN.
mdIGNMaps = dict(tiles='https://wxs.ign.fr/pratique/geoportail/wmts?'
                       '&REQUEST=GetTile&SERVICE=WMTS&VERSION=1.0.0&TILEMATRIXSET=PM'
                       '&LAYER=GEOGRAPHICALGRIDSYSTEMS.MAPS&STYLE=normal&FORMAT=image/jpeg'
                       '&TILECOL={x}&TILEROW={y}&TILEMATRIX={z}',
                 attr='&copy; <a href="http://www.ign.fr/">IGN</a>',
                 name='Cartes IGN', max_zoom=22, photo=False)
mdIGNOPhoto = dict(tiles='https://wxs.ign.fr/pratique/geoportail/wmts?'
                         '&REQUEST=GetTile&SERVICE=WMTS&VERSION=1.0.0&TILEMATRIXSET=PM'
                         '&LAYER=ORTHOIMAGERY.ORTHOPHOTOS&STYLE=normal&FORMAT=image/jpeg'
                         '&TILECOL={x}&TILEROW={y}&TILEMATRIX={z}',
                   attr='&copy; <a href="http://www.ign.fr/">IGN</a>',
                   name='OrthoPhoto IGN', max_zoom=22, photo=True)

In [None]:
# Colour palettes for sightings (1 colour per nesting code, plus the marker stroke) and for traces,
# keyed by whether the map layer is a photo (True => dark background) or not (False => clear one).
DColors = {
    True: {
        'sight': {'none': 'blue', 'poss': 'yellow', 'prob': 'orange', 'sure': 'red', 'stroke': 'gold'},
        'trace': {'point': 'yellowgreen', 'segment': 'yellowgreen'},
    },
    False: {
        'sight': {'none': 'blue', 'poss': 'yellow', 'prob': 'orange', 'sure': 'red', 'stroke': 'gold'},
        'trace': {'point': 'green', 'segment': 'green'},
    },
}

def atlasCode2NestingCode(atlasCode):
    """Translate a numerical atlas code into a nesting status code.

    NOTE(review): the thresholds presumably follow the 'Biolovision' atlas codex
    (hinted by a formerly commented-out parameter) - to be confirmed.

    :param atlasCode: the atlas code; None, null (NaN), 0 and 99 mean "no nesting clue"
    :return: 'none', 'poss' (code < 4), 'prob' (4 <= code < 9) or 'sure' (code >= 9)
    """
    # Null codes (NaN in a DataFrame column) must not fall through the comparisons below:
    # every comparison with NaN is False, which used to give 'poss'.
    if pd.isnull(atlasCode) or atlasCode in (0, 99):
        return 'none'
    if atlasCode >= 9:
        return 'sure'
    if atlasCode >= 4:
        return 'prob'
    return 'poss'
    
def color(mapSrc, sightPoint=False, stroke=False, atlasCode=None, tracePoint=False, traceSegment=False):
    """Select the colour of a map item, according to the kind of the map layer (photo or not).

    The target is the first one selected among: sighting point, trace point, trace segment.

    :param mapSrc: map layer description (its 'photo' entry selects the palette in DColors)
    :param sightPoint: if True, colour for a sighting marker (stroke colour if `stroke`,
                       otherwise fill colour given by the nesting code of `atlasCode`)
    :param tracePoint: if True, colour for a trace point
    :param traceSegment: if True, colour for a trace segment
    :raises Exception: if no target is selected
    """
    palette = DColors[mapSrc['photo']]

    if sightPoint:
        return palette['sight']['stroke' if stroke else atlasCode2NestingCode(atlasCode)]
    if tracePoint:
        return palette['trace']['point']
    if traceSegment:
        return palette['trace']['segment']

    raise Exception('No target selected for color')
    
def _addPointsLayer(mp, dfPoints, title, markerColor, listIndexCol, pointIndexCol, latLonCols):
    """Add to map `mp` a feature group named `title`, with 1 circle marker per row of `dfPoints`."""
    latCol, lonCol = latLonCols
    fg = folium.FeatureGroup(name=title)

    # Not in-place: no SettingWithCopyWarning, and the caller's DataFrame is left untouched.
    dfPoints = dfPoints[[listIndexCol, pointIndexCol, latCol, lonCol]].set_index(listIndexCol, drop=True)

    for trcId, sPt in dfPoints.iterrows():
        mrk = folium.CircleMarker(location=(sPt[latCol], sPt[lonCol]),
                                  popup=folium.Popup('#{}'.format(trcId)),
                                  color=markerColor, radius=4, weight=2, fill=True)
        mrk.add_to(fg)

    fg.add_to(mp)


def _addTracesLayer(mp, dfTraces, title, refLayer, listIndexCol, pointIndexCol, latLonCols):
    """Add to map `mp` a feature group named `title`, with the points and linking segments of each trace."""
    latCol, lonCol = latLonCols
    fg = folium.FeatureGroup(name=title)

    dfTraces = dfTraces[[listIndexCol, pointIndexCol, latCol, lonCol]].set_index(listIndexCol, drop=True)

    # 1 group per trace, in order of first appearance (always a DataFrame, even for 1-row traces).
    for trcId, dfTrace in dfTraces.groupby(level=0, sort=False):

        if len(dfTrace) > 1:

            dfTrace = dfTrace.dropna()

            # a. Segments between consecutive points
            #    (formerly built through DataFrame.append, which was removed in pandas 2.0)
            points = list(zip(dfTrace[latCol], dfTrace[lonCol]))
            segments = list(zip(points[:-1], points[1:]))
            if segments:
                pline = folium.PolyLine(segments, color=color(refLayer, traceSegment=True),
                                        weight=1, opacity=0.6, popup=folium.Popup(f'Trace #{trcId}'))
                pline.add_to(fg)

        # b. Points
        for _, sPt in dfTrace.iterrows():
            mrk = folium.CircleMarker(location=(sPt[latCol], sPt[lonCol]),
                                      popup=folium.Popup('#{}: {}'.format(trcId, sPt[pointIndexCol])),
                                      color=color(refLayer, tracePoint=True),
                                      radius=2, weight=2, fill=True)
            mrk.add_to(fg)

    fg.add_to(mp)


def buildMap(dfSights=None, sightTitle='Observations', dfTraces=None, tracesTitle='Trace',
             dfPoints1=None, points1Title='Points1', dfPoints2=None, points2Title='Points2',
             geoBounds=None, geoBoundsTitle='Limites', mapLayers=None, mapTitle='Carte', scale=True,
             sightLatLonCols=('Lat (WGS84)', 'Lon (WGS84)'), clusterSights=False,
             sightFmt='{date} {heure}<br>{espece}{nombre} (code {atlas}) {distance}',
             tracesLatLonCols=('lat', 'lon'), tracesPointIndexCol='ptIndx', tracesListIndexCol='ID liste'):

    """Build a folium map showing sightings, traces, 2 optional sets of points and geographical bounds.

    :param dfSights: sightings to show, 1 per row (None or empty => no sightings layer)
    :param sightTitle: title of the sightings layer
    :param dfTraces: traces to show, 1 point per row (None or empty => no traces layer)
    :param tracesTitle: title of the traces layer
    :param dfPoints1: 1st set of points to show, as red markers (same columns as dfTraces)
    :param points1Title: title of the 1st points layer
    :param dfPoints2: 2nd set of points to show, as blue markers (same columns as dfTraces)
    :param points2Title: title of the 2nd points layer
    :param geoBounds: GeoJSON object to overlay (None => no such layer)
    :param geoBoundsTitle: title of the bounds layer
    :param mapLayers: tile layer description(s), a dict or a list of dicts of folium.TileLayer arguments
                      with a 'photo' flag; the first one selects the colour palette
                      (no layer => colours for a clear background)
    :param mapTitle: title shown at the bottom of the map (None or empty => no title)
    :param scale: if True, show the map scale
    :param sightLatLonCols: names of the latitude and longitude columns of dfSights
    :param clusterSights: if True, cluster the sighting markers
    :param sightFmt: format of the sighting popups (see sight2String)
    :param tracesLatLonCols: names of the latitude and longitude columns of dfTraces and dfPoints1/2
    :param tracesPointIndexCol: name of the point index column of dfTraces and dfPoints1/2
    :param tracesListIndexCol: name of the list (= trace) id column of dfTraces and dfPoints1/2
    :return: the folium map
    """

    if mapLayers is None:
        mapLayers = []
    elif not isinstance(mapLayers, list):
        mapLayers = [mapLayers]

    # Layer used for selecting the colour palette (clear background when no layer is specified).
    refLayer = mapLayers[0] if mapLayers else dict(photo=False)

    tracesLatLonCols = list(tracesLatLonCols)

    mp = folium.Map(tiles=None, control_scale=scale)

    for mapLayer in mapLayers:
        folium.TileLayer(**mapLayer).add_to(mp)

    # Boundaries layer
    if geoBounds:
        gb = folium.GeoJson(geoBounds, name=geoBoundsTitle)
        mp.add_child(gb)

    # Points1 layer
    if dfPoints1 is not None and not dfPoints1.empty:
        _addPointsLayer(mp, dfPoints1, points1Title or 'Points1', 'red',
                        tracesListIndexCol, tracesPointIndexCol, tracesLatLonCols)

    # Points2 layer
    if dfPoints2 is not None and not dfPoints2.empty:
        _addPointsLayer(mp, dfPoints2, points2Title or 'Points2', 'blue',
                        tracesListIndexCol, tracesPointIndexCol, tracesLatLonCols)

    # Traces layer
    if dfTraces is not None and not dfTraces.empty:
        _addTracesLayer(mp, dfTraces, tracesTitle or 'Traces', refLayer,
                        tracesListIndexCol, tracesPointIndexCol, tracesLatLonCols)

    # Sightings
    if dfSights is not None and not dfSights.empty:

        fg = folium.FeatureGroup(name=sightTitle or 'Observations')

        latCol, lonCol = sightLatLonCols
        if clusterSights:
            mc = folium.plugins.MarkerCluster(name=sightTitle, control=False,
                                              options=dict(maxClusterRadius=160, spiderfyOnMaxZoom=True,
                                                           disableClusteringAtZoom=12))
            fg.add_child(mc)
        else:
            mc = fg

        strokeColor = color(refLayer, sightPoint=True, stroke=True)
        for _, sSight in dfSights.iterrows():
            mrk = folium.CircleMarker(location=(sSight[latCol], sSight[lonCol]),
                                      color=strokeColor,
                                      radius=8, weight=1, fill_opacity=0.8,
                                      fill_color=color(refLayer, sightPoint=True, atlasCode=sSight['Code atlas']),
                                      popup=folium.Popup(sight2String(sSight, sightFmt), max_width=256))
            mc.add_child(mrk)

        fg.add_to(mp)

    # Layer control.
    if len(mapLayers) > 1 or geoBounds or dfTraces is not None or dfSights is not None \
       or dfPoints1 is not None or dfPoints2 is not None:
        lc = folium.LayerControl(collapsed=False)
        mp.add_child(lc)

    # Title
    if mapTitle:
        htmlTitle = f"""
          <div style="position: fixed; bottom: 0px; left: 100px; z-index:9999">
            <p style="font-size:20px; padding: 1px 1px 1px 5px; background-color: white; border-radius: 5px;
                      box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 0 6px 20px 0 rgba(0, 0, 0, 0.2);">{mapTitle}</p>
          </div>
          """
        mp.get_root().html.add_child(folium.Element(htmlTitle))

    mp.fit_bounds(mp.get_bounds())

    return mp

## 1. Carte avec limites zone d'étude

In [None]:
# Carte avec limites zone d'étude
mp = buildMap(vnds.dfData, sightTitle='Données', clusterSights=True,
              dfTraces=dfTraces, tracesTitle='Traces GPS', 
              dfPoints1=vnds.listTraces(), points1Title='Points retenus',
              dfPoints2=dfPoints, points2Title='Points prévus',
              geoBounds=gjZone, geoBoundsTitle='Zone d\'étude',
              mapLayers=[mdOTM, mdSatArcGis, mdIGNOPhoto, mdIGNMaps], mapTitle=titreCarte,
              sightFmt='{date} {heure}<br>{espece} {nombre} (code {atlas}) {distance}<br>{comment}')
mp

In [None]:
# Save map as shareable / web-publishable interactive one.
mp.save(f'tmp/{nomEtude}.html')

# Note: For auto selection of 1 map layer, remove / comment-out its "tile_layer_<uuid>.remove()" line
# after searching for its uuid by name

In [None]:
vnds.toExcel('tmp/_.xlsx')

# VII. Extraction des inventaires

In [None]:
vnds.lists()

In [None]:
# Les inventaires (les transects)
dfTransects = vnds.lists(columns=['ID liste', 'Date', 'Num Point']).copy()
dfTransects

In [None]:
dfTransects['Num Point'].value_counts().value_counts()

# VIII. Individualisation des données

In [None]:
vnds.columns

In [None]:
# Colonnes d'effectifs à prendre en compte (on ignore les autres)
countCols =  ['nMalAd', 'nAutAd']

# Category computation: a single one, "Adulte" = male ('m') or other ('a').
def count2AdultCat(sCounts):
    """Return 'm' if the first strictly positive count belongs to a column named with 'Mal', 'a' otherwise.

    :param sCounts: the counts, as a Series indexed by the count column names
    """
    sPositiveCounts = sCounts[sCounts > 0]
    firstCountedCol = sPositiveCounts.index[0]
    if 'Mal' in firstCountedCol:
        return 'm'
    return 'a'

# Création d'un FieldDataSet
fds = ads.FieldDataSet(vnds.sightings(), countCols=countCols, addMonoCatCols={ 'Adulte': count2AdultCat })

# ... pour individualiser et catégoriser les données.
dfObsCatIndiv = fds.individualise()
print(dfObsCatIndiv[countCols].sum().to_dict(), 'individus')

# On ne garde que les colonnes utiles (comptes à 0 ou 1 <=> catégories), et avec des noms améliorés
dfObsCatIndiv = dfObsCatIndiv[['ID liste', 'Date', 'Num Point', 'Nom latin', 'Distance', 'Adulte']].copy()
dfObsCatIndiv.rename(columns={ 'Nom latin': 'Espèce' }, inplace=True)

print(len(dfObsCatIndiv), 'individus au total')

In [None]:
dfObsCatIndiv.head()

In [None]:
# Export des données et inventaires.
nomFicCible = pl.Path(dossier) / f'{nomEtude}-ObsIndiv.xlsx'

with pd.ExcelWriter(nomFicCible) as xlsWriter:
    
    dfObsCatIndiv.to_excel(xlsWriter, index=False, sheet_name='Donnees')
    dfTransects.to_excel(xlsWriter, index=False, sheet_name='Inventaires')
    
print(nomFicCible)

# IX. Export pour analyses dans Distance

In [None]:
especes = 20

In [None]:
# Data review: number of individuals per species, to see which species are going to be analysed.
if groupage: # Clustering during DS analyses
    # NOTE(review): needs a 'Nombre' column in dfObsCatIndiv - check it is kept when groupage is on.
    dfIndivCounts = dfObsCatIndiv[['Espèce', 'Nombre']].groupby('Espèce').sum()
    dfIndivCounts.rename(columns=dict(Nombre='Individus'), inplace=True)
else:
    dfIndivCounts = dfObsCatIndiv[['Espèce', 'Distance']].groupby('Espèce').count()
    dfIndivCounts.rename(columns=dict(Distance='Individus'), inplace=True)

dfIndivCounts.sort_values(by='Individus', ascending=False, inplace=True)

# Show species with enough individuals: threshold = especes if it is a number, otherwise 20.
# The parentheses matter: without them, the conditional expression applies to the whole comparison,
# and a list of species in `especes` ends up in dfIndivCounts[20] (KeyError).
dfIndivCounts[dfIndivCounts.Individus >= (especes if isinstance(especes, int) else 20)]

In [None]:
# Implicit specification of the variants (=> combinations to be generated automatically)
# a. 1 species variant ... per species <8-] : either the explicit list given in `especes`,
#    or every species with at least `especes` individuals.
assert isinstance(especes, list) or isinstance(especes, int)
if isinstance(especes, list):
    varEspeces = especes
else:
    varEspeces = list(dfIndivCounts[dfIndivCounts.Individus >= especes].index)

# b. Adult variants.
varAdultes = ['m', 'm+a'] # 2 variants: 'm' alone, and 'm+a' (presumably both categories together - confirm)

# c. Pass variants (= dates, as there is no more than 1 pass per day).
varPassages = [''] # All passes together => 1 variant

# d. The variants specification
dImplSamples = { 'Espèce': varEspeces, 'Adulte': varAdultes, 'Passage': varPassages }
dImplSamples

In [None]:
# Explicitation des specs.
dfExplSampleSpecs = ads.DSAnalyser.explicitVariantSpecs(odict([('echant_impl', dImplSamples)]))
dfExplSampleSpecs

In [None]:
dfObsCatIndiv.head()

In [None]:
# Jeu de données individualisées.
mds = ads.MonoCategoryDataSet(dfObsCatIndiv, dfTransects=dfTransects, dSurveyArea=dZoneEtude,
                              transectPlaceCols=['Num Point'], passIdCol='Passage', effortCol='Effort',
                              sampleDecFields=['Distance'])

In [None]:
# Short identification string for a sample specification.
def sampleAbbreviation(sSample):
    """Build the abbreviation of a sample: '<species abbrev.>-<adult variant without "+">'.

    :param sSample: the sample spec., with an 'Espèce' entry and an `Adulte` attribute
    :return: e.g. 'AlauArve-ma' for 'Alauda arvensis' and 'm+a'
    """
    # Species: first 4 letters of each of the first 2 words, title-cased.
    speciesWords = sSample['Espèce'].split(' ')[:2]
    speciesAbbrev = ''.join(w[:4].title() for w in speciesWords)

    adultAbbrev = sSample.Adulte.replace('+', '')

    return f'{speciesAbbrev}-{adultAbbrev}'

In [None]:
logger.info('Export au format Distance : C\'est parti ...')

# MCDS engine for the export.
mcds = ads.MCDSEngine(workDir=dossier,
                      distanceUnit='Meter', areaUnit='Hectare',
                      surveyType='Point', distanceType='Radial', clustering=groupage)

try:

    # For each sample:
    for sampInd, sSamp in dfExplSampleSpecs.iterrows():

        sampAbbrev = sampleAbbreviation(sSamp)

        # Data selection
        sds = mds.sampleDataSet(sSamp)
        if not sds:
            logger.info('#{:02d} : {} => Pas de données, pas de fichier'.format(sampInd+1, sampAbbrev))
            continue

        # Export to Distance format
        fpn = pl.Path(dossier) / f'{sampAbbrev}-dist.txt'
        fpn = mcds.buildDistanceDataFile(sds, tgtFilePathName=fpn)

        logger.info('#{:02d} : {} => {}'.format(sampInd+1, sampAbbrev, fpn.name))

finally:

    # Engine shutdown, even when an export failed (otherwise the engine was left running).
    mcds.shutdown()

# Done.
logger.info('Terminé.')

# A. Mise au point : Amélioration traces en mode "point transect"

In [None]:
dfTraces

In [None]:
def polyLine2MeanPoint(dfTrc, keepPtsPct=70, maxMltStd=1.0):

    """Reduce given WGS84 trace to its mean point, after removing outlier points

    A point is kept if, for both latitude and longitude, its distance to the raw mean is
    no more than the largest of the keepPtsPct-th percentile of these distances
    and maxMltStd times the raw standard deviation.

    Parameters:
    :param dfTrc: the trace to improve, as a DataFrame with at least 'lon' and 'lat' columns
    :param keepPtsPct: min percentage of points to keep for final mean computation
    :param maxMltStd: max number of std value times for keeping points for final mean computation
    :return: a Series with index 'ptIndx' (always 0), 'lon' and 'lat' (mean of the kept points)
    """

    # Compute raw mean point and standard deviation (independently for lat and long)
    sRawMean = dfTrc[['lon', 'lat']].mean()
    sRawStd = dfTrc[['lon', 'lat']].std()

    # Compute distance to raw mean for each point (input trace left unchanged)
    sDistLon = (dfTrc['lon'] - sRawMean['lon']).abs()
    sDistLat = (dfTrc['lat'] - sRawMean['lat']).abs()

    # Keep only non-outlier points: numerous enough (keepPtsPct), or close enough to mean (maxMltStd)
    # (for a 1-point trace, the std is NaN, and max() then returns the quantile, listed first)
    maxDistLon = max(sDistLon.quantile(keepPtsPct/100), maxMltStd*sRawStd['lon'])
    maxDistLat = max(sDistLat.quantile(keepPtsPct/100), maxMltStd*sRawStd['lat'])
    dfKept = dfTrc[(sDistLat <= maxDistLat) & (sDistLon <= maxDistLon)]

    # Safety net: the lat and lon selections may not intersect (low keepPtsPct) => back to raw mean.
    if dfKept.empty:
        dfKept = dfTrc

    # Mean of the kept points, not of the whole trace: outliers were filtered but never excluded before.
    return pd.Series(data=[0, dfKept['lon'].mean(), dfKept['lat'].mean()],
                     index=['ptIndx', 'lon', 'lat'])

In [None]:
KImproveTraceKeepPtsPct = 70
KImproveTraceMaxMltStd = 1.0
dfImpTraces = dfTraces.groupby('ID liste') \
                      .apply(polyLine2MeanPoint, keepPtsPct=KImproveTraceKeepPtsPct, maxMltStd=KImproveTraceMaxMltStd) \
                      .reset_index()
dfImpTraces

In [None]:
dfTraceStats = dfTraces.join(dfImpTraces[['ID liste', 'lon', 'lat']].set_index('ID liste'), on='ID liste', rsuffix='_imp')
dfTraceStats = dfTraceStats[['ID liste', 'lon', 'lon_imp', 'lat', 'lat_imp']]
dfTraceStats

In [None]:
dfTraceStats['ID liste'].unique()

In [None]:
dfTraceStats[dfTraceStats['ID liste'] == 1028708]

In [None]:
# Carte avec limites zone d'étude
mp = buildMap(vnds.dfData, sightTitle='Données', clusterSights=True,
              dfTraces=dfTraces, tracesTitle='Traces',
              dfPoints1=dfImpTraces, points1Title='Points moyens',
              dfPoints2=dfPoints, points2Title='Points prévus',
              geoBounds=gjZone, geoBoundsTitle='Zone d\'étude',
              mapLayers=[mdOTM, mdSatArcGis, mdIGNOPhoto, mdIGNMaps], mapTitle=titreCarte,
              sightFmt='{date} {heure}<br>{espece}{nombre} (code {atlas})<br>{comment}')
mp

In [None]:
# Save map as shareable / web-publishable interactive one.
mp.save(f'tmp/{nomEtude}.html')

# Note: For auto selection of 1 map layer, remove / comment-out its "tile_layer_<uuid>.remove()" line
# after searching for its uuid by name

Pb 1035425 : pas assez d'outliers virés ? (trace étirée en lat)
Pb 1039772 : pas assez d'outliers virés ? (trace étirée en lat)
Pb 1091665 : formulaire pas fermé assez tôt ?

# B. Mise au point : Distances en mode "point transect"

(vérification paranoïaque fonction geometry.Point().distance() ... suite échelle carte imprécise ...
 ouf, elle fonctionne :-O)

In [None]:
dfListTraces = vnds.listTraces()
dfListTraces

In [None]:
dfObs = vnds.dfData

In [None]:
idListe = 1028708

In [None]:
dfObs1Trans = dfObs[dfObs['ID liste'] == idListe].copy()[['ID liste', 'UTM X [m]', 'UTM Y [m]', 'Distance']]
dfObs1Trans

In [None]:
sListCoords = dfListTraces[dfListTraces['ID liste'] == idListe].iloc[0]
dfObs1Trans['lonUtm'] = sListCoords['lonUtm']
dfObs1Trans['latUtm'] = sListCoords['latUtm']
dfObs1Trans

In [None]:
dfObs1Trans['Distance2'] = dfObs1Trans.apply(lambda s: np.sqrt((s['UTM X [m]'] - s.lonUtm)*(s['UTM X [m]'] - s.lonUtm)
                                                               + (s['UTM Y [m]'] - s.latUtm)*(s['UTM Y [m]'] - s.latUtm)),
                                            axis='columns')
dfObs1Trans

In [None]:
# Compare absolute differences: without abs(), any Distance smaller than Distance2 passes, whatever the gap.
assert ((dfObs1Trans.Distance - dfObs1Trans.Distance2).abs() < 1.e-5).all()

# C. Astuce : Calcul surface multi-polygones source geojson

In [None]:
# Grabbed from https://www.programcreek.com/python?code=Wireless-Innovation-Forum%2FSpectrum-Access-System%2FSpectrum-Access-System-master%2Fsrc%2Fharness%2Freference_models%2Fgeo%2Futils.py
def geoJson2ShapelyGeometry(gjGeo):

    """Returns a |shapely| geometry from a GeoJSON geometry.

    Polygons and multi-polygons are passed through buffer(0) before being returned.

    Args:
      gjGeo: A dict (or dict subclass, like geojson objects) representing a GeoJSON geometry,
             with a 'type' entry; geometry collections ('geometries' entry) are converted recursively.

    Raises:
      ValueError: If invalid GeoJSON geometry is passed (not a dict, or no 'type' entry).
    """

    if not isinstance(gjGeo, dict) or 'type' not in gjGeo:
        raise ValueError('Invalid GeoJSON geometry.')

    if 'geometries' in gjGeo:
        # shapely is imported in this file as `from shapely import geometry` (no `sgeo` alias)
        return geometry.GeometryCollection([geoJson2ShapelyGeometry(g) for g in gjGeo['geometries']])

    shGeo = geometry.shape(gjGeo)
    if isinstance(shGeo, (geometry.Polygon, geometry.MultiPolygon)):
        shGeo = shGeo.buffer(0)

    return shGeo

In [None]:
shgeoZone = geoJson2ShapelyGeometry(gjZone)
shgeoZone

In [None]:
# Surface en degrés**2 ! (puisque coordonnées de la zone en degrés WGS84)
shgeoZone.area

In [None]:
# WRONG: Area in km**2 (rough guess, earth = perfect sphere of radius 6370 km)
# (the factor below is the length in km of 1 degree of arc: it is not squared,
#  and the shrinking of longitude degrees with latitude is ignored)
shgeoZone.area * (6370 * math.pi / 180)

# => But this is completely wrong: 6416 ha! (see the KML, it is written inside)

# => the multi-polygon should first be projected to UTM31, before asking for .area again
# => as explained here: https://stackoverflow.com/a/21420950