diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 000000000..113bf1193
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,70 @@
+# Builds the war on every push; tag pushes additionally publish a Docker
+# image to the GitHub Container Registry (ghcr.io).
+name: Build
+
+on:
+  push:
+    branches:
+      # '**' also matches branch names containing '/' (e.g. feature/foo),
+      # which a single '*' does not.
+      - '**'
+    tags:
+      - '*'
+
+jobs:
+
+  build:
+
+    runs-on: ubuntu-latest
+    # GITHUB_TOKEN may default to read-only; pushing the image to ghcr.io
+    # requires packages: write.
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout project sources
+        uses: actions/checkout@v4
+
+      - name: Setup Java
+        uses: actions/setup-java@v4
+        with:
+          java-version: '11'
+          distribution: 'temurin'
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@v4
+
+      # Tests are skipped in this job (-x test).
+      - name: Run build with Gradle Wrapper
+        run: ./gradlew build -x test
+
+      - name: Upload war
+        uses: actions/upload-artifact@v4
+        with:
+          name: package
+          path: build/libs
+
+      # The steps below only run for tag pushes.
+      - name: Log in to the Container registry
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+
+      - name: Build and push Docker image
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..b90d7204e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,34 @@
+# Tomcat 9 / JDK 11 image that serves the biocache-service war.
+FROM tomcat:9.0-jdk11-temurin
+
+ENV TZ=Europe/Stockholm
+
+# Directories the service configuration points to under /data.
+RUN mkdir -p \
+    /data/biocache/config \
+    /data/biocache/heatmap \
+    /data/biocache/tmp \
+    /data/biocache-load \
+    /data/biocache-media \
+    /data/biocache-upload \
+    /data/biocache-delete \
+    /data/biocache-download/tmp \
+    /data/cache \
+    /data/offline/exports \
+    /data/logger-client/config
+
+# Static configuration; the .properties files are not copied here.
+COPY sbdi/data/config/*.json /data/biocache/config/
+COPY sbdi/data/config/*.xml /data/biocache/config/
+COPY sbdi/data/config/*.html /data/biocache/config/
+
+# Named ws.war so Tomcat deploys it under the /ws context path.
+COPY build/libs/biocache-service-*.war $CATALINA_HOME/webapps/ws.war
+
+ENV DOCKERIZE_VERSION=v0.7.0
+
+# Install dockerize; wget is only needed for the download and is purged afterwards.
+RUN apt-get update \
+ && apt-get install -y wget \
+ && wget -O - https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar xzf - -C /usr/local/bin \
+ && apt-get autoremove -yqq --purge wget && rm -rf /var/lib/apt/lists/*
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..64c1a9080
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+run:
+	docker compose up cassandra solr --detach
+	./gradlew bootRun
+
+# Change cassandra and solr connections in biocache-config.properties for this to work
+# Replace localhost with cassandra and solr respectively
+# Also, the service may fail on startup if cassandra isn't ready. Just restart the service if that happens.
+run-docker:
+	./gradlew clean build -x test
+	docker compose build --no-cache
+	docker compose up --detach
+
+release:
+	../sbdi-install/utils/make-release.sh
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..e4d6c14d6
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,42 @@
+# Local development stack (see the Makefile targets run / run-docker).
+version: '3.8'
+
+services:
+
+  cassandra:
+    image: cassandra:3.11
+    environment:
+      MAX_HEAP_SIZE: 128M
+      HEAP_NEWSIZE: 24M
+    ports:
+      - "9042:9042"
+    volumes:
+      - db_cassandra:/var/lib/cassandra
+
+  solr:
+    image: ghcr.io/biodiversitydata-se/biocache-solr:1.0.0
+    #image: sbdi/biocache-solr-dev
+    environment:
+      SOLR_HEAP: 4g
+      SOLR_OPTS: "-Dlog4j2.formatMsgNoLookups=true"
+    #build:
+    #  context: ../biocache-solr
+    ports:
+      - "8983:8983"
+    volumes:
+      - data_solr:/var/solr
+
+  biocache-service:
+    image: sbdi/biocache-service-dev
+    build:
+      context: .
+    ports:
+      - "8080:8080"
+    volumes:
+      # The .properties files are mounted from the working tree.
+      - ./sbdi/data/config/biocache-config.properties:/data/biocache/config/biocache-config.properties
+      - ./sbdi/data/config/logger-client.properties:/data/logger-client/config/logger-client.properties
+
+volumes:
+  db_cassandra:
+  data_solr:
diff --git a/sbdi/README.md b/sbdi/README.md
new file mode 100644
index 000000000..2578936b9
--- /dev/null
+++ b/sbdi/README.md
@@ -0,0 +1,54 @@
+# Biocache-service
+
+## Setup
+This service requires a rather elaborate directory and file structure in `/data/`:
+```
+mats@xps-13:/data$ tree biocache* cache offline logger-client
+biocache
+├── config
+│   ├── applicationContext.xml -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/applicationContext.xml
+│   ├── biocache-config.properties -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/biocache-config.properties
+│   ├── download-csdm-email.html -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/download-csdm-email.html
+│   ├── download-doi-email.html ->
/home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/download-doi-email.html +│ ├── download-doi-readme.html -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/download-doi-readme.html +│ ├── download-email.html -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/download-email.html +│ ├── download-readme.html -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/download-readme.html +│ ├── facets.json -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/facets.json +│ ├── groups.json -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/groups.json +│ ├── pipelines-field-config.json -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/pipelines-field-config.json +│ └── subgroups.json -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/subgroups.json +├── heatmap +└── tmp +biocache-delete +biocache-download +biocache-load +biocache-media +biocache-upload +cache +offline +└── exports +logger-client +└── config + └── logger-client.properties -> /home/mats/src/biodiversitydata-se/biocache-service/sbdi/data/config/logger-client.properties +``` + +## Usage + +Run locally: +``` +make run +``` + +Build and run in Docker (using Tomcat): +``` +make run-docker +``` + +Make a release. This will create a new tag and push it. A new Docker image will be built on GitHub. +``` +mats@xps-13:~/src/biodiversitydata-se/biocache-service (master *)$ make release + +Current version: 1.0.1. Enter the new version (or press Enter for 1.0.2): +Updating to version 1.0.2 +Tag 1.0.2 created and pushed. 
+``` \ No newline at end of file diff --git a/sbdi/data/config/applicationContext.xml b/sbdi/data/config/applicationContext.xml new file mode 100644 index 000000000..f8fb6ce4a --- /dev/null +++ b/sbdi/data/config/applicationContext.xml @@ -0,0 +1,36 @@ + + + + + + \ No newline at end of file diff --git a/sbdi/data/config/biocache-config.properties b/sbdi/data/config/biocache-config.properties new file mode 100644 index 000000000..cc51978a5 --- /dev/null +++ b/sbdi/data/config/biocache-config.properties @@ -0,0 +1,495 @@ +###################################################################### +# +# Biocache configuration file +# +# This file has been generated via an ansible script. +# +###################################################################### + +server.port=8080 +server.servlet.context-path=/ws + +# The email address of the technical contact +technical.contact=support@biodiversitydata.se + +# Whether to enable performance analysis using JMX +jmx.debug.enabled=true + +# The base URL for biocache web services +webservices.root=https://records.biodiversitydata.se/ws + +# Cassandra Config +db=cassandra3 +# cassandra hosts - this should be comma separated list in the case of a cluster +cassandra.hosts=localhost +#cassandra.hosts=cassandra +cassandra.port=9042 +#local.node.ip=127.0.0.1 +cassandra.pool=biocache-store-pool +cassandra.keyspace=biocache +cassandra.max.connections=-1 +cassandra.max.retries=6 +thrift.operation.timeout=80000 +cluster.size=1 +node.number=0 +cassandra.async.updates.enabled = false +cassandra.async.updates.threads = 1 +cassandra.async.paging.enabled = false +cassandra.token.split = 10 + +# The number of concurrent Cassandra update threads +cassandra.update.threads=1 + +# Record page size used by processing, indexing and other operations using a full table scan +cassandra.fetch.size=500 + +# Database read timeout in milliseconds +cassandra.timeout=120000 + +# Zookeeper address - used to retrieve SOLR configuration if not available 
locally +zookeeper.address = + +####################################################### +# File system usage +####################################################### + +# Directory used to track the status of uploads +upload.status=/data/biocache-upload/status + +# Directory used by sandbox functionality to write data files to be ingested +upload.temp=/data/biocache-upload/temp + +# Base URL to media files +media.url=https://records.biodiversitydata.se/ws/biocache-media/ + +# Directory root for images +media.dir=/data/biocache-media/ + +# The type of media store (none, local, remote or auto) +media.store.type=auto +media.store.local=false + +# URL instance of image-service to store media +media.store.url=https://images.biodiversitydata.se +media.store.connection.pool.size=25 +media.store.connection.pool.maxperroute=25 +media.store.maxrequests.persec=10 +#API key for imageservice +imageservice.api.key= + +# Directory to log deleted row keys to +deleted.file.store=/data/biocache-delete/ + +# List tool endpoint +list.tool.url=https://lists.biodiversitydata.se + +# Whether to enable list tool integration (this is used at indexing time only) +include.species.lists=true + +# Whether or not to enable SDS checks +sds.enabled=false + +# SDS data file +sds.url=https://sds.biodiversitydata.se +sds.species.data=https://sds.biodiversitydata.se/sensitive-species-data.xml +sds.category.data=https://sds.biodiversitydata.se/sensitivity-categories.xml +sds.zone.data=https://sds.biodiversitydata.se/sensitivity-zones.xml +sds.spatial.layers=cl10038 + +# The directory to write files to while ingesting data +load.dir=/data/biocache-load/ + +# Charts services +charts.facets.string.max= 1000 +charts.facets.number.max= 1000 + +####################################################### +# External services +####################################################### + +# The URL of SOLR services. 
In the case of embedded SOLR (not for production), this could be a directory path instead +# solrHome=http://localhost:8080/solr +# OR a list of Zookeeper nodes if the Solr configuration is stored in Zookeeper (advertised on port 2181) +# solrHome=zookeeper-1.example:2181,zookeeper-2.example:2181,zookeeper-3.ala:2181 +solr.home=http://localhost:8983/solr/biocache +#solr.home=http://solr:8983/solr/biocache + +# Whether to use HTTP2 connector +solr.usehttp2=true + +# Solr HTTP Client Connection Pool configuration to avoid opening too many concurrent connections to the Solr server +solr.connection.pool.size= 50 +solr.connection.pool.maxperroute= 50 + +# Solr HTTP Connection timeout defaults, in milliseconds (could have been overridden in internal Solr Client code in various places) +solr.connection.connecttimeout= 30000 +solr.connection.requesttimeout= 30000 +solr.connection.sockettimeout= 30000 + +# Solr HTTP Cache settings (requires you to setup HTTP Cache headers in your solr configuration before they will take effect) +solr.connection.cache.entries=500 +# Maximum object size to store in the cache, in bytes (Default 256 kilobytes: 1024 * 256 = 262144 bytes) +solr.connection.cache.object.size=262144 + +# The number of concurrent Solr update threads +solr.update.threads=4 + +# The HTTP User Agent used for some queries from biocache-store +biocache.useragent=Biocache + +# The SOLR collection to query +solr.collection=biocache + +# Base URL for registry (collectory) web services +registry.url=https://collections.biodiversitydata.se/ws + +# URL to use for retrieve a citations CSV for downloads +citations.url=https://collections.biodiversitydata.se/ws/citations + +# API key to use to authenticate WS requests +registry.api.key= + +# If enabled, processing & loading operations will cause an metadata update in the registry +allow.registry.updates=false + +# Base URL for taxon services (BIE) +service.bie.ws.url=https://species.biodiversitydata.se/ws + +# Base URL for taxon 
pages (BIE) +service.bie.ui.url=https://species.biodiversitydata.se + +# Allow service to be disabled via config (enabled by default) +service.bie.enabled=false + +# Base URL for Biocache UI +biocache.ui.url=https://records.biodiversitydata.se + +# Base URL for the DOI service +doi.ui.url=https://doi.ala.org.au +doi.service.url=https://doi.ala.org.au/api/ +doi.service.apiKey= +doi.service.readTimeout=900000 + +doi.provider=ALA + +# A fixed period after minting a DOI to allow it to propagate +download.doi.propagation.delay=60000 + +####################################################### +# Miscellaneous configuration options +####################################################### + +# The URL of layer services +layers.service.url=https://spatial.biodiversitydata.se/ws + +# The URL of the spatial-service /fields service +spatial.layers.url=https://spatial.biodiversitydata.se/ws/fields + +# To use layers service for sampling +layers.service.sampling=true + +# Used by location processor for associating a country with an occurrence record where only stateProvince is supplied and coordinates are not available +default.country= + +# Specify fields to sample - set to 'none' for no sampling +sample.fields= + +# The list of default fields to use if a list can not be obtained from the spatial layers. 
+default.sample.fields= +#defaultFieldsToSample=cl20,cl23,cl901,cl914,cl916,cl935,el594,el848,el815,el834,el707,el794,el786,el789,el774,el851,el647,el717,el708,el748,el821,el777,el832,el814,el823,el816,el712,el841,el845,el839,el844,el836,el817,el811,el855,el804,el899,el737,el756,el759,el752,el739,el776,el753,el716,el729,el751,el827,el673,el810,el820,el830,el779,el813,el598,el835,el808,el807,el715,el833,el837,el719,el809,el829,el838,el725,el765,el745,el822,el798,cl606,cl611,cl612,cl613,cl614,cl617,cl605,cl620,el727,el843,el760,el758,el842,el818,el714,el812,el840,el730,el722,el866,el773,el876,el871,el872,el886,el887,el894,el877,el874,el862,el875,el883,el892,el879,el889,el881,el880,el890,el882,el864,el885,el868,el891,el724,el873,el884,el865,el895,el878,el863,el867,el870,el858,el850,el860,el768,el854,el857,el859,el849,el757,el755,el682,el681,el733,el856,el720,el732,el761,el721,el762,el772,el668,el746,el731,el671,el728,el743,el749,el744,el734,el750,el600,el726,el718,el736,el713,el602,el593,el771,el601,el764,el778,el595,el888,el596,el599,el723,el788,el791,el782,el806,el742,el797,el893,el735,el754,el766,el740,el775,el763,el853,el796,el869,el861,el675,el793,el787,el781,el795,el785,el852,el670,el799,el790,el783,el704,el666,el672,el591,el802,el800,el803,el801,el805,el661,el738,el705,el792,el784,el667,cl917,cl925,cl926,el676,el597,el680,el674,el747,el711,el828,el770,el819,el898,el706,el767,cl928,cl929,cl930,cl900,el769,el741,cl902,cl903,cl904,cl905,cl906,cl907,cl908,cl909,cl910,cl932,cl923,cl911,cl912,cl678,el831,el669,el825,el826,el662,el709,cl618,cl896,cl604,cl619,cl922,el824,cl927,cl913,cl942,cl21,cl22 + +# geospatial layers used to add darwin core properties from lat/lng. uses IDs of layers from layer service +layer.state.province=cl10097 +layer.terrestrial= +layer.marine= +layer.countries=cl10087 +layer.localgov= + +# Name search services +namesearch.url = https://namematching.biodiversitydata.se + +# The languages to use for common names. 
Applicable for people using GBIF name matching indexes +commonname.lang= sv,en + +# Exclude sensitive values for the listed data resources (comma separated list) +exclude.sensitive.values= + +# Additional fields to index (used by biocache-store only) +extra.misc.fields= +#extraMiscFields=OriginalSeedQuantity_i,AdjustedSeedQuantity_i,CurrentSeedQuantity_i,ViabilitySummary_d + +additional.fields.to.index=materialSampleID,sampleSizeUnit,sampleSizeValue,organismQuantity,organismQuantityType + +# Max number of threads to use when processing an endemic request +endemic.query.maxthreads= 30 + +# Max number of threads to use when processing a solr search query which is not an endemic query, or an online download or an offline download +solr.downloadquery.maxthreads=30 + +# Max number of threads to use when processing an online download (occurrences/index/download) +online.downloadquery.maxthreads=30 + +# Max number of threads to use when processing an offline download (occurrences/offline/download) +download.offline.parallelquery.maxthreads=30 + +# An artificial throttle used between calls to Solr for paged results, including online and offline downloads +download.throttle.ms=50 + +# The batch size for individual queries to Solr during downloads +download.batch.size=500 + +# The size of an internal fixed length blocking queue used to parallelise reading from Solr before writing to this queue +download.internal.queue.size=100 + +# Maximum total time for downloads to execute. Defaults to 1 week (604,800,000ms) +download.max.execute.time=604800000 + +# Maximum total time to wait for downloads to be written to disk after Solr queries are complete. 
Defaults to 5 minutes (300,000ms) +# Increase this if you are seeing messages about downloads being interrupted +# This needs to be fairly large as shapefiles are completely written to temp files after all of the Solr queries complete +download.max.completion.time=300000 + +# The fixed page size used by the biocache-service scatterplot service when querying solr +scatterplot.query.page.size=100000 + +# The fixed page size used by the biocache-service WMS tiles server when querying solr if uncertainty circles are required +wms.pagesize=100000 + +# Base directory for heatmap images +heatmap.output.dir=/data/biocache/heatmap + +# AuthService properties to inject +auth.user.details.url=https://auth.biodiversitydata.se/userdetails/userDetails/ +auth.user.names.id.path=getUserList +auth.usernames.for.numeric.id.path=getUserListWithIds +auth.substitution.fields=assertionUserId,userId + +# Enable and customise this to check API keys +apikey.check.enabled=true +apikey.check.url=https://auth.biodiversitydata.se/apikey/ws/check?apikey={0} + +# Caches to enable/disable. Comment out the caches that you wish to enable +caches.auth.enabled=true +caches.log.enabled=true + +# Note: The collections cache is problematic when collectory/biocache-service are deployed on the same Tomcat +caches.collections.enabled=true +caches.layers.enabled=true + +taxon.profile.cache.all=true + +# Cache sizes can be lowered to reduce memory footprint at the possible cost of throughput +taxon.profile.cache.size=10000 +classification.cache.size=10000 +commonname.cache.size=10000 +spatial.cache.size=10000 +attribution.cache.size=10000 +sensitivity.cache.size=10000 +location.cache.size=10000 + +# Citations can be disabled here for now; in the future we will need a way to customise the source. 
+citations.enabled=true + +# URL for retrieve list of contacts for collection +collection.contacts.url=https://collections.biodiversitydata.se/ws/collection + +# URL for LoggerService +logger.service.url=https://logger.biodiversitydata.se/service/logger +occurrence.log.enabled=true + +# Temporary working directory (used by processing) +tmp.work.dir=/data/biocache/tmp + +# Restart Data Service +# Warning: Uses unsynchronised reflection to access private fields and save/restore them from disk +# Enable at your own risk +restart.data.enabled=false +restart.data.frequency=60000 + +####################################################### +# Volunteer portal (DigiVol) integration +####################################################### + +# Hub ID for the DigiVol +volunteer.hub.uid= +# Data Provider ID for DigiVol +volunteer.dp.uid= + +####################################################### +# SFTP integration +####################################################### + +# User details to SFTP/SCP from upload +uploadUser= +uploadPassword= + + +####################################################### +# Facets +####################################################### + +# Limit to &facets term count for all queries +facets.max=32 + +# Limit the default &facets term count. This limits the default facets assigned from facets.json +facets.defaultmax=0 + +# Default &facet value (true|false). Clients must always set &facet=true when facets are required and this default is false. +facet.default=true + +# Autocomplete related caches, it will still work when these are disabled but the cached information will be unavailable. +autocomplete.species.images.enabled=true +autocomplete.species.counts.enabled=true +autocomplete.commonnames.extra.enabled=true + +# species.counts.async.updates is only used when autocomplete.species.counts.enabled=true +# When true the autocomplete will return without waiting for species counts to be populated or updated. 
+species.counts.async.updates=true + +# species.counts.cache.minage is only used when autocomplete.species.counts.enabled=true +# Each species counts cache is updated at the first use, after a change to the index, and when the last update is older +# than this time in ms. The default is no more than 1 update every 30 minutes for each species counts cache. +species.counts.cache.minage=1800000 + +# Set SOLR batch size. Default=1000 +solr.batch.size=500 + +# Set SOLR hard commit size. Default=10000 - used in indexing. +solr.hardcommit.size=5000 + +# Temporary directory where shapefiles are created for downloads +shapefile.tmp.dir=/data/biocache-download/tmp + +# URL or path to subgroups JSON configuration +species.subgroups.url=/data/biocache/config/subgroups.json + +####################################################### +# Downloads +####################################################### + +# offline downloads email +download.email.subject=SBDI Occurrence Download Complete - [filename] +download.date.format=dd MMMMM yyyy +download.auth.sensitive=false +download.solr.only=true + +download.email.template=/data/biocache/config/download-email.html +download.readme.enabled=true +download.readme.template=/data/biocache/config/download-readme.html + +download.doi.licence.prefix=Datasets are covered by the following licence(s): +download.doi.title.prefix=Occurrence download +download.doi.landing.page.baseUrl=https://records.biodiversitydata.se/download/doi?doi= +download.doi.resolver=https://doi.ala.org.au/doi/ + +download.support.email.enabled=false +download.support.email=support@biodiversitydata.se +download.support=support@biodiversitydata.se +my.download.doi.baseUrl= + +download.doi.failure.message=A DOI was requested for this download however it could not be minted. 
+ +download.doi.email.template=/data/biocache/config/download-doi-email.html +download.doi.readme.template=/data/biocache/config/download-doi-readme.html + +download.csdm.email.template=/data/biocache/config/download-csdm-email.html + +email.sender=download@biodiversitydata.se +mail.smtp.host=postfix +mail.smtp.port=25 + +#DOI Default metadata +doi.author=SBDI +doi.description=SBDI record download +doi.resourceText=Species information + +# Base URL for generated download files +download.url=https://records.biodiversitydata.se/ws/biocache-download +download.dir=/data/biocache-download + +# Download queue configuration +concurrent.downloads.json=[{"label": "smallSolr", "threads": 4, "maxRecords": 50000, "type": "index", "pollDelay": 10, "executionDelay": 10, "threadPriority": 5}, {"label": "largeSolr", "threads": 1, "maxRecords": 100000000, "type": "index", "pollDelay": 100, "executionDelay": 100, "threadPriority": 1}, {"label": "smallCassandra", "threads": 1, "maxRecords": 50000, "type": "db", "pollDelay": 10, "executionDelay": 10, "threadPriority": 5}, {"label": "defaultUnrestricted", "threads": 1, "pollDelay": 1000, "executionDelay": 100, "threadPriority": 1}] + +# Maximum points in WKT string supported before simplification applies +# Set this to 0 to disable simplification (disabling not recommended due to performance issues) +qid.wkt.maxPoints=50000 + +# The step size factor during the iteration simplification algorithm +qid.wkt.simplification.factor=2.0 + +# The initial precision to attempt during simplification +qid.wkt.simplification.initialprecision=0.0001 + +# The maximum precision to attempt during simplification before giving up +qid.wkt.simplification.maxprecision=10.0 + +# Maximum size of the WMS cache +wms.cache.size.max=1048576 + +# Default set of fields in the download - for the legacy format 
+#default.download.fields=id,data_resource_uid,data_resource,institution_uid,institution_name,collection_uid,collection_name,license,catalogue_number,taxon_concept_lsid,raw_taxon_name,raw_common_name,taxon_name,rank,common_name,kingdom,phylum,class,order,family,genus,species,subspecies,institution_code,collection_code,locality,raw_latitude,raw_longitude,raw_datum,latitude,longitude,coordinate_precision,coordinate_uncertainty,country,state,cl959,min_elevation_d,max_elevation_d,min_depth_d,max_depth_d,individual_count,recorded_by,year,month,day,verbatim_event_date,basis_of_record,raw_basis_of_record,occurrence_status,sex,preparations,information_withheld,data_generalizations,outlier_layer,taxonomic_kosher,geospatial_kosher,materialSampleID,sampleSizeUnit,sampleSizeValue,organismQuantity,organismQuantityType +downloads.dwcExtraFields=data_resource_uid,materialSampleID,sampleSizeUnit,sampleSizeValue,organismQuantity,organismQuantityType + +# Maximum offline file size +download.offline.max.size=50000000 + +# Shapefile downloads enabled +download.shp.enabled=true + +# Grid indexing enabled (used by biocache store only) +gridref.indexing.enabled=true + +# National checklist GUID pattern +national.checklist.guid.pattern=* + +# Used in OGC XML services +organizationName= +orgCity= +orgStateProvince= +orgPostcode= +orgCountry= +orgPhone= +orgFax= +orgEmail= + +geoserver.url=https://spatial.biodiversitydata.se/geoserver + +# Used when creating new layer fields (el or cl) in the live SOLR instance +solr.index.docvalues.layer=false +solr.index.indexed.layer=true +solr.index.stored.layer=true + +# Used when creating new misc fields in the live SOLR instance +solr.index.docvalues.misc=false +solr.index.misc=true +solr.index.stored.misc=true + +# Where to get the IRMNG data from +irmng.archive.url=https://archives.ala.org.au/archives/nameindexes/irmng/IRMNG_DWC.zip + +dataquality.baseUrl=https://dataquality.ala.org.au/ +dataquality.enabled=false +dataquality.apiKey= + 
+sensitiveAccessRoles20={} + +#oidc/jwt related +security.jwt.enabled=false +security.jwt.discovery-uri= +security.jwt.clientId= + +security.apikey.enabled=true +security.apiKey.auth.serviceUrl = https://auth.biodiversitydata.se/apikey/ +security.apikey.check.serviceUrl=https://auth.biodiversitydata.se/apikey/ws/check?apikey= +security.apikey.userdetails.serviceUrl=https://auth.biodiversitydata.se/userdetails/ + +#webservice jwt +webservice.jwt=false +webservice.jwt-scopes= +webservice.client-id= +webservice.client-secret= +webservices.cache-tokens=false + + +vernacularName.show=true +userdetails.url=https://auth.biodiversitydata.se/userdetails/ diff --git a/sbdi/data/config/download-csdm-email.html b/sbdi/data/config/download-csdm-email.html new file mode 100644 index 000000000..be09cf435 --- /dev/null +++ b/sbdi/data/config/download-csdm-email.html @@ -0,0 +1,27 @@ + + + + + + + +
+

+ Import data for modelling using this link: import +

+

+ The DOI for this download is [officialDoiUrl] +

+

+ Your download can be accessed on the page: [url] +

+

+ All of your downloads are available here. +

+
+ + diff --git a/sbdi/data/config/download-doi-email.html b/sbdi/data/config/download-doi-email.html new file mode 100644 index 000000000..3ce928ac7 --- /dev/null +++ b/sbdi/data/config/download-doi-email.html @@ -0,0 +1,1045 @@ + + + + + + + + + +Your download can be accessed on the page:

+[url]

+The DOI for this download is +DOI[doi] +

+When using this dataset please use the following citation:

+BioAtlas Sweden occurrence download at [searchUrl] accessed on [date].

+Also cite the contributing data providers which are listed in the included "citation.csv" file.

+More information can be found at citing BioAtlas Sweden. + + diff --git a/sbdi/data/config/download-doi-readme.html b/sbdi/data/config/download-doi-readme.html new file mode 100644 index 000000000..c983c0848 --- /dev/null +++ b/sbdi/data/config/download-doi-readme.html @@ -0,0 +1,72 @@ + + + + + + + + + +When using this dataset please use the following citation:

+BioAtlas Sweden download at [searchUrl] accessed on [date].

+The DOI DOI[doi] for this download is available at [url]

+Data contributed by the following providers:
+[dataProviders]
+More information can be found at citing the BioAtlas Sweden.

+ + diff --git a/sbdi/data/config/download-email.html b/sbdi/data/config/download-email.html new file mode 100644 index 000000000..a03687f27 --- /dev/null +++ b/sbdi/data/config/download-email.html @@ -0,0 +1,992 @@ + + + + + + + + +Your download is available on the URL:

+[url]

+When using this dataset please use the following citation:

+BioAtlas Sweden occurrence download at [searchUrl] accessed on [date].

+Also cite the contributing data providers which are listed in the included "citation.csv" file.

+More information can be found at citing the BioAtlas Sweden. + + diff --git a/sbdi/data/config/download-readme.html b/sbdi/data/config/download-readme.html new file mode 100644 index 000000000..777f99262 --- /dev/null +++ b/sbdi/data/config/download-readme.html @@ -0,0 +1,24 @@ + + + + + + + + + +When using this dataset please use the following citation:

+BioAtlas Sweden occurrence download at [searchUrl] accessed on [date].

+Data contributed by the following providers:
+[dataProviders]
+More information can be found at citing the BioAtlas Sweden.

+ + diff --git a/sbdi/data/config/facets.json b/sbdi/data/config/facets.json new file mode 100644 index 000000000..837a48c1c --- /dev/null +++ b/sbdi/data/config/facets.json @@ -0,0 +1,447 @@ +[{ + "title": "Taxon", + "facets": [{ + "sort": "index", + "dwcTerm": "scientificName", + "description": "Matched Scientific Name", + "field": "scientificName" + }, + { + "sort": "index", + "dwcTerm": "scientificName_raw", + "description": "Scientific Name", + "field": "raw_scientificName" + }, + { + "sort": "index", + "description": "Subspecies", + "field": "subspecies" + }, + { + "sort": "index", + "dwcTerm": "specificEpithet_raw", + "i18nValues": true, + "field": "species" + }, + { + "sort": "index", + "dwcTerm": "genus_raw", + "field": "genus" + }, + { + "sort": "index", + "dwcTerm": "family_raw", + "field": "family" + }, + { + "sort": "index", + "dwcTerm": "order_raw", + "field": "order" + }, + { + "sort": "index", + "dwcTerm": "class", + "description": "Class - matched", + "field": "class" + }, + { + "sort": "index", + "dwcTerm": "phylum_raw", + "field": "phylum" + }, + { + "sort": "index", + "dwcTerm": "kingdom_raw", + "field": "kingdom" + }, + { + "sort": "count", + "dwcTerm": "taxonRank", + "i18nValues": true, + "description": "Taxon Rank - matched", + "field": "taxonRank" + }, + { + "sort": "count", + "field": "matchType" + }, + { + "sort": "index", + "field": "speciesGroup" + }, + { + "sort": "index", + "dwcTerm": "vernacularName", + "description": "Vernacular Name - matched", + "field": "vernacularName" + }, + { + "sort": "count", + "description": "Species subgroups", + "field": "speciesSubgroup" + }, + { + "sort": "count", + "field": "interaction" + } + ] +}, + { + "title": "Identification", + "facets": [{ + "sort": "index", + "dwcTerm": "identifiedBy_raw", + "field": "identifiedBy" + }, + { + "sort": "index", + "dwcTerm": "identificationQualifier", + "field": "identificationQualifier" + }, + { + "sort": "count", + "description": "Taxon identification issue", + 
"field": "taxonomicIssues" + }, + { + "sort": "index", + "dwcTerm": "typeStatus", + "field": "typeStatus" + }, + { + "sort": "index", + "dwcTerm": "originalNameUsage", + "field": "originalNameUsage" + } + ] + }, + { + "title": "Location", + "facets": [ + { + "sort": "index", + "dwcTerm": "country_raw", + "i18nValues": true, + "field": "country" + }, + { + "sort": "count", + "field": "stateProvince" + }, + { + "sort": "count", + "field": "cl2079" + }, + { + "sort": "count", + "field": "cl2078" + }, + { + "sort": "count", + "field": "cl925" + }, + { + "sort": "count", + "field": "cl901" + }, + { + "sort": "count", + "field": "cl1918" + }, + { + "sort": "count", + "field": "cl958" + }, + { + "sort": "count", + "field": "cl1048" + }, + { + "sort": "count", + "field": "cl1049" + }, + { + "sort": "count", + "field": "cl21" + }, + { + "sort": "count", + "field": "cl966" + }, + { + "sort": "count", + "field": "cl1085" + }, + { + "sort": "count", + "field": "cl678" + }, + { + "sort": "count", + "field": "cl959" + }, + { + "sort": "count", + "field": "cl991" + }, + { + "sort": "count", + "field": "cl916" + }, + { + "sort": "count", + "field": "cl935" + }, + { + "sort": "count", + "field": "cl1057" + }, + { + "sort": "count", + "field": "cl2013" + }, + { + "sort": "count", + "field": "cl927" + }, + { + "sort": "count", + "field": "cl923" + }, + { + "sort": "count", + "field": "cl619" + }, + { + "sort": "count", + "field": "el1076" + }, + { + "sort": "count", + "field": "cl617" + }, + { + "sort": "count", + "field": "cl620" + }, + { + "sort": "index", + "description": "Elevation", + "field": "elevation_d_rng" + }, + { + "sort": "index", + "field": "min_elevation_d_rng" + }, + { + "sort": "count", + "description": "Sensitive", + "field": "sensitive", + "i18nValues": true + }, + { + "sort": "count", + "description": "Species habitats", + "field": "speciesHabitats" + }, + { + "sort": "index", + "dwcTerm": "coordinateUncertaintyInMeters", + "description": "Coordinate Uncertainty 
in Metres - parsed", + "field": "coordinateUncertaintyInMeters" + }, + { + "sort": "count", + "i18nValues": true, + "description": "Spatial validity", + "field": "spatiallyValid" + }, + { + "sort": "index", + "dwcTerm": "locationID", + "field": "locationID" + } + ] + }, + { + "title": "Occurrence", + "facets": [ + { + "sort": "index", + "dwcTerm": "recordedBy_raw", + "description": "Collector", + "field": "recordedBy" + }, + { + "sort": "count", + "dwcTerm": "sex", + "description": "Sex", + "field": "sex" + }, + { + "sort": "count", + "field": "lifeStage" + }, + { + "sort": "count", + "dwcTerm": "establishmentMeans", + "field": "establishmentMeans" + }, + { + "sort": "index", + "dwcTerm": "month_raw", + "i18nValues": true, + "field": "month" + }, + { + "sort": "index", + "dwcTerm": "year_raw", + "field": "year" + }, + { + "sort": "count", + "description": "Date precision", + "field": "datePrecision" + }, + { + "sort": "index", + "field": "decade" + }, + { + "sort": "count", + "description": "State conservation", + "field": "stateConservation" + }, + { + "sort": "count", + "description": "State conservation (unprocessed)", + "field": "raw_stateConservation" + }, + { + "sort": "index", + "dwcTerm": "eventID", + "field": "eventID" + }, + { + "sort":"index", + "dwcTerm": "parentEventID", + "field":"parentEventID" + }, + { + "sort":"index", + "dwcTerm": "fieldNumber", + "field":"fieldNumber" + }, + { + "sort":"index", + "dwcTerm": "datasetName", + "field":"datasetName" + } + ] + }, + { + "title": "Record", + "facets": [ + { + "sort": "index", + "dwcTerm": "basisOfRecord", + "description": "Basis Of Record - processed", + "field": "basisOfRecord" + }, + { + "sort": "count", + "description": "Multimedia", + "field": "multimedia" + }, + { + "sort": "index", + "description": "Presence/Absence", + "field": "occurrenceStatus" + }, + { + "sort": "index", + "description": "Content types", + "field": "contentTypes" + } + ] + }, + { + "title": "Assertions", + "facets": [ + { + 
"sort": "count", + "description": "Sensitive", + "field": "sensitive" + }, + { + "sort": "count", + "i18nValues": true, + "description": "Record issues", + "field": "assertions" + }, + { + "sort": "count", + "i18nValues": true, + "description": "Outlier for layer", + "field": "outlierLayer" + }, + { + "sort": "count", + "description": "Outlier layer count", + "field": "outlierLayerCount" + }, + { + "sort": "count", + "i18nValues": true, + "field": "userAssertions" + }, + { + "sort": "index", + "description": "Assertions by user", + "field": "assertionUserId" + }, + { + "sort": "count", + "field": "duplicateStatus" + }, + { + "sort": "count", + "field": "duplicateType" + } + ] + }, + { + "title": "Attribution", + "facets": [{ + "sort": "count", + "field": "license" + }, + { + "sort": "count", + "field": "userId" + }, + { + "sort": "count", + "field": "collectionUid" + }, + { + "sort": "count", + "field": "institutionUid" + }, + { + "sort": "count", + "field": "dataProviderUid" + }, + { + "sort": "count", + "description": "Data Resource ID", + "field": "dataResourceUid" + }, + { + "sort": "count", + "dwcTerm": "datasetName", + "field": "datasetName" + } + ] + } +] diff --git a/sbdi/data/config/groups.json b/sbdi/data/config/groups.json new file mode 100644 index 000000000..4244ee997 --- /dev/null +++ b/sbdi/data/config/groups.json @@ -0,0 +1,150 @@ +[{ + "name": "Animals", + "rank": "kingdom", + "included": ["Animalia"], + "excluded": [] +}, + { + "name": "Mammals", + "rank": "classs", + "included": ["Mammalia"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Birds", + "rank": "classs", + "included": ["Aves"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Reptiles", + "rank": "classs", + "included": ["Reptilia"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Amphibians", + "rank": "classs", + "included": ["Amphibia"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Fishes", + "rank": "classs", + "included": 
["Agnatha", "Chondrichthyes", "Osteichthyes", "Actinopterygii", "Sarcopterygii"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Molluscs", + "rank": "phylum", + "included": ["Mollusca"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Arthropods", + "rank": "phylum", + "included": ["Arthropoda"], + "excluded": [], + "parent": "Animals" + }, + { + "name": "Crustaceans", + "rank": "classs", + "included": ["Branchiopoda", "Remipedia", "Maxillopoda", "Ostracoda", "Malacostraca"], + "excluded": [], + "parent": "Arthropods" + }, + { + "name": "Insects", + "rank": "classs", + "included": ["Insecta"], + "excluded": [], + "parent": "Arthropods" + }, + { + "name": "Plants", + "rank": "kingdom", + "included": ["Plantae"], + "excluded": [] + }, + { + "name": "Bryophytes", + "rank": "phylum", + "included": ["Bryophyta", "Marchantiophyta", "Anthocerotophyta"], + "excluded": [], + "parent": "Plants" + }, + { + "name": "Gymnosperms", + "rank": "subclass", + "included": ["Pinidae", "Cycadidae"], + "excluded": [], + "parent": "Plants" + }, + { + "name": "FernsAndAllies", + "rank": "subclass", + "included": ["Equisetidae", "Lycopodiidae", "Marattiidae", "Ophioglossidae", "Polypodiidae", "Psilotidae"], + "excluded": [], + "parent": "Plants" + }, + { + "name": "Angiosperms", + "rank": "subclass", + "included": ["Magnoliidae"], + "excluded": [], + "parent": "Plants" + }, + { + "name": "Monocots", + "rank": "superorder", + "included": ["Lilianae"], + "excluded": [], + "parent": "Angiosperms" + }, + { + "name": "Dicots", + "rank": "subclass", + "included": ["Magnoliidae"], + "excluded": ["Lilianae"], + "parent": "Angiosperms" + }, + { + "name": "Fungi", + "rank": "kingdom", + "included": ["Fungi"], + "excluded": [] + }, + { + "name": "Chromista", + "rank": "kingdom", + "included": ["Chromista"], + "excluded": [] + }, + { + "name": "Protozoa", + "rank": "kingdom", + "included": ["Protozoa"], + "excluded": [] + }, + { + "name": "Bacteria", + "rank": "kingdom", + 
"included": ["Bacteria"], + "excluded": [] + }, + { + "name": "Algae", + "rank": "phylum", + "included": ["Bacillariophyta", "Chlorophyta", "Cyanidiophyta", "Prasinophyta", "Rhodophyta", + "Cryptophyta", "Ochrophyta", "Sagenista", "Cercozoa", "Euglenozoa", "Cyanobacteria" + ], + "excluded": [] + } +] diff --git a/sbdi/data/config/logger-client.properties b/sbdi/data/config/logger-client.properties new file mode 100644 index 000000000..783672924 --- /dev/null +++ b/sbdi/data/config/logger-client.properties @@ -0,0 +1 @@ +logger_url=https://logger.biodiversitydata.se/service/logger diff --git a/sbdi/data/config/pipelines-field-config.json b/sbdi/data/config/pipelines-field-config.json new file mode 100644 index 000000000..93e9da113 --- /dev/null +++ b/sbdi/data/config/pipelines-field-config.json @@ -0,0 +1,413 @@ +{ + "fieldNameMapping": { + "abcd_identification_qualifier": null, + "abcd_identification_qualifier_insertion_point": null, + "abcd_type_status": "abcdTypeStatus", + "accepted_name_usage": "acceptedNameUsage", + "accepted_name_usage_id": "acceptedNameUsageID", + "access_rights": "accessRights", + "alau_user_id": "userId", + "all_image_url": "imageIDs", + "assertions_missing": null, + "assertions_passed": null, + "assertions_unchecked": null, + "assertion_user_id": "assertionUserId", + "associated_media": "associatedMedia", + "associated_references": "associatedReferences", + "associated_sequences": "associatedSequences", + "associated_taxa": "associatedTaxa", + "aust_conservation": "countryConservation", + "australian_herbarium_region": null, + "basis_of_record": "basisOfRecord", + "bbox": null, + "bibliographic_citation": "bibliographicCitation", + "biogeographic_region": null, + "catalogue_number": "catalogNumber", + "citation": null, + "class_id": "classID", + "classs": "class", + "clazz": "class", + "collection_code": "collectionCode", + "collection_id": "collectionID", + "collection_name": "collectionName", + "collection_uid": "collectionUid", + 
"collector": "recordedBy", + "collector_text": "text_recordedBy", + "collectors": "recordedBy", + "common_name": "vernacularName", + "coordinate_precision": "coordinatePrecision", + "coordinate_uncertainty": "coordinateUncertaintyInMeters", + "country_code": "raw_countryCode", + "country_conservation": "countryConservation", + "cultivar_name": null, + "cultivated": null, + "data_generalizations": "dataGeneralizations", + "data_hub_uid": "dataHubUid", + "data_provider": "dataProviderName", + "data_provider_uid": "dataProviderUid", + "data_resource": "dataResourceName", + "data_resource_uid": "dataResourceUid", + "dataset_id": "datasetID", + "dataset_name": "datasetName", + "date_deleted": null, + "date_precision": "datePrecision", + "datum": "geodeticDatum", + "decimal_latitudelatitude": null, + "default_values_used": "defaultValuesUsed", + "deleted": null, + "depth": "verbatimDepth", + "depth_d": "depth", + "distance_outside_expert_range": null, + "duplicate_inst": null, + "duplicate_record": "isDuplicateOf", + "duplicate_status": "duplicateStatus", + "duplicate_type": "duplicateType", + "duplicates_original_institution_id": null, + "duplicates_original_unit_id": null, + "dynamic_properties": "dynamicProperties", + "elevation": "verbatimElevation", + "end_day_of_year": "endDayOfYear", + "establishment_means": "establishmentMeans", + "event_id": "eventID", + "event_remarks": "eventRemarks", + "event_time": "eventTime", + "exchange_number": null, + "family_id": "familyID", + "field_notes": "fieldNotes", + "field_number": "fieldNumber", + "first_loaded_date": "firstLoadedDate", + "footprint_srs": "footprintSRS", + "footprint_wkt": "footprintWKT", + "generalisation_to_apply_in_metres": "generalisationToApplyInMetres", + "generalised_locality": null, + "genus_guid": "genusID", + "georeference_protocol": "georeferenceProtocol", + "georeference_remarks": "georeferenceRemarks", + "georeference_sources": "georeferenceSources", + "georeference_verification_status": 
"georeferenceVerificationStatus", + "georeferenced_by": "georeferencedBy", + "georeferenced_date": "georeferencedDate", + "geospatial_issue": "geospatialIssues", + "geospatial_kosher": "spatiallyValid", + "geospatially_kosher": "spatiallyValid", + "higher_classification": "higherClassification", + "higher_geography": "higherGeography", + "ibra": "cl1048", + "ibra_subregion": "cl1049", + "identification_id": "identificationID", + "identification_qualifier": "identificationQualifier", + "identification_qualifier_s": "identificationQualifier", + "identification_references": "identificationReferences", + "identification_remarks": "identificationRemarks", + "identification_verification_status": "identificationVerificationStatus", + "identified_by": "identifiedBy", + "identified_by_text": "text_identifiedBy", + "identified_date": "dateIdentified", + "identifier_by": "identifiedBy", + "identifier_role": "identifierRole", + "image_url": "imageID", + "images": "imageIDs", + "imcra": "cl966", + "individual_count": "individualCount", + "individual_id": "organismID", + "information_withheld": "informationWithheld", + "infraspecific_epithet": "infraspecificEpithet", + "institution_code": "institutionCode", + "institution_id": "institutionID", + "institution_name": "institutionName", + "institution_uid": "institutionUid", + "interaction": null, + "island_group": "islandGroup", + "kingdom_id": "kingdomID", + "last_load_date": "lastLoadDate", + "last_modified_time": "lastLoadDate", + "last_processed_date": "lastProcessedDate", + "last_assertion_date": "lastAssertionDate", + "latitude": "decimalLatitude", + "lga": "cl959", + "life_stage": "lifeStage", + "loan_botanist": null, + "loan_date": null, + "loan_destination": "loanDestination", + "loan_identifier": "loanIdentifier", + "loan_number": null, + "loan_return_date": null, + "loan_returned_date": null, + "location_according_to": "locationAccordingTo", + "location_determined": null, + "location_id": "locationID", + 
"location_remarks": "locationRemarks", + "longitude": "decimalLongitude", + "lsid": "taxonConceptID", + "mappable": null, + "matched_name": null, + "max_depth_d": "maximumDepthInMeters", + "max_elevation_d": "maximumElevationInMeters", + "measurement_accuracy": "measurementAccuracy", + "measurement_determined_by": "measurementDeterminedBy", + "measurement_determined_date": "measurementDeterminedDate", + "measurement_id": "measurementID", + "measurement_method": "measurementMethod", + "measurement_remarks": "measurementRemarks", + "measurement_type": "measurementType", + "measurement_unit": "measurementUnit", + "measurement_value": "measurementValue", + "min_depth_d": "minimumDepthInMeters", + "min_elevation_d": "minimumElevationInMeters", + "misc_properties": "dynamicProperties", + "modified_date": "modified", + "mytest": null, + "name_according_to": "nameAccordingTo", + "name_match_metric": "matchType", + "name_parse_type": "nameType", + "name_published_in": "namePublishedIn", + "natural_occurrence": null, + "near_named_place_relation_to": null, + "nomenclatural_code": "nomenclaturalCode", + "nomenclatural_status": "nomenclaturalStatus", + "occurrence_date": "eventDate", + "occurrence_decade_i": "decade", + "occurrence_details": "occurrenceDetails", + "occurrence_id": "occurrenceID", + "occurrence_remarks": "occurrenceRemarks", + "occurrence_status": "occurrenceStatus", + "occurrence_status_s": "occurrenceStatus", + "occurrence_year": "occurrenceYear", + "order_id": "orderID", + "organism_quantity": "organismQuantity", + "organism_quantity_type": "organismQuantityType", + "original_name_usage": "originalNameUsage", + "other_catalog_numbers": "otherCatalogNumbers", + "outlier_for_layers": "outlierLayer", + "outlier_layer": "outlierLayer", + "outlier_layer_count": "outlierLayerCount", + "owner_institution_code": "ownerInstitutionCode", + "parent_event_id": "parentEventID", + "parent_name_usage": "parentNameUsage", + "photo_page_url": null, + "photographer": null, + 
"phylum_id": "phylumID", + "places": "stateProvince", + "portal_id": null, + "preferred_flag": null, + "previous_identifications": "previousIdentifications", + "quality_assertion": null, + "query_assertion_uuid": null, + "query_assertions": null, + "rank": "taxonRank", + "rank_id": "taxonRankID", + "rankID": "taxonRankID", + "raw_abcd_identification_qualifier": null, + "raw_associated_references": "associatedReferences", + "raw_basis_of_record": "raw_basisOfRecord", + "raw_common_name": "raw_vernacularName", + "raw_coordinate_precision": "raw_coordinatePrecision", + "raw_coordinate_uncertainty": "raw_coordinateUncertaintyInMeters", + "raw_country_conservation": "raw_countryConservation", + "raw_data_generalizations": "raw_dataGeneralizations", + "raw_datum": "raw_geodeticDatum", + "raw_duplicate_record": "isDuplicateOf", + "raw_easting": "easting", + "raw_establishment_means": "raw_establishmentMeans", + "raw_geo_validation_status": null, + "raw_georeference_protocol": "georeferenceProtocol", + "raw_georeference_remarks": "georeferenceRemarks", + "raw_georeference_sources": "georeferenceSources", + "raw_georeferenced_by": "georeferencedBy", + "raw_georeferenced_date": "raw_georeferencedDate", + "raw_habitat": "habitat", + "raw_ibra": "cl1048", + "raw_identification_qualifier": "identificationQualifier", + "raw_identification_references": "identificationReferences", + "raw_identified_date": "raw_dateIdentified", + "raw_images": "imageIDs", + "raw_information_withheld": "raw_informationWithheld", + "raw_institution_id": "raw_institutionUid", + "raw_latitude": "raw_decimalLatitude", + "raw_life_stage": "raw_lifeStage", + "raw_longitude": "raw_decimalLongitude", + "raw_max_depth": "raw_maximumDepthInMeters", + "raw_max_elevation": "raw_maximumElevationInMeters", + "raw_min_depth": "raw_minimumDepthInMeters", + "raw_min_elevation": "raw_minimumElevationInMeters", + "raw_modified_date": "raw_modified", + "raw_name": "raw_scientificName", + "raw_name_according_to": 
"nameAccordingTo", + "raw_nomenclatural_code": "nomenclaturalCode", + "raw_northing": "northing", + "raw_occurrence_date": "raw_eventDate", + "raw_occurrence_status": "raw_occurrenceStatus", + "raw_occurrence_year": "occurrenceYear", + "raw_rank": "raw_taxonRank", + "raw_sampling_protocol": "samplingProtocol", + "raw_scientific_name_authorship": "raw_scientificNameAuthorship", + "raw_sounds": "soundIDs", + "raw_state": "raw_stateProvince", + "raw_state_conservation": "raw_stateConservation", + "raw_subspecies": "subspecies", + "raw_taxon_name": "raw_scientificName", + "raw_type_status": "raw_typeStatus", + "raw_verbatim_depth": "verbatimDepth", + "raw_verbatim_elevation": "verbatimElevation", + "raw_verbatim_taxon_rank": "verbatimTaxonRank", + "record_number": "recordNumber", + "record_type": "basisOfRecord", + "related_resource_id": "relatedResourceID", + "relationship_of_resource": "relationshipOfResource", + "relationship_remarks": "relationshipRemarks", + "rem_text": null, + "reproductive_condition": "reproductiveCondition", + "rightsholder": "rightsHolder", + "row_key": "id", + "sampling_effort": "samplingEffort", + "sampling_protocol": "samplingProtocol", + "scientific_name_addendum": null, + "scientific_name_authorship": "scientificNameAuthorship", + "scientific_name_id": "scientificNameID", + "scientific_name_without_author": null, + "secondary_collectors": "secondaryCollectors", + "sensitive_coordinate_uncertainty": "sensitive_coordinateUncertaintyInMeters", + "sensitive_event_date": "sensitive_eventDate", + "sensitive_latitude": "sensitive_decimalLatitude", + "sensitive_longitude": "sensitive_decimalLongitude", + "sounds": "soundIDs", + "species_group": "speciesGroup", + "species_guid": "speciesID", + "species_habitats": null, + "species_list_uid": "speciesListUid", + "species_subgroup": "speciesSubgroup", + "specific_epithet": "specificEpithet", + "start_day_of_year": "startDayOfYear", + "state": "stateProvince", + "state_conservation": 
"stateConservation", + "subspecies_guid": "subspeciesID", + "subspecies_id": "subspeciesID", + "subspecies_name": "subspecies", + "suitable_modelling": "spatiallyValid", + "systemAssertions": "assertions", + "system_assertions": "assertions", + "taxon_concept_lsid": "taxonConceptID", + "taxon_id": "taxonID", + "taxon_name": "scientificName", + "taxon_remarks": "taxonRemarks", + "taxonomic_issue": "taxonomicIssues", + "taxonomic_kosher": null, + "taxonomic_status": "taxonomicStatus", + "taxonomically_kosher": null, + "text_rev": null, + "type_status": "typeStatus", + "type_status_qualifier": null, + "typified_name": "typifiedName", + "user_assertion_status": "userAssertions", + "user_assertions": "userAssertions", + "user_id": "userId", + "valid_distribution": null, + "verbatim_coordinate_system": "verbatimCoordinateSystem", + "verbatim_coordinates": "verbatimCoordinates", + "verbatim_date_identified": null, + "verbatim_event_date": "verbatimEventDate", + "verbatim_latitude": "verbatimLatitude", + "verbatim_locality": "verbatimLocality", + "verbatim_longitude": "verbatimLongitude", + "verbatim_srs": "verbatimSRS", + "verbatim_taxon_rank": "verbatimTaxonRank", + "verification_date": null, + "verification_notes": null, + "verifier": null, + "videos": "videoIDs", + "water_body": "waterBody" + }, + "fieldValueMapping": { + "assertions": { + "altitudeInFeet": "ELEVATION_NOT_METRIC", + "altitudeNonNumeric": "ELEVATION_NON_NUMERIC", + "altitudeOutOfRange": "ELEVATION_UNLIKELY", + "badlyFormedBasisOfRecord": "BASIS_OF_RECORD_INVALID", + "coordinatePrecisionMismatch": "COORDINATE_PRECISION_UNCERTAINTY_MISMATCH", + "coordinatesCentreOfCountry": "COORDINATES_CENTRE_OF_COUNTRY", + "coordinatesCentreOfStateProvince": "COORDINATES_CENTRE_OF_STATEPROVINCE", + "coordinatesOutOfRange": "COORDINATE_OUT_OF_RANGE", + "countryCoordinateMismatch": "COUNTRY_COORDINATE_MISMATCH", + "countryInferredByCoordinates": "COUNTRY_DERIVED_FROM_COORDINATES", + "decimalLatLongConversionFailed": 
"COORDINATE_REPROJECTION_FAILED", + "decimalLatLongConverted": "COORDINATE_REPROJECTED", + "depthInFeet": "DEPTH_NOT_METRIC", + "depthNonNumeric": "DEPTH_NON_NUMERIC", + "depthOutOfRange": "DEPTH_UNLIKELY", + "firstOfCentury": "FIRST_OF_CENTURY", + "firstOfMonth": "FIRST_OF_MONTH", + "firstOfYear": "FIRST_OF_YEAR", + "geodeticDatumAssumedWgs84": "GEODETIC_DATUM_ASSUMED_WGS84", + "georefPostDate": "GEOREFERENCE_POST_OCCURRENCE", + "homonymIssue": "TAXON_HOMONYM", + "idPreOccurrence": "ID_PRE_OCCURRENCE", + "invalidScientificName": "INVALID_SCIENTIFIC_NAME", + "invertedCoordinates": "PRESUMED_SWAPPED_COORDINATE", + "invalidGeodeticDatum": "GEODETIC_DATUM_INVALID", + "locationNotSupplied": "LOCATION_NOT_SUPPLIED", + "minMaxAltitudeReversed": "ELEVATION_MIN_MAX_SWAPPED", + "minMaxDepthReversed": "DEPTH_MIN_MAX_SWAPPED", + "missingCollectionDate": "MISSING_COLLECTION_DATE", + "missingGeodeticDatum": "MISSING_GEODETICDATUM", + "missingGeorefencedBy": "MISSING_GEOREFERENCEDBY", + "missingGeoreferenceProtocol": "MISSING_GEOREFERENCEPROTOCOL", + "missingGeoreferenceSources": "MISSING_GEOREFERENCESOURCES", + "missingGeoreferenceVerificationStatus": "MISSING_GEOREFERENCEVERIFICATIONSTATUS", + "missingTaxonRank": "MISSING_TAXONRANK", + "nameNotRecognised": "TAXON_MATCH_NONE", + "nameNotSupplied": "NAME_NOT_SUPPLIED", + "negatedLatitude": "PRESUMED_NEGATED_LATITUDE", + "negatedLongitude": "PRESUMED_NEGATED_LONGITUDE", + "resourceTaxonomicScopeMismatch": "TAXON_SCOPE_MISMATCH", + "stateCoordinateMismatch": "STATE_COORDINATE_MISMATCH", + "uncertaintyInPrecision": "UNCERTAINTY_IN_PRECISION", + "uncertaintyNotSpecified": "UNCERTAINTY_NOT_SPECIFIED", + "unknownCountry": "UNKNOWN_COUNTRY_NAME", + "unknownKingdom": "UNKNOWN_KINGDOM", + "unrecognisedCollectionCode": "COLLECTION_MATCH_NONE", + "unrecognizedGeodeticDatum": "GEODETIC_DATUM_INVALID", + "unrecognisedInstitutionCode": "INSTITUTION_MATCH_NONE", + "unrecognisedOccurrenceStatus": "OCCURRENCE_STATUS_UNPARSABLE", + 
"unrecognisedTypeStatus": "TYPE_STATUS_INVALID", + "zeroCoordinates": "ZERO_COORDINATE" + }, + "basisOfRecord": { + "HumanObservation": "HUMAN_OBSERVATION", + "PreservedSpecimen": "PRESERVED_SPECIMEN", + "MachineObservation": "MACHINE_OBSERVATION", + "MaterialSample": "MATERIAL_SAMPLE", + "LivingSpecimen": "LIVING_SPECIMEN", + "FossilSpecimen": "FOSSIL_SPECIMEN" + }, + "occurrenceStatus": { + "present": "PRESENT", + "absent": "ABSENT" + }, + "duplicateStatus": { + "D": "ASSOCIATED", + "R": "REPRESENTATIVE" + }, + "biome": { + "Terrestrial": "TERRESTRIAL", + "Marine": "MARINE" + }, + "speciesHabitats": { + "Terrestrial": "TERRESTRIAL", + "Marine": "MARINE" + }, + "speciesGroup": { + "Fish": "Fishes" + }, + "month": { + "01": "1", + "02": "2", + "03": "3", + "04": "4", + "05": "5", + "06": "6", + "07": "7", + "08": "8", + "09": "9" + } + } +} \ No newline at end of file diff --git a/sbdi/data/config/subgroups.json b/sbdi/data/config/subgroups.json new file mode 100644 index 000000000..5a6a16f61 --- /dev/null +++ b/sbdi/data/config/subgroups.json @@ -0,0 +1,626 @@ +[ + { + "speciesGroup": "Mammals", + "taxonRank": "order", + "taxa": [ + { + "name": "DASYUROMORPHIA", + "common": "Carnivorous Marsupials" + }, + { + "name": "DIPROTODONTIA", + "common": "Herbivorous Marsupials" + }, + { + "name": "NOTORYCTEMORPHIA", + "common": "Marsupial Moles" + }, + { + "name": "PERAMELEMORPHIA", + "common": "Bandicoots, Bilbies" + }, + { + "name": "MONOTREMATA", + "common": "Monotremes" + }, + { + "name": "ARTIODACTYLA", + "common": "Even-toed hoofed" + }, + { + "name": "CARNIVORA", + "common": "Carnivores" + }, + { + "name": "CETACEA", + "common": "Dolphins, Porpoises, Whales" + }, + { + "name": "CHIROPTERA", + "common": "Bats" + }, + { + "name": "INSECTIVORA", + "common": "Shrews, Hedgehogs" + }, + { + "name": "LAGOMORPHA", + "common": "Hares, Pikas, Rabbits" + }, + { + "name": "PERRISODACTYLA", + "common": "Odd-toed hoofed" + }, + { + "name": "RODENTIA", + "common": "Rodents" + }, 
+ { + "name": "SIRENIA", + "common": "Dugongs, Manatees, Sea Cows" + } + ] + }, + { + "speciesGroup": "Birds", + "taxonRank": "order", + "taxa": [ + { + "name": "ANSERIFORMES", + "common": "Ducks, Geese, Swans" + }, + { + "name": "APODIFORMES", + "common": "Hummingbirds, Swifts" + }, + { + "name": "CAPRIMULGIFORMES", + "common": "Nightjars, Frogmouths, Potoos" + }, + { + "name": "CHARADRIIFORMES", + "common": "Waders, Gulls, Auks" + }, + { + "name": "CICONIIFORMES", + "common": "Bitterns, Ibises" + }, + { + "name": "COLUMBIFORMES", + "common": "Doves" + }, + { + "name": "CORACIIFORMES", + "common": "Kingfishers" + }, + { + "name": "CUCULIFORMES", + "common": "Cuckoos" + }, + { + "name": "FALCONIFORMES", + "common": "Falcons" + }, + { + "name": "GALLIFORMES", + "common": "Fowls" + }, + { + "name": "GRUIFORMES", + "common": "Cranes" + }, + { + "name": "PASSERIFORMES", + "common": "Perching Birds" + }, + { + "name": "PELECANIFORMES", + "common": "Large waterbirds" + }, + { + "name": "PHOENICOPTERIFORMES", + "common": "Flamingos" + }, + { + "name": "PODICIPEDIFORMES", + "common": "Grebes" + }, + { + "name": "PROCELLARIIFORMES", + "common": "Petrels, Fulmars" + }, + { + "name": "PSITTACIFORMES", + "common": "Parrots" + }, + { + "name": "SPHENISCIFORMES", + "common": "Penguins" + }, + { + "name": "STRIGIFORMES", + "common": "Owls" + }, + { + "name": "STRUTHIONIFORMES", + "common": "Ostriches" + }, + { + "name": "TURNICIFORMES", + "common": "Buttonquails" + } + ] + }, + { + "speciesGroup": "Insects and Spiders", + "taxonRank": "order", + "taxa": [ + { + "name": "ARCHAEOGNATHA", + "common": "Bristletails" + }, + { + "name": "BLATTODEA", + "common": "Cockroaches, Termites" + }, + { + "name": "COLEOPTERA", + "common": "Beetles" + }, + { + "name": "DERMAPTERA", + "common": "Earwigs" + }, + { + "name": "DIPTERA", + "common": "Flies, Mosquitoes" + }, + { + "name": "EMBIOPTERA", + "common": "Webspinners" + }, + { + "name": "EPHEMEROPTERA", + "common": "Mayflies, Shadlfies" + }, 
+ { + "name": "HEMIPTERA", + "common": "Cicadas, Aphids, Planthoppers, Leafhoppers, Shield Bugs" + }, + { + "name": "HYMENOPTERA", + "common": "Wasps, Ants, Bees, Sawflies" + }, + { + "name": "LEPIDOPTERA", + "common": "Butterflies, Moths" + }, + { + "name": "MANTODEA", + "common": "Mantises" + }, + { + "name": "MECOPTERA", + "common": "Scorpionflies, Hangingflies" + }, + { + "name": "MEGALOPTERA", + "common": "Alderflies, Dobsonflies, Fishflies" + }, + { + "name": "NEUROPTERA", + "common": "Lacewings, Mantidflies, Antlions" + }, + { + "name": "ODONATA", + "common": "Dragonflies, Damselflies" + }, + { + "name": "ORTHOPTERA", + "common": "Grasshoppers, Crickets, Locusts, Katydids, Weta, Lubber" + }, + { + "name": "PHASMIDA", + "common": "Stick Insects, Phasmids" + }, + { + "name": "PHTHIRAPTERA", + "common": "Lice" + }, + { + "name": "PLECOPTERA", + "common": "Stoneflies" + }, + { + "name": "PSOCOPTERA", + "common": "Booklice, Barklice, Barkflies" + }, + { + "name": "SIPHONAPTERA", + "common": "Fleas" + }, + { + "name": "STREPSIPTERA", + "common": "Twisted-Wing Parasites" + }, + { + "name": "THYSANOPTERA", + "common": "Thrips" + }, + { + "name": "TRICHOPTERA", + "common": "Caddisflies, Sedge-flies or Rail-flies" + }, + { + "name": "ZORAPTERA", + "common": "Zorapterans" + }, + { + "name": "ZYGENTOMA", + "common": "Silverfish" + }, + { + "name": "ARANEAE", + "common": "Spiders" + } + ] + }, + { + "speciesGroup": "Amphibians", + "taxonRank": "family", + "taxa": [ + { + "name": "BUFONIDAE", + "common": "True Toads" + }, + { + "name": "HYLIDAE", + "common": "Tree Frogs" + }, + { + "name": "MICROHYLIDAE", + "common": "Narrow-Mouthed Frogs" + }, + { + "name": "MYOBATRACHIDAE", + "common": "Australian Ground Frogs" + }, + { + "name": "RANIDAE", + "common": "True Frogs" + } + ] + }, + { + "speciesGroup": "Reptiles", + "taxonRank": "order", + "taxa": [ + { + "name": "CROCODYLIA", + "common": "Crocodiles" + }, + { + "name": "SQUAMATA", + "common": "Lizards, Snakes" + }, + { + 
"name": "TESTUDINES", + "common": "Tortoises, Turtles, Terrapins" + } + ] + }, + { + "speciesGroup": "Fish", + "taxonRank": "order", + "taxa": [ + { + "name": "MYXINIFORMES", + "common": "Hagfishes" + }, + { + "name": "CARCHARHINIFORMES", + "common": "Ground Sharks" + }, + { + "name": "HETERODONTIFORMES", + "common": "Bullhead Sharks" + }, + { + "name": "HEXANCHIFORMES", + "common": "Cow Sharks" + }, + { + "name": "LAMNIFORMES", + "common": "Mackerel Sharks" + }, + { + "name": "MYLIOBATIFORMES", + "common": "Batoids" + }, + { + "name": "ORECTOLOBIFORMES", + "common": "Carpet Sharks" + }, + { + "name": "PRISTIFORMES", + "common": "Sawfish" + }, + { + "name": "PRISTIOPHORIFORMES", + "common": "Saw Sharks" + }, + { + "name": "RAJIFORMES", + "common": "Softnose Skates" + }, + { + "name": "RHINOBATIFORMES", + "common": "Guitarfish" + }, + { + "name": "SQUALIFORMES", + "common": "Dogfish Sharks" + }, + { + "name": "SQUATINIFORMES", + "common": "Angel Sharks" + }, + { + "name": "TORPEDINIFORMES", + "common": "Electric Rays" + }, + { + "name": "CHIMAERIFORMES", + "common": "Chimaeras" + }, + { + "name": "CERATODONTIFORMES", + "common": "Lungfish" + }, + { + "name": "CLUPEIFORMES", + "common": "Anchovies " + }, + { + "name": "ALBULIFORMES", + "common": "Bonefishes" + }, + { + "name": "ANGUILLIFORMES", + "common": "Eels" + }, + { + "name": "ELOPIFORMES", + "common": "Tarpons" + }, + { + "name": "NOTACANTHIFORMES", + "common": "Spiny Eels" + }, + { + "name": "SACCOPHARYNGIFORMES", + "common": "Sackpharynx Fishes" + }, + { + "name": "ATHERINIFORMES", + "common": "Rainbow Fishes" + }, + { + "name": "BELONIFORMES", + "common": "Halfbeeks" + }, + { + "name": "BERYCIFORMES", + "common": "Ray-finned fishes" + }, + { + "name": "CYPRINODONTIFORMES", + "common": "Killifishes" + }, + { + "name": "GASTEROSTEIFORMES", + "common": "Dragonfishes" + }, + { + "name": "MUGILIFORMES", + "common": "Mullet fish" + }, + { + "name": "PERCIFORMES", + "common": "Perch-like Fishes" + }, + { + "name": 
"PLEURONECTIFORMES", + "common": "Flatfishes" + }, + { + "name": "SCORPAENIFORMES", + "common": "Scorpion Fishes, Sculpins" + }, + { + "name": "STEPHANOBERYCIFORMES", + "common": "Deep-sea ray-finned fishes" + }, + { + "name": "SYNBRANCHIFORMES", + "common": "Swamp Eels" + }, + { + "name": "TETRAODONTIFORMES", + "common": "Cowfishes" + }, + { + "name": "ZEIFORMES", + "common": "Boarfishes" + }, + { + "name": "AULOPIFORMES", + "common": "Marine ray-finned fish" + }, + { + "name": "LAMPRIDIFORMES", + "common": "Opahs" + }, + { + "name": "CYPRINIFORMES", + "common": "Minnows" + }, + { + "name": "GONORHYNCHIFORMES", + "common": "Milkfishes" + }, + { + "name": "SILURIFORMES", + "common": "Catfishes" + }, + { + "name": "BATRACHOIDIFORMES", + "common": "Batrachoidiforms" + }, + { + "name": "GADIFORMES", + "common": "Cods" + }, + { + "name": "LOPHIIFORMES", + "common": "Anglerfishes" + }, + { + "name": "OPHIDIIFORMES", + "common": "Ophidiiforms" + }, + { + "name": "POLYMIXIIFORMES", + "common": "Beardfishes" + }, + { + "name": "ARGENTINIFORMES", + "common": "Baldfishes,Tubeshoulders" + }, + { + "name": "SALMONIFORMES", + "common": "Salmons" + }, + { + "name": "MYCTOPHIFORMES", + "common": "Latern Fishes, Neoscopelids" + }, + { + "name": "ATELEOPODIFORMES", + "common": "Jellynose Fishes" + }, + { + "name": "STOMIIFORMES", + "common": "Deep-sea ray-finned fishes" + }, + { + "name": "OSTEOGLOSSIFORMES", + "common": "Bonytongues" + } + ] + }, + { + "speciesGroup": "Molluscs", + "taxonRank": "class", + "taxa": [ + { + "name": "APLACOPHORA", + "common": "Solenogasters" + }, + { + "name": "BIVALVIA", + "common": "Mussels, Clams" + }, + { + "name": "CEPHALOPODA", + "common": "Cuttlefish" + }, + { + "name": "GASTROPODA", + "common": "Gastropods, Slugs, Snails" + }, + { + "name": "POLYPLACOPHORA", + "common": "Chitons" + }, + { + "name": "SCAPHOPODA", + "common": "Tooth Shells" + } + ] + }, + { + "speciesGroup": "Crustaceans", + "taxonRank": "class", + "taxa": [ + { + "name": 
"BRANCHIOPODA", + "common": "Fairy shrimp, Clam shrimp" + }, + { + "name": "MALACOSTRACA", + "common": "Crabs, Lobsters" + }, + { + "name": "MAXILLOPODA", + "common": "Barnacles, Copepods" + }, + { + "name": "OSTRACODA", + "common": "Seed shrimp" + } + ] + }, + { + "speciesGroup": "Plants", + "facetName": "species_group", + "taxa": [ + { + "name": "Monocots", + "common": "Monocots" + }, + { + "name": "Dicots", + "common": "Dicots" + }, + { + "name": "Angiosperms", + "common": "Flowering plants" + }, + { + "name": "FernsAndAllies", + "common": "Ferns and Allies" + }, + { + "name": "Gymnosperms", + "common": "Conifers, Cycads" + } + ] + }, + { + "speciesGroup": "Fungi", + "taxonRank": "phylum", + "taxa": [ + { + "name": "Ascomycota", + "common": "Asco's" + }, + { + "name": "Basidiomycota", + "common": "Basidio's" + }, + { + "name": "Chytridiomycota", + "common": "Chytrids" + }, + { + "name": "Zygomycota", + "common": "Zygomycetes" + }, + { + "name": "Glomeromycota", + "common": "Glomeromycota" + } + ] + } +] \ No newline at end of file