In [None]:
# This notebook shows how to take some shape information, format it to suit the echoSMs
# anatomical datastore format, add metadata, and write it out in TOML format.

# This code can be a starting point for writing your own Python code to convert your shape
# data into anatomical datastore TOML files.

# This demo code only creates one dataset with one specimen containing one shape.
# You may find it useful to generalise it to create multiple datasets, specimens,
# and shapes.

import tomli_w
import requests
import copy
import numpy as np
from datetime import date
from pathlib import Path

# Base URL of the WoRMS REST service (used to look up species naming from the aphiaID)
worms_url = "https://www.marinespecies.org/rest/"

# Dataset-level metadata. Refer to the schema documentation for explanations of these
# attributes, including data types, allowed values, and which are mandatory.
dataset = {
    "dataset_id": "demo_123456",  # needs to be unique across all datasets
    "description": "",
    "anatomical_category": "organism",
    # isoformat() gives YYYY-MM-DD; today's date is only a placeholder for both fields
    "date_first_added": date.today().isoformat(),
    "date_last_modified": date.today().isoformat(),
    "aphiaID": 126436,  # visit www.marinespecies.org and find the id for your species
    # The taxonomic naming fields are left empty here
    "class": "",
    "order": "",
    "family": "",
    "genus": "",
    "species": "",
    "vernacular_name": "",
    "reference": "",
    # Attributes left commented out - uncomment and fill in the ones you have values for:
    # 'activity_name': "",
    # 'location': "",
    # 'latitude': np.nan,
    # 'latitude_units': 'degrees_north',
    # 'longitude': np.nan,
    # 'longitude_units': 'degrees_east',
    # 'depth': np.nan,
    # 'depth_units': 'm',
    "date_collection": "",
    "date_image": "",
    "investigators": [],
    "data_collection_description": "",
    "note": "",
    "imaging_method": "unknown",
    "shape_method": "unknown",
    "shape_method_processing": "unknown",
    "model_type": "",
    "sound_speed_method": "unknown",
    "mass_density_method": "unknown",
    "dataset_size": 0.0,  # the datastore loader program will overwrite this
    "dataset_size_units": "megabyte",
    "specimens": [],  # starts empty
}

In [None]:
# Query WoRMS to get the full species classification naming using the aphia ID.
# This is just a convenience as you could provide these attributes manually.

# A timeout is given because requests will otherwise wait indefinitely if the
# server does not respond.
r = requests.get(worms_url + 'AphiaRecordByAphiaID/' + str(dataset['aphiaID']), timeout=30)
if r.status_code == 200:
    aphia_data = r.json()

    # and populate the relevant echoSMs anatomical datastore metadata fields.
    # TOML has no null value, so an absent or null rank is stored as an empty string.
    # NOTE(review): assumes WoRMS can return null for a rank - confirm against the API.
    for attr in ['class', 'order', 'family', 'genus']:
        dataset[attr] = aphia_data.get(attr) or ""
    dataset['species'] = aphia_data['scientificname']
else:
    # Only parse the body of a successful response; on failure the classification
    # fields are left as they were and the notebook carries on.
    print(f'Failed to get record for aphiaID of {dataset["aphiaID"]} '
          f'(HTTP status {r.status_code})')

# Get a vernacular name from WoRMS too
r = requests.get(worms_url + 'AphiaVernacularsByAphiaID/' + str(dataset['aphiaID']),
                 timeout=30)
# If there are some for this species, choose one (the first in the returned list)
if r.status_code == 200 and len(vers := r.json()) > 0:
    dataset['vernacular_name'] = vers[0]['vernacular']


In [None]:
# Each dataset can have one or more specimens
specimen = {'specimen_id': 'some specimen name',
            'specimen_condition': 'unknown',
            'length': 0.45,
            'length_units': 'm',
            # 'weight': 0.0,
            # 'weight_units': 'kg',
            'sex': 'unknown',
            'length_type': 'unknown',
            'shape_type': 'outline',
            'shapes': []}

# Each specimen can have one or more shapes. The coordinate and property lists are
# left empty in this demo - fill them in with your own shape data.
shape = {'shape_units': 'm',
         'boundary': 'pressure-release',
         'x': [],
         'y': [],
         'z': [],
         'height': [],
         'width': [],
         'mass_density': [],
         'mass_density_units': 'kg/m^3',
         'sound_speed_compressional': [],
         'sound_speed_compressional_units': 'm/s'}

# Add the shape into the specimen (the specimen dict is created afresh above, so
# re-running this cell does not accumulate shapes)
specimen['shapes'].append(shape)

# Put the specimen into the dataset. The list is assigned rather than appended to so
# that re-running this cell does not add a duplicate copy of the specimen. If you
# have several specimens, list them all here.
dataset['specimens'] = [specimen]

In [None]:
# Write the entire dataset to a TOML file

# It might also be sensible to do the validation against the schema here - the
# datastore loading process does validation, but it's probably better to know about
# validation errors now rather than later.


def _to_builtin(obj):
    """Return a copy of `obj` with numpy arrays and scalars replaced by built-in types.

    Dicts, lists and tuples are rebuilt recursively (tuples come back as lists).
    numpy arrays become (nested) lists and numpy scalars become Python scalars.
    Any other object is returned unchanged. The input itself is never modified.
    """
    if isinstance(obj, dict):
        return {key: _to_builtin(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_to_builtin(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    return obj


# Convert any numpy arrays or scalars, wherever they occur in the dataset, to plain
# Python types (to facilitate exporting to toml). `dataset` itself is left untouched.
ds = _to_builtin(dataset)

# As per the echoSMs datastore guidelines, the TOML file should be placed in
# a directory with the same name as the dataset_id
dataset_file = Path.home()/'datasets'/dataset['dataset_id']/'metadata.toml'

# Create the directory for the TOML file (no error if it already exists)
dataset_file.parent.mkdir(parents=True, exist_ok=True)

# Write the TOML file (tomli_w.dump needs the file opened in binary mode)
with open(dataset_file, 'wb') as f:
    tomli_w.dump(ds, f)
print('Wrote TOML file to', dataset_file)