In [1]:
# Script to build Markdown pages that provide term metadata for complex vocabularies
# Steve Baskauf 2020-06-28 CC0
# This script merges static Markdown header and footer documents with term information tables (in Markdown) generated from data in the rs.tdwg.org repo from the TDWG Github site

import re
import requests   # best library to manage HTTP transactions
import csv        # library to read/write/parse CSV files
import json       # library to convert JSON to Python data structures
import pandas as pd

# On my Jupyter notebook, I had to use this hack to avoid a certificate problem:
# NOTE(review): the assignment below replaces ssl's default HTTPS context factory with the
# unverified one, so code in this process that relies on that default no longer verifies
# server certificates. Prefer fixing the local certificate store — TODO confirm that the
# hack is still required before publishing pages built from the downloaded data.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# -----------------
# Configuration section
# -----------------

# !!!! NOTE !!!!
# There is not currently an example of a complex vocabulary that has the column headers
# used in the sample files. In order to test this script, it was written against the Audubon Core
# files, which have headers that differ from the samples. (As configured below, the script reads
# the `plic` term list, and its columns are accessed with those same Audubon Core-style header
# names.) So throughout the code, there are pairs of lines where the default header names are
# commented out and the Audubon Core headers are not. To build a page using the sample files,
# you will need to reverse the commenting of these pairs.

# This is the base URL for raw files from the branch of the repo that has been pushed to GitHub
# Every CSV path built from this value is appended directly, so the trailing slash is required.
githubBaseUri = 'https://raw.githubusercontent.com/mfvargas/rs.tdwg.org/master/'
#githubBaseUri = 'https://raw.githubusercontent.com/baskaufs/plic.rs.tdwg.org/master/'

# The header and footer are static Markdown files opened by relative path (i.e. relative to the
# working directory); the generated text is placed between them and written to outFileName.
headerFileName = 'termlist-header.md'
footerFileName = 'termlist-footer.md'
outFileName = 'pc.md'

# This is a Python list of the database names of the term lists to be included in the document.
# Each name is also used as the folder name and file-name stem of the CSV files that are loaded
# (<name>/<name>.csv and <name>-versions/<name>-versions.csv).
termLists = ['plic']
#termLists = ['pathway']

# NOTE! There may be problems unless every term list is of the same vocabulary type since the number of columns will differ
# However, there probably aren't any circumstances where mixed types will be used to generate the same page.
# Types 2 and 3 add a controlled_value_string column; type 3 also adds skos_broader.
vocab_type = 2 # 1 is simple vocabulary, 2 is simple controlled vocabulary, 3 is c.v. with broader hierarchy

# Terms in large vocabularies like Darwin and Audubon Cores may be organized into categories using tdwgutility_organizedInClass
# If so, those categories can be used to group terms in the generated term list document.
# When True, the term CSV must provide a tdwgutility_organizedInClass column.
organized_in_categories = True

# The display_* lists below are parallel: position i in each list describes the same category.
# The cells that build the indices and the term tables loop over range(len(display_order)) and
# read display_label and display_comments at the same position, so those lists need at least
# as many entries as display_order.

# If organized in categories, the display_order list must contain the IRIs that are values of tdwgutility_organizedInClass
# If not organized into categories, the value is irrelevant. There just needs to be one item in the list.
# The values are compared for equality with the tdwgutility_organizedInClass column, so they
# must match the values in the CSV exactly. The order of this list is the order of the sections.
display_order = ["DatasetMetadata", "RecordMetadata", "NomenclatureAndClassification", "TaxonomicDescription", "LifeForm", "LifeCycle", "Reproduction", "AnnualCycles", "Feeding", "Dispersal", "Behavior", "Interactions", "MolecularData", "Migratory", "EcologicalSignificance", "EnvironmentalEnvelope", "NaturalHistory", "Invasiveness", "HabitatAndDistribution", "DemographyAndThreat", "UsesManagementAndConservation", "AssociatedParty", "AncillaryData"]
# Section labels shown for each category in the indices and in the section headings.
display_label = ["Dataset Metadata", "Record Metadata", "Nomenclature And Classification", "Taxonomic Description", "Life Form", "Life Cycle", "Reproduction", "Annual Cycles", "Feeding", "Dispersal", "Behavior", "Interactions", "Molecular Data", "Migratory", "Ecological Significance", "Environmental Envelope", "Natural History", "Invasiveness", "Habitat And Distribution", "Demography And Threat", "Uses Management And Conservation", "Associated Party", "Ancillary Data"]
# Text inserted directly after each category's section heading; it is emitted verbatim.
display_comments = ['Information about the collections of records.',
"Information about the Version, Revision, the language and target audiences of the Taxon Record.",
"Information on the taxon's name, synomyms, nomenclatural status, common names, and taxonomic hierarchy; plus misc. details and ancillary data",
'Description of Taxon: Brief description, Full description, Identification keys and Ancillary Data',
'General appearance. Characteristic mode of growth or occurrence associated to the environment, particularly for plants. Comprising the size, shape, texture and orientation',
'Life history of a living organism: The course of developmental changes in an organism from fertilized zygote to maturity or stages through which an organism passes.',
'All data related to the generation of offspring',
'Set of changes or events that recurrently take place at the same time of year and are influenced by seasonal and interannual climate variations. For example: reproduction, flowering, fruiting, emergence of insects, etc. Also called phenology, mainly in plants. In the case of migration, it only makes reference to the timing; other data about this are gathered in MigratoryData.',
'Information related to the food supply for the development and sustenance of the individual and/or its offspring',
'The permanent spreading of individuals away from each other not including return. Dispersal determines the range over which genetic mixing occurs, and thus, the degree of homogeneity and inbreeding in a population.',
'Responses, reactions or movements made by an organism in a particular situation',
'Mutual or reciprocal actions or influences. For example, predation, parasitism, mutualism, etc. Relations with products grown and stored by man (plagues) are also included.',
'Information on the chemical structures and biological processes at the molecular level: DNA and proteins sequences, protein structures, expression profiles of genes protein domains, families of genes, mutations, polymorphisms, involvement in disease, ... Placeholder for connecting with standards developed by specialists, e.g. Standards for Data Exchange and Management from Scalalife (http://www.scalalife.eu/content/data.html).',
'The regular, usually seasonal, movement of all or part of an animal population to and from a given area, which can occur in variable periods of time and even involve more than one generation.',
'Ecological importance of the taxon',
'Set of environmental conditions within which it is believed that the species can persist; i.e. where its environmental requirements can be satisfied.',
'Relevant descriptive information about the species: Life Form, Life Cycle, Reproduction, Annual Cycles, Feeding, Dispersal, Behavior, Interactions, Molecular Data, Migratory, Ecological Significance, Miscellaneous Details, Environmental Envelope and Ancillary Data.',
'Information about invasive exotic species that could threaten ecosystems, habitats and species',
'Habitat and Distribution of the species',
'Information concerning the demographic aspects of the species: Territory, Population Biology, Threat Status, Direct Threats, Legislation and Ancillary Data',
'Uses: Ways in which species are utilized by people, including Folklore. Management: actions aimed at conserving or recovering species. Conservation status',
'A party associated with the resource. Parties have particular role. (EML-RESOURCE)',
'The AncillaryData element contains information that could be an image, an audio, a list of references, a list of agents, rights, licenses...related with the element it belongs. Its definition is borrowed from the dataObjectBaseType element of the EOL scheme.']
# NOTE(review): display_id is not read by any cell visible in this notebook; presumably it holds
# the fragment identifiers of the category sections — confirm before relying on it.
display_id = ["DatasetMetadata", "RecordMetadata", "NomenclatureAndClassification", "TaxonomicDescription", "LifeForm", "LifeCycle", "Reproduction", "AnnualCycles", "Feeding", "Dispersal", "Behavior", "Interactions", "MolecularData", "Migratory", "EcologicalSignificance", "EnvironmentalEnvelope", "NaturalHistory", "Invasiveness", "HabitatAndDistribution", "DemographyAndThreat", "UsesManagementAndConservation", "AssociatedParty", "AncillaryData"]

#display_order = ['']
#display_label = ['Vocabulary'] # these are the section labels for the categories in the page
#display_comments = [''] # these are the comments about the category to be appended following the section labels
#display_id = ['Vocabulary'] # these are the fragment identifiers for the associated sections for the categories

# ---------------
# Function definitions
# ---------------

# replace URL with link
#
def createLinks(text):
    """Wrap every http(s) URL found in text in an HTML anchor element.

    A URL runs from ``http://`` or ``https://`` up to the next whitespace, comma,
    semicolon, closing parenthesis or double quote. The URL is used both as the
    ``href`` and as the visible link text.

    Parameters:
        text (str): text that may contain bare URLs.

    Returns:
        str: the text with each URL replaced by ``<a href="URL">URL</a>``.
    """
    def repl(match):
        url = match.group(1)
        # A URL that ends a sentence captures the full stop as well; keep that
        # full stop outside of the link so the link target stays valid.
        if url.endswith('.'):
            url = url[:-1]
            return '<a href="' + url + '">' + url + '</a>.'
        return '<a href="' + url + '">' + url + '</a>'

    # Raw string: \s and \) are regex escapes, not Python string escapes. Writing them in
    # a normal string literal triggers an invalid-escape-sequence warning.
    pattern = r'(https?://[^\s,;\)"]*)'
    return re.sub(pattern, repl, text)

In [2]:
# Build one metadata dictionary per term list: its database name plus the preferred
# namespace prefix and namespace URI that the following cells use to build CURIEs and IRIs.
term_lists_info = []

# *** for whatever reason, these characteristics didn't make it into the term lists data, so this is a hack:
# the namespace columns are read from the plic term table instead of from term-lists.csv.

#frame = pd.read_csv(githubBaseUri + 'term-lists/term-lists.csv', na_filter=False)
frame = pd.read_csv(githubBaseUri + 'plic/plic.csv', na_filter=False)

for termList in termLists:
    term_list_dict = {'database': termList}
    matching_rows = frame[frame['database'] == termList]
    if matching_rows.empty:
        # Without these values the next cell fails on a missing dictionary key, so say why here.
        print('WARNING: no rows with database "' + termList + '" were found; pref_ns_prefix and pref_ns_uri are not set')
    else:
        # Every matching row is expected to carry the same namespace values. The last one is
        # used, which is the row whose values ended up in the dictionary before.
        namespace_row = matching_rows.iloc[-1]
        term_list_dict['pref_ns_prefix'] = namespace_row['vann_preferredNamespacePrefix']
        term_list_dict['pref_ns_uri'] = namespace_row['vann_preferredNamespaceUri']
        # term_list_dict['list_iri'] = namespace_row['list']  # *** also a hack here
    term_lists_info.append(term_list_dict)
print(term_lists_info)

[{'database': 'plic', 'pref_ns_prefix': 'plic', 'pref_ns_uri': 'http://rs.tdwg.org/plic/terms/'}]


In [3]:
# Build terms_df, a table with one row per term of every configured term list, and the two
# sorted views of it (by label and by local name) that the index and table cells read.

# Create column list
#column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'rdfs_comment', 'skos_scopeNote', 'dcterms_description', 'examples', 'term_modified', 'term_deprecated', 'rdf_type']
column_list = ['pref_ns_prefix', 'pref_ns_uri', 'term_localName', 'label', 'definition', 'usage', 'notes', 'examples', 'equivalentXPath', 'term_modified', 'term_deprecated', 'type']
if vocab_type == 2:
    column_list += ['controlled_value_string']
elif vocab_type == 3:
    column_list += ['controlled_value_string', 'skos_broader']
if organized_in_categories:
    column_list.append('tdwgutility_organizedInClass')
column_list.append('version_iri')

# Create list of lists metadata table
table_list = []
for term_list in term_lists_info:
    # retrieve versions metadata for term list
    versions_url = githubBaseUri + term_list['database'] + '-versions/' + term_list['database'] + '-versions.csv'
    versions_df = pd.read_csv(versions_url, na_filter=False)

    # Map each term local name to the IRI of its recommended version once per term list, so
    # the versions table is not scanned again for every term. When a local name has several
    # recommended versions, the one on the last such row of the file is kept.
    recommended_versions = versions_df[versions_df['version_status'] == 'recommended']
    version_iri_by_local_name = dict(zip(recommended_versions['term_localName'], recommended_versions['version']))

    # retrieve current term metadata for term list
    data_url = githubBaseUri + term_list['database'] + '/' + term_list['database'] + '.csv'
    frame = pd.read_csv(data_url, na_filter=False)
    for index,row in frame.iterrows():
        #row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['rdfs_comment'], row['skos_scopeNote'], row['dcterms_description'], row['examples'], row['term_modified'], row['term_deprecated'], row['rdf_type']]
        row_list = [term_list['pref_ns_prefix'], term_list['pref_ns_uri'], row['term_localName'], row['label'], row['definition'], row['usage'], row['notes'], row['examples'], row['equivalentXPath'], row['term_modified'], row['term_deprecated'], row['type']]
        if vocab_type == 2:
            # The source table may lack this column; the cell is left empty in that case.
            if 'controlled_value_string' in row:
                row_list.append(row['controlled_value_string'])
            else:
                row_list.append('')
        elif vocab_type == 3:
            # The broader concept is stored as a local name; turn it into a CURIE when present.
            if row['skos_broader'] == '':
                broader_curie = ''
            else:
                broader_curie = term_list['pref_ns_prefix'] + ':' + row['skos_broader']
            row_list += [row['controlled_value_string'], broader_curie]
        if organized_in_categories:
            row_list.append(row['tdwgutility_organizedInClass'])

        # Borrowed terms really don't have implemented versions. They may be lacking values for version_status.
        # In their case, their version IRI will be omitted.
        version_iri = version_iri_by_local_name.get(row['term_localName'], '')
        # NOTE: the current hack for non-TDWG terms without a version is to append # to the end of the term IRI
        # (endswith also copes with an empty version string, which indexing the last character does not)
        if version_iri.endswith('#'):
            version_iri = ''
        row_list.append(version_iri)

        table_list.append(row_list)

# Turn list of lists into dataframe
terms_df = pd.DataFrame(table_list, columns = column_list)

terms_sorted_by_label = terms_df.sort_values(by='label')
# The index by term name is documented as sorted by lowercase local name, so compare the
# lowercased names; a stable sort keeps names that differ only in case in their file order.
terms_sorted_by_localname = terms_df.sort_values(by='term_localName', key=lambda names: names.str.lower(), kind='mergesort')
terms_sorted_by_label

Unnamed: 0,pref_ns_prefix,pref_ns_uri,term_localName,label,definition,usage,notes,examples,equivalentXPath,term_modified,term_deprecated,type,controlled_value_string,tdwgutility_organizedInClass,version_iri
5,plic,http://rs.tdwg.org/plic/terms/,abstract,Abstract,A brief summary of the most relevant or attrac...,,Example: Blue whale is the largest mammal in t...,,/Dataset/Metadata/dataset/abstract|/Dataset/Ta...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,RecordMetadata,http://rs.tdwg.org/plic/terms/version/abstract...
176,plic,http://rs.tdwg.org/plic/terms/,abundance,Abundance,"Controlled vocabulary: Dominant, Common, Rare,...",,,,/Dataset/TaxonRecord/Invasiveness/Invasiveness...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,Invasiveness,http://rs.tdwg.org/plic/terms/version/abundanc...
82,plic,http://rs.tdwg.org/plic/terms/,AbundanceData,AbundanceData,The relative representation of a species popul...,,,,/Dataset/TaxonRecord/DemographyAndThreat/Popul...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,DemographyAndThreat,http://rs.tdwg.org/plic/terms/version/Abundanc...
132,plic,http://rs.tdwg.org/plic/terms/,Actions,Actions,Individual tasks in which a management project...,,,,/Dataset/TaxonRecord/UsesManagementAndConserva...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,UsesManagementAndConservation,http://rs.tdwg.org/plic/terms/version/Actions-...
133,plic,http://rs.tdwg.org/plic/terms/,ActionsType,Actions Type,Each action must have one type. In the case th...,,,,,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,UsesManagementAndConservation,http://rs.tdwg.org/plic/terms/version/ActionsT...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,plic,http://rs.tdwg.org/plic/terms/,synonymsUnstructured,unstructured synonyms,list of synonyms elements in a text block format.,,,,/Dataset/TaxonRecord/NomenclatureAndClassifica...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,NomenclatureAndClassification,http://rs.tdwg.org/plic/terms/version/synonyms...
103,plic,http://rs.tdwg.org/plic/terms/,taxonomicDescriptionUnstructured,unstructured taxonomic description,TaxonomicDescription element in text block for...,,,,/Dataset/TaxonRecord/TaxonomicDescription/Taxo...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,TaxonomicDescription,http://rs.tdwg.org/plic/terms/version/taxonomi...
104,plic,http://rs.tdwg.org/plic/terms/,territoryUnstructured,unstructured territory,"Information associated mostly to vertebrates, ...",,,,/Dataset/TaxonRecord/DemographyAndThreat/Terri...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,DemographyAndThreat,http://rs.tdwg.org/plic/terms/version/territor...
109,plic,http://rs.tdwg.org/plic/terms/,threatStatusUnstructured,unstructured threat status,Information about the status of the taxon.As a...,,,,/Dataset/TaxonRecord/DemographyAndThreat/Threa...,2023-10-10,,http://www.w3.org/1999/02/22-rdf-syntax-ns#Pro...,,DemographyAndThreat,http://rs.tdwg.org/plic/terms/version/threatSt...


Run the following cell to generate an index sorted alphabetically by lowercase term local name. Omit this index if the terms have opaque local names.

In [4]:
# generate the index of terms grouped by category; terms are listed by CURIE in the row
# order of terms_sorted_by_localname and every entry links to the anchor of its term table

text = '### 3.1 Index By Term Name\n\n'
text += '(See also [3.2 Index By Label](#32-index-by-label))\n\n'

text += '**Classes**\n'
text += '\n'
class_rows = terms_sorted_by_localname[terms_sorted_by_localname['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class']
links = []
for row_index,row in class_rows.iterrows():
    curie = row['pref_ns_prefix'] + ":" + row['term_localName']
    curie_anchor = curie.replace(':','_')
    links.append('[' + curie + '](#' + curie_anchor + ')')
if links:
    # Entries are separated by " |" and a newline. The space after the last entry is kept so
    # that the generated text is identical to what this cell has always produced.
    text += ' |\n'.join(links) + ' \n\n'

for category, category_iri in enumerate(display_order):
    text += '**' + display_label[category] + '**\n'
    text += '\n'
    # Select without modifying terms_sorted_by_localname: later cells read the same frame.
    if organized_in_categories:
        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass'] == category_iri]
    else:
        filtered_table = terms_sorted_by_localname

    links = []
    for row_index,row in filtered_table.iterrows():
        curie = row['pref_ns_prefix'] + ":" + row['term_localName']
        curie_anchor = curie.replace(':','_')
        links.append('[' + curie + '](#' + curie_anchor + ')')
    if links:
        text += ' |\n'.join(links) + ' \n\n'

index_by_name = text

print(index_by_name)

### 3.1 Index By Term Name

(See also [3.2 Index By Label](#32-index-by-label))

**Classes**

[plic:AnnualCycleAtomized](#plic_AnnualCycleAtomized) |
[plic:AnnualCycles](#plic_AnnualCycles) |
[plic:BaseElements](#plic_BaseElements) |
[plic:Behavior](#plic_Behavior) |
[plic:DirectThreats](#plic_DirectThreats) |
[plic:Dispersal](#plic_Dispersal) |
[plic:EcologicalSignificance](#plic_EcologicalSignificance) |
[plic:EnvironmentalEnvelope](#plic_EnvironmentalEnvelope) |
[plic:Feeding](#plic_Feeding) |
[plic:FullDescription](#plic_FullDescription) |
[plic:Hierarchy](#plic_Hierarchy) |
[plic:Interactions](#plic_Interactions) |
[plic:Legislation](#plic_Legislation) |
[plic:LifeForm](#plic_LifeForm) |
[plic:ManagementAndConservationAtomized](#plic_ManagementAndConservationAtomized) |
[plic:ManagementAndConservationType](#plic_ManagementAndConservationType) |
[plic:Migratory](#plic_Migratory) |
[plic:MigratoryAtomized](#plic_MigratoryAtomized) |
[plic:MolecularData](#plic_MolecularData) |
[plic:

Run the following cell to generate an index by term label

In [5]:
# generate the index of terms grouped by category; terms are listed by label in the row
# order of terms_sorted_by_label and every entry links to the anchor of its term table

# Starting text used when this index gets no heading of its own (see the next comment).
text = '\n\n'

# Comment out the following two lines if there is no index by local names
text = '### 3.2 Index By Label\n\n'
text += '(See also [3.1 Index By Term Name](#31-index-by-term-name))\n\n'

text += '**Classes**\n'
text += '\n'
class_rows = terms_sorted_by_label[terms_sorted_by_label['type'] == 'http://www.w3.org/2000/01/rdf-schema#Class']
links = []
for row_index,row in class_rows.iterrows():
    curie_anchor = row['pref_ns_prefix'] + "_" + row['term_localName']
    links.append('[' + row['label'] + '](#' + curie_anchor + ')')
if links:
    # Entries are separated by " |" and a newline. The space after the last entry is kept so
    # that the generated text is identical to what this cell has always produced.
    text += ' |\n'.join(links) + ' \n\n'

for category, category_iri in enumerate(display_order):
    # Select without modifying terms_sorted_by_label: it is shared with the other cells.
    if organized_in_categories:
        text += '**' + display_label[category] + '**\n'
        text += '\n'
        filtered_table = terms_sorted_by_label[terms_sorted_by_label['tdwgutility_organizedInClass'] == category_iri]
    else:
        filtered_table = terms_sorted_by_label

    links = []
    previous_label = None
    for row_index,row in filtered_table.iterrows():
        # this is a hack to prevent duplicate labels: a row whose label equals the label of
        # the row just before it (the rows are sorted by label) gets no entry of its own
        if row['label'] != previous_label:
            curie_anchor = row['pref_ns_prefix'] + "_" + row['term_localName']
            links.append('[' + row['label'] + '](#' + curie_anchor + ')')
        previous_label = row['label']
    if links:
        text += ' |\n'.join(links) + ' \n\n'

index_by_label = text

print(index_by_label)

### 3.2 Index By Label

(See also [3.1 Index By Term Name](#31-index-by-term-name))

**Classes**

[Annual Cycles](#plic_AnnualCycles) |
[Atomized Annual Cycle](#plic_AnnualCycleAtomized) |
[Atomized Uses](#plic_UsesAtomized) |
[Base Elements](#plic_BaseElements) |
[Behavior](#plic_Behavior) |
[Direct Threats](#plic_DirectThreats) |
[Dispersal](#plic_Dispersal) |
[Ecological Significance](#plic_EcologicalSignificance) |
[Environmental Envelope](#plic_EnvironmentalEnvelope) |
[Feeding](#plic_Feeding) |
[Full Description](#plic_FullDescription) |
[Hierarchy](#plic_Hierarchy) |
[Interactions](#plic_Interactions) |
[Legislation](#plic_Legislation) |
[Life Form](#plic_LifeForm) |
[Management and Conservation Atomized](#plic_ManagementAndConservationAtomized) |
[Management and Conservation Type](#plic_ManagementAndConservationType) |
[Record Metadata](#plic_RecordMetadata) |
[References](#plic_References) |
[Taxon Record](#plic_TaxonRecord) |
[atomized migratory](#plic_MigratoryAtomized) |
[a

In [6]:
decisions_df = pd.read_csv('https://raw.githubusercontent.com/tdwg/rs.tdwg.org/master/decisions/decisions-links.csv', na_filter=False)

# Group the decision local names by the IRI of the resource they affect, so that each term
# needs one dictionary lookup instead of a scan of the whole decisions table. Within a group
# the names keep the order of the rows in decisions_df.
decisions_by_resource = {}
for affected_iri, decision_name in zip(decisions_df['linked_affected_resource'], decisions_df['decision_localName']):
    decisions_by_resource.setdefault(affected_iri, []).append(decision_name)

# Names of the term metadata columns read below. These are the Audubon Core style headers;
# to build a page from the sample files, use the default header named in each comment instead.
definition_column = 'definition'  # default header: 'rdfs_comment'
usage_column = 'usage'            # default header: 'skos_scopeNote'
notes_column = 'notes'            # default header: 'dcterms_description'
type_column = 'type'              # default header: 'rdf_type'

# Short names shown for the known term types; any other type IRI is shown as it is
# (this should rarely happen).
type_display_names = {
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property': 'Property',
    'http://www.w3.org/2000/01/rdf-schema#Class': 'Class',
    'http://www.w3.org/2004/02/skos/core#Concept': 'Concept',
}


def _term_table_row(label, value):
    """Return the HTML of one body row of a term table: a label cell and a value cell."""
    return '\t\t<tr>\n\t\t\t<td>' + label + '</td>\n\t\t\t<td>' + value + '</td>\n\t\t</tr>\n'


# generate a table for each term, with terms grouped by category

# generate the Markdown for the terms table
# The pieces are collected in a list and joined once at the end.
pieces = ['## 4 Vocabulary\n']
for category, category_iri in enumerate(display_order):
    if organized_in_categories:
        pieces.append('### 4.' + str(category + 1) + ' ' + display_label[category] + '\n')
        pieces.append('\n')
        # insert the comments for the category, if any. The blank line after the comment
        # keeps the first <table> of the section from being glued to the end of the comment.
        category_comment = display_comments[category]
        if category_comment != '':
            pieces.append(category_comment.rstrip('\n') + '\n\n')
        filtered_table = terms_sorted_by_localname[terms_sorted_by_localname['tdwgutility_organizedInClass'] == category_iri]
    else:
        filtered_table = terms_sorted_by_localname

    for row_index,row in filtered_table.iterrows():
        curie = row['pref_ns_prefix'] + ":" + row['term_localName']
        curieAnchor = curie.replace(':','_')  # the anchor that the index entries link to
        uri = row['pref_ns_uri'] + row['term_localName']

        pieces.append('<table>\n')
        pieces.append('\t<thead>\n')
        pieces.append('\t\t<tr>\n')
        pieces.append('\t\t\t<th colspan="2"><a id="' + curieAnchor + '"></a>Term Name  ' + curie + '</th>\n')
        pieces.append('\t\t</tr>\n')
        pieces.append('\t</thead>\n')
        pieces.append('\t<tbody>\n')

        pieces.append(_term_table_row('Term IRI', '<a href="' + uri + '">' + uri + '</a>'))
        pieces.append(_term_table_row('Modified', row['term_modified']))

        if row['version_iri'] != '':
            pieces.append(_term_table_row('Term version IRI', '<a href="' + row['version_iri'] + '">' + row['version_iri'] + '</a>'))

        pieces.append(_term_table_row('Label', row['label']))

        if row['term_deprecated'] != '':
            pieces.append(_term_table_row('', '<strong>This term is deprecated and should no longer be used.</strong>'))

        pieces.append(_term_table_row('Definition', row[definition_column]))

        # Usage, notes and examples are optional; URLs in them are turned into links.
        if row[usage_column] != '':
            pieces.append(_term_table_row('Usage', createLinks(row[usage_column])))

        if row[notes_column] != '':
            pieces.append(_term_table_row('Notes', createLinks(row[notes_column])))

        if row['examples'] != '':
            pieces.append(_term_table_row('Examples', createLinks(row['examples'])))

        if row['equivalentXPath'] != '':
            # The cell holds one or more XPaths separated by "|"; show one per line.
            xPathList = row['equivalentXPath'].split('|')
            xPathString = '<br/>'.join(xPathList)

            # Changes proposed by David Fichtmueller (2022-10-07):
            # a list of more than five XPaths is folded into a collapsible <details> element.
            if len(xPathList) > 5:
                pieces.append('\t\t<tr>\n')
                pieces.append('\t\t\t<td>Equivalent XPath</td>\n')
                pieces.append('\t\t\t<td>\n')
                pieces.append('\t\t\t\t<details>\n')
                pieces.append('\t\t\t\t\t<summary>Show all '+str(len(xPathList))+' XPaths</summary>\n')
                pieces.append(xPathString + '\n')
                pieces.append('</details>\n')
                pieces.append('</td>\n')
                pieces.append('\t\t</tr>\n')
            else:
                pieces.append(_term_table_row('Equivalent XPath', xPathString))

        # controlled vocabulary (the column only exists for vocabulary types 2 and 3)
        if vocab_type in (2, 3) and row['controlled_value_string'] != '':
            pieces.append(_term_table_row('Controlled value', row['controlled_value_string']))

        # controlled vocabulary with skos:broader relationships
        if vocab_type == 3 and row['skos_broader'] != '':
            broaderAnchor = row['skos_broader'].replace(':','_')
            pieces.append(_term_table_row('Has broader concept', '<a href="#' + broaderAnchor + '">' + row['skos_broader'] + '</a>'))

        pieces.append(_term_table_row('Type', type_display_names.get(row[type_column], row[type_column])))

        # Look up decisions related to this term
        for decision_name in decisions_by_resource.get(uri, []):
            pieces.append(_term_table_row('Executive Committee decision', '<a href="http://rs.tdwg.org/decisions/' + decision_name + '">http://rs.tdwg.org/decisions/' + decision_name + '</a>'))

        pieces.append('\t</tbody>\n')
        pieces.append('</table>\n')
        pieces.append('\n')
    pieces.append('\n')
text = ''.join(pieces)
term_table = text

print(term_table)

## 4 Vocabulary
### 4.1 Dataset Metadata

Information about the collections of records.<table>
	<thead>
		<tr>
			<th colspan="2"><a id="plic_Dataset_ID"></a>Term Name  plic:Dataset_ID</th>
		</tr>
	</thead>
	<tbody>
		<tr>
			<td>Term IRI</td>
			<td><a href="http://rs.tdwg.org/plic/terms/Dataset_ID">http://rs.tdwg.org/plic/terms/Dataset_ID</a></td>
		</tr>
		<tr>
			<td>Modified</td>
			<td>2023-10-10</td>
		</tr>
		<tr>
			<td>Term version IRI</td>
			<td><a href="http://rs.tdwg.org/plic/terms/version/Dataset_ID-2023-10-10">http://rs.tdwg.org/plic/terms/version/Dataset_ID-2023-10-10</a></td>
		</tr>
		<tr>
			<td>Label</td>
			<td>Dataset ID</td>
		</tr>
		<tr>
			<td>Definition</td>
			<td>An identifier for the dataset.</td>
		</tr>
		<tr>
			<td>Equivalent XPath</td>
			<td>/Dataset/Metadata/Dataset_ID</td>
		</tr>
		<tr>
			<td>Type</td>
			<td>Property</td>
		</tr>
	</tbody>
</table>

<table>
	<thead>
		<tr>
			<th colspan="2"><a id="plic_References"></a>Term Name  plic:Referenc

Modify to display the indices that you want

In [7]:
# Concatenate the generated sections in the order in which they appear on the page.
# To leave out the index by term name, use the commented-out assignment instead.
#text = index_by_label + term_table
text = index_by_name + index_by_label + term_table

In [8]:
# read in header and footer, merge with terms table, and output
# The files are opened in "with" blocks so that they are closed even if reading or writing fails.

with open(headerFileName, 'rt', encoding='utf-8') as headerObject:
    header = headerObject.read()

with open(footerFileName, 'rt', encoding='utf-8') as footerObject:
    footer = footerObject.read()

# The generated text goes between the static header and footer.
output = header + text + footer
with open(outFileName, 'wt', encoding='utf-8') as outputObject:
    outputObject.write(output)

print('done')

done
