In [1]:
!pip install pandas pyarrow SPARQLWrapper rdflib

Collecting pyarrow
  Downloading pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl (28.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.4/28.4 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting isodate<0.7.0,>=0.6.0
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow, isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0 isodate-0.6.1 pyarrow-16.1.0 rdflib-7.0.0


In [11]:
import pandas as pd #data manipulation and analysis library that provides data structures like DataFrames to work with structured data
import pyarrow #enables reading and writing of Parquet files
from SPARQLWrapper import SPARQLWrapper, JSON #execute SPARQL queries
import rdflib #querying of RDF data, including JSON-LD
import json #library for parsing, generating, and manipulating JSON
import subprocess # run shell commmands
import os #interact with the operating system
import re  # Import the regular expression module
from IPython.display import display

In [14]:
silva_taxa = '/' #curated files of most common marine microbial taxa 

In [15]:
taxa_values = ' '.join([f'"{taxon}"' for taxon in silva_taxa])

In [16]:
# endpoint where the data is stored, such as the ODIS graph:
endpoint = "http://graph.oceaninfohub.org/blazegraph/namespace/oih/sparql"

In [None]:
#Template of the SPARQL query
#Dwc = Darwin Code
#schema.org dataset
#""" allowing to write multi-line query
#LCASE : performs case-insensitive comparisons and REGEX for complex pattern matching such as plural/singular forms and other text patterns 
#If taxonomy is available, then it will be included in the result, if not, then the query still returns a results without those fields
#The marine or microbial entity must be matched for the "marine microorganism" to be valid

sparql_microbe_query = """ 
SELECT ?dataset ?associatedTaxa ?class ?family ?genericName ?genus ?higherClassification ?kingdom ?order ?phylum ?scientificName ?superfamily ?taxonAttributes ?verbatimIdentification ?associatedSequences ?acceptedNameUsageID ?acceptedNameUsage ?GeologicalContext ?Occurrence ?habitat ?occurrenceRemarks ?occurrenceDetails ?MaterialEntity ?MaterialSample

WHERE {{
  VALUES ?taxa {{ {taxa_values} }}
  ?dataset a schema:Dataset ;
           dwc:scientificName ?scientificName ;
           OPTIONAL {{ ?dataset dwc:class ?class }} ;
           OPTIONAL {{ ?dataset dwc:family ?family }} ;
           OPTIONAL {{ ?dataset dwc:genericName ?genericName }} ;
           OPTIONAL {{ ?dataset dwc:genus ?genus }} ;
           OPTIONAL {{ ?dataset dwc:higherClassification ?higherClassification }} ;
           OPTIONAL {{ ?dataset dwc:kingdom ?kingdom }} ;
           OPTIONAL {{ ?dataset dwc:order ?order }} ;
           OPTIONAL {{ ?dataset dwc:phylum ?phylum }} ;
           OPTIONAL {{ ?dataset dwc:superfamily ?superfamily }} ;
           OPTIONAL {{ ?dataset dwc:taxonAttributes ?taxonAttributes }} ;
           OPTIONAL {{ ?dataset dwc:verbatimIdentification ?verbatimIdentification }} ;
           OPTIONAL {{ ?dataset dwc:associatedSequences ?associatedSequences }} ;
           OPTIONAL {{ ?dataset dwc:acceptedNameUsageID ?acceptedNameUsageID }} ;
           OPTIONAL {{ ?dataset dwc:acceptedNameUsage ?acceptedNameUsage }} ;
           OPTIONAL {{ ?dataset dwc:GeologicalContext ?GeologicalContext }} ;
           OPTIONAL {{ ?dataset dwc:Occurrence ?Occurrence }} ;
           OPTIONAL {{ ?dataset dwc:habitat ?habitat }} ;
           OPTIONAL {{ ?dataset dwc:occurrenceRemarks ?occurrenceRemarks }} ;
           OPTIONAL {{ ?dataset dwc:occurrenceDetails ?occurrenceDetails }} ;
           OPTIONAL {{ ?dataset dwc:MaterialEntity ?MaterialEntity }} ;
           OPTIONAL {{ ?dataset dwc:MaterialSample ?MaterialSample }} .
  FILTER (
    LCASE(?scientificName) IN ({taxa_values}) &&
    LCASE(?habitat) IN ("marine", "pelagic", "oceanic", "maritime", "coastal", "seafaring", "littoral", "benthic", "abyssal", "planktonic", "bathyal", "epipelagic", "mesopelagic", "upwelling", "downwelling", "saltwater", "gyre", "MPA") &&
    REGEX(LCASE(?MaterialEntity), "microbe|bacteria|bacterium|bacillus|microflora|microbial|prokaryote|protist|archaea|microorganism")
  )
}}
"""


In [None]:
# Set up the SPARQLWrapper
sparql = SPARQLWrapper(sparql_endpoint)
sparql.setQuery(sparql_microbe_query)
sparql.setReturnFormat(JSON)

In [None]:
#sparql.query() sends the query to the SPARQL endpoint and retrieves results
#convert() processes raw results and converts them to a JSON object, a list of bindings
sparql_microbe_query_results = sparql.query().convert()

In [1]:
#use spaCy which is an open-source software library for advanced natural language processing
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting spacy
  Downloading spacy-3.7.5-cp39-cp39-macosx_10_9_x86_64.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting weasel<0.5.0,>=0.1.0
  Downloading weasel-0.4.1-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wasabi<1.2.0,>=0.9.1
  Downloading wasabi-1.1.3-py3-none-any.whl (27 kB)
Collecting srsly<3.0.0,>=2.4.3
  Downloading srsly-2.4.8-cp39-cp39-macosx_10_9_x86_64.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.1/493.1 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting preshed<3.1.0,>=3.0.2
  Downloading preshed-3.0.9-cp39-cp39-macosx_10_9_x86_64.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.5/133.5 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting catalogue<2.1.0,>

In [14]:
import spacy

# Load the english version of the spaCy model which includes the NER pipeline
nlp = spacy.load("en_core_web_sm") #Natural language processing (NLP)

In [42]:
#model sample data from my favorite journal article 
#Cavicchioli, R., Ripple, W.J., Timmis, K.N. et al. Scientists’ warning to humanity: microorganisms and climate change. 
#Nat Rev Microbiol 17, 569–586 (2019). https://doi.org/10.1038/s41579-019-0222-5
sample_text = """
Marine biomes cover ~70% of Earth’s surface and range from coastal estuaries, mangroves and coral reefs to the open oceans (Fig. 1). Phototrophic microorganisms use the sun’s energy in the top 200 m of the water column, whereas marine life in deeper zones uses organic and inorganic chemicals for energy10. In addition to sunlight, the availability of other energy forms and water temperature (ranging from approximately −2 °C in ice-covered seas to more than 100 °C in hydrothermal vents) influence the composition of marine communities11. Rising temperatures not only affect biological processes but also reduce water density and thereby stratification and circulation, which affect organismal dispersal and nutrient transport. Precipitation, salinity and winds also affect stratification, mixing and circulation. Nutrient inputs from air, river and estuarine flows also affect microbial community composition and function, and climate change affects all these physical factors.
Fig. 1: Microorganisms and climate change in marine and terrestrial biomes.Marine environments, microbial primary production contributes substantially to CO2sequestration. Marine microorganisms also recycle nutrients for use in the marine food web and in the process release CO2 to the atmosphere. In a broad range of terrestrial environments, microorganisms are the key decomposers of organic matter and release nutrients in the soil for plant growth as well as CO2 and CH4 into the atmosphere. Microbial biomass and other organic matter (remnants of plants and animals) are converted to fossil fuels over millions of years. By contrast, burning of fossil fuels liberates greenhouse gases in a small fraction of that time. As a result, the carbon cycle is extremely out of balance, and atmospheric CO2 levels will continue to rise as long as fossil fuels continue to be burnt. The many effects of human activities, including agriculture, industry, transport, population growth and human consumption, combined with local environmental factors, including soil type and light, greatly influence the complex network of microbial interactions that occur with other microorganisms, plants and animals. These interactions dictate how microorganisms respond to and affect climate change (for example, through greenhouse gas emissions) and how climate change (for example, higher CO2 levels, warming, and precipitation changes) in turn affect microbial responses. OMZ, oxygen minimum zone.
The overall relevance of microorganisms to ocean ecosystems can be appreciated from their number and biomass in the water column and subsurface: the total number of cells is more than 1029 (refs8,12,13,14,15,16) and the Census of Marine Life estimates that 90% of marine biomass is microbial. Beyond their sheer numbers, marine microorganisms fulfil key ecosystem functions. By fixing carbon and nitrogen, and remineralizing organic matter, marine microorganisms form the basis of ocean food webs and thus global carbon and nutrient cycles13. The sinking, deposition and burial of fixed carbon in particulate organic matter to marine sediments is a key, long-term mechanism for sequestering CO2 from the atmosphere. Therefore, the balance between regeneration of CO2 and nutrients via remineralization versus burial in the seabed determines the effect on climate change.
In addition to getting warmer (from increased atmospheric CO2 concentrations enhancing the greenhouse effect), oceans have acidified by ~0.1 pH units since preindustrial times, with further reductions of 0.3–0.4 units predicted by the end of the century17,18,19. Given the unprecedented rate of pH change19,20,21, there is a need to rapidly learn how marine life will respond22. The impact of elevated greenhouse gas concentrations on ocean temperature, acidification, stratification, mixing, thermohaline circulation, nutrient supply, irradiation and extreme weather events affects the marine microbiota in ways that have substantial environmental consequences, including major shifts in productivity, marine food webs, carbon export and burial in the seabed19,23,24,25,26,27,28,29.
Microorganisms affect climate change
Marine phytoplankton perform half of the global photosynthetic CO2 fixation (net global primary production of ~50 Pg C per year) and half of the oxygen production despite amounting to only ~1% of global plant biomass30. In comparison with terrestrial plants, marine phytoplankton are distributed over a larger surface area, are exposed to less seasonal variation and have markedly faster turnover rates than trees (days versus decades)30. Therefore, phytoplankton respond rapidly on a global scale to climate variations. These characteristics are important when one is evaluating the contributions of phytoplankton to carbon fixation and forecasting how this production may change in response to perturbations. Predicting the effects of climate change on primary productivity is complicated by phytoplankton bloom cycles that are affected by both bottom-up control (for example, availability of essential nutrients and vertical mixing) and top-down control (for example, grazing and viruses)27,30,31,32,33,34. Increases in solar radiation, temperature and freshwater inputs to surface waters strengthen ocean stratification and consequently reduce transport of nutrients from deep water to surface waters, which reduces primary productivity30,34,35. Conversely, rising CO2 levels can increase phytoplankton primary production, but only when nutrients are not limiting36,37,38.
Some studies indicate that overall global oceanic phytoplankton density has decreased in the past century39, but these conclusions have been questioned because of the limited availability of long-term phytoplankton data, methodological differences in data generation and the large annual and decadal variability in phytoplankton production40,41,42,43. Moreover, other studies suggest a global increase in oceanic phytoplankton production44 and changes in specific regions or specific phytoplankton groups45,46. The global sea ice (Sea Ice Index) is declining, leading to higher light penetration and potentially more primary production47; however, there are conflicting predictions for the effects of variable mixing patterns and changes in nutrient supply and for productivity trends in polar zones34. This highlights the need to collect long-term data on phytoplankton production and microbial community composition. Long-term data are needed to reliably predict how microbial functions and feedback mechanisms will respond to climate change, yet only very few such datasets exist (for example, the Hawaii Ocean Time-series and the Bermuda Atlantic Time-series Study)48,49,50. In this context, the Global Ocean Sampling Expedition51, transects of the Southern Ocean52,53, and the Tara Oceans Consortium11,54,55,56,57,58,59 provide metagenome data that are a valuable baseline of marine microorganisms.
Diatoms perform 25–45% of total primary production in the oceans60,61,62, owing to their prevalence in open-ocean regions when total phytoplankton biomass is maximal63. Diatoms have relatively high sinking speeds compared with other phytoplankton groups, and they account for ~40% of particulate carbon export to depth62,64. Physically driven seasonal enrichments in surface nutrients favour diatom blooms. Anthropogenic climate change will directly affect these seasonal cycles, changing the timing of blooms and diminishing their biomass, which will reduce primary production and CO2 uptake65. Remote sensing data suggest a global decline of diatoms between 1998 and 2012, particularly in the North Pacific, which is associated with shallowing of the surface mixed layer and lower nutrient concentrations46.
In addition to the contribution of marine phytoplankton to CO2 sequestration30,66,67,68, chemolithoautotrophic archaea and bacteria fix CO2 under dark conditions in deep ocean waters69 and at the surface during polar winter70. Marine bacteria and archaea also contribute substantially to surface ocean respiration and cycling of many elements18. Seafloor methanogens and methanotrophs are important producers and consumers of CH4, but their influence on the atmospheric flux of this greenhouse gas is uncertain71. Marine viruses, bacteriovorous bacteria and eukaryotic grazers are also important components of microbial food webs; for example, marine viruses influence how effectively carbon is sequestered and deposited into the deep ocean57. Climate change affects predator–prey interactions, including virus–host interactions, and thereby global biogeochemical cycles72.
Oxygen minimum zones (OMZs) have expanded in the past 50 years as a result of ocean warming, which reduces oxygen solubility73,74,75. OMZs are global sinks for reactive nitrogen, and microbial production of N2 and N2O accounts for ~25–50% of nitrogen loss from the ocean to the atmosphere. Furthermore, OMZs are the largest pelagic methane reservoirs in the ocean and contribute substantially to open ocean methane cycling. The observed and predicted future expansion of OMZs may therefore considerably affect ocean nutrient and greenhouse gas budgets, and the distributions of oxygen-dependent organisms73,74,75.
The top 50 cm of deep-sea sediments contains ~1 × 1029 microorganisms8,16, and the total abundances of archaea and bacteria in these sediments increase with latitude (from 34° N to 79° N) with specific taxa (such as Marine Group I Thaumarchaeota) contributing disproportionately to the increase76. Benthic microorganisms show biogeographic patterns and respond to variations in the quantity and quality of the particulate matter sinking to the seafloor77. As a result, climate change is expected to particularly affect the functional processes that deep-sea benthic archaea perform (such as ammonia oxidation) and associated biogeochemical cycles76.
Aerosols affect cloud formation, thereby influencing sunlight irradiation and precipitation, but the extent to which and the manner in which they influence climate remains uncertain78. Marine aerosols consist of a complex mixture of sea salt, non-sea-salt sulfate and organic molecules and can function as nuclei for cloud condensation, influencing the radiation balance and, hence, climate79,80. For example, biogenic aerosols in remote marine environments (for example, the Southern Ocean) can increase the number and size of cloud droplets, having similar effects on climate as aerosols in highly polluted regions80,81,82,83. Specifically, phytoplankton emit dimethylsulfide, and its derivate sulfate promotes cloud condensation79,84. Understanding the ways in which marine phytoplankton contribute to aerosols will allow better predictions of how changing ocean conditions will affect clouds and feed back on climate84. In addition, the atmosphere itself contains ~1022 microbial cells, and determining the ability of atmospheric microorganisms to grow and form aggregates will be valuable for assessing their influence on climate8.
Vegetated coastal habitats are important for carbon sequestration, determined by the full trophic spectrum from predators to herbivores, to plants and their associated microbial communities85. Human activity, including anthropogenic climate change, has reduced these habitats over the past 50 years by 25–50%, and the abundance of marine predators has dropped by up to 90%85,86,87. Given such extensive perturbation, the effects on microbial communities need to be evaluated because microbial activity determines how much carbon is remineralized and released as CO2 and CH4.
Climate change affects microorganisms
Climate change perturbs interactions between species and forces species to adapt, migrate and be replaced by others or go extinct28,88. Ocean warming, acidification, eutrophication and overuse (for example, fishing, tourism) together cause the decline of coral reefs and may cause ecosystems shifts towards macroalgae89,90,91,92,93 and benthic cyanobacterial mats94,95. The capacity for corals to adapt to climate change is strongly influenced by the responses of their associated microorganisms, including microalgal symbionts and bacteria96,97,98. The hundreds to thousands of microbial species that live on corals are crucial for host health, for example by recycling the waste products, by provisioning essential nutrients and vitamins and by assisting the immune system to fight pathogens99. However, environmental perturbation or coral bleaching can change the coral microbiome rapidly. Such shifts undoubtedly influence the ecological functions and stability of the coral–microorganism system, potentially affecting the capacity and pace at which corals adapt to climate change, and the relationships between corals and other components of the reef ecosystem99,100.
Generally, microorganisms can disperse more easily than macroscopic organisms. Nevertheless, biogeographic distinctions occur for many microbial species, with dispersal, lifestyle (for example, host association) and environmental factors strongly influencing community composition and function54,101,102,103. Ocean currents and thermal and latitudinal gradients are particularly important for marine communities104,105. If movement to more favourable environments is impossible, evolutionary change may be the only survival mechanism88. Microorganisms, such as bacteria, archaea and microalgae, with large population sizes and rapid asexual generation times have high adaptive potential22. Relatively few studies have examined evolutionary adaptation to ocean acidification or other climate change-relevant environmental variables22,28. Similarly, there is limited understanding of the molecular mechanisms of physiological responses and the implications of those responses for biogeochemical cycles18.
However, several studies have demonstrated effects of elevated CO2 levels on individual phytoplankton species, which may disrupt broader ecosystem-level processes. A field experiment demonstrated that increasing CO2 levels provide a selective advantage to a toxic microalga, Vicicitus globosus, leading to disruption of organic matter transfer across trophic levels106. The marine cyanobacterial genus Trichodesmium responds to long-term (4.5-year) exposure to elevated CO2 levels with irreversible genetic changes that increase nitrogen fixation and growth107. For the photosynthetic green alga Ostreococcus tauri, elevated CO2levels increase growth, cell size and carbon-to-nitrogen ratios108. Higher CO2 levels also affect the population structure of O. tauri, with changes in ecotypes and niche occupation, thereby affecting the broader food webs and biogeochemical cycles108. Rather than producing larger cells, the calcifying phytoplankton species Emiliania huxleyi responds to the combined effects of elevated temperature and elevated CO2 levels (and associated acidification) by producing smaller cells that contain less carbon109. However, for this species, overall production rates do not change as a result of evolutionary adaptation to higher CO2 levels109. Responses to CO2 levels differ between communities (for example, between Arctic phytoplankton and Antarctic phytoplankton110). A mesocosm study identified variable changes in the diversity of viruses that infect E. huxleyi when it is growing under elevated CO2levels, and noted the need to determine whether elevated CO2 levels directly affected viruses, hosts or the interactions between them111. These examples illustrate the need to improve our understanding of evolutionary processes and incorporate that knowledge into predictions of the effects of climate change.
Ocean acidification presents marine microorganisms with pH conditions well outside their recent historical range, which affects their intracellular pH homeostasis18,112. Species that are less adept at regulating internal pH will be more affected, and factors such as organism size, aggregation state, metabolic activity and growth rate influence the capacity for regulation112.
Lower pH causes bacteria and archaea to change gene expression in ways that support cell maintenance rather than growth18. In mesocosms with low phytoplankton biomass, bacteria committed more resources to pH homeostasis than bacteria in nutrient-enriched mesocosms with high phytoplankton biomass. Consequently, ocean acidification is predicted to alter the microbial food web via changes in cellular growth efficiency, carbon cycling and energy fluxes, with the biggest effects expected in the oligotrophic regions, which include most of the ocean18. Experimental comparisons of Synechococcus sp. growth under both present and predicted future pH concentrations showed effects not only on the cyanobacteria but also on the cyanophage viruses that infect them113.
Environmental temperature and latitude correlate with the diversity, distribution and/or temperature optimum (Topt) of certain marine taxa, with models predicting that rising temperatures will cause a poleward shift of cold-adapted communities52,114,115,116,117,118. However, Topt of phytoplankton from polar and temperate waters was found to be substantially higher than environmental temperatures, and an eco-evolutionary model predicted that Topt for tropical phytoplankton would be substantially higher than observed experimental values116. Understanding how well microorganisms are adapted to environmental temperature and predicting how they will respond to warming requires assessments of more than Topt, which is generally a poor indicator of physiological and ecological adaptation of microorganisms from cold environments119.
Many environmental and physiological factors influence the responses and overall competitiveness of microorganisms in their native environment. For example, elevated temperatures increase protein synthesis in eukaryotic phytoplankton while reducing cellular ribosome concentration120. As the biomass of eukaryotic phytoplankton is ~1 Gt C (ref.13) and ribosomes are phosphate rich, climate change-driven alteration of their nitrogen-to-phosphate ratio will affect resource allocation in the global ocean120. Ocean warming is thought to favour smaller plankton types over larger ones, changing biogeochemical fluxes such as particle export121. Increased ocean temperatures, acidification and decreased nutrient supplies are projected to increase the extracellular release of dissolved organic matter from phytoplankton, with changes in the microbial loop possibly causing increased microbial production at the expense of higher trophic levels122. Warming can also alleviate iron limitation of nitrogen-fixing cyanobacteria, with potentially profound implications for new nitrogen supplied to food webs of the future warming oceans123. Careful attention needs to be paid to how to quantify and interpret responses of environmental microorganisms to ecosystem changes and stresses linked to climate change124,125. Key questions thus remain about the functional consequences of community shifts, such as changes in carbon remineralization versus carbon sequestration, and nutrient cycling.

"""

In [43]:
# Initial seed list of microbial-related terms
seed_terms = ["microbe", "bacteria", "bacterium", "bacillus", "microflora", "microbial", "prokaryote", "protist", "archaea", "microorganism"]

In [44]:
# Function to find similar terms using word vectors
def find_similar_terms(seed_terms, nlp, topn=10):
    similar_terms = set(seed_terms)
    for term in seed_terms:
        token = nlp(term)[0]  # Get the first token in the Doc
        if token.has_vector:
            most_similar = token.vocab.vectors.most_similar(token.vector.reshape(1, token.vector.shape[0]), n=topn)[0]
            similar_terms.update([nlp.vocab.strings[w] for w in most_similar if isinstance(w, int)])
    return similar_terms


In [45]:
expanded_terms = find_similar_terms(seed_terms, nlp)

In [46]:
doc = nlp(text_corpus)

In [47]:
# Extract entities and terms
terms_freq = defaultdict(int)
for token in doc:
    if any(term in token.text.lower() for term in expanded_terms):
        terms_freq[token.text.lower()] += 1


In [48]:
# Remove duplicates and sort the list
all_terms = sorted(terms_freq.keys())

In [49]:
# Display the generated index
print("Generated Index of Microbial-Related Terms:")
for term in all_terms:
    print(term)

Generated Index of Microbial-Related Terms:
archaea
bacillus
bacteria
bacterium
microbial
microorganisms
protists
