In [17]:
import pandas as pd
import text2term 

#### Read in Microbes in Periodontitis and Health Data:

In [19]:
micro_list = pd.read_excel(
    'Feres_PeriodontalMicrobiome/data/Cleaned Micro List RY_final.xlsx',
    sheet_name='New DATABASE', 
    header=[0, 1])

In [22]:
print(micro_list.columns)

MultiIndex([(                        'Unnamed: 0_level_0',    'Number'),
            (                        'Unnamed: 1_level_0', 'Reference'),
            (       'Species elevated in health (p<0.05)',    'Domain'),
            (       'Species elevated in health (p<0.05)',   'Kingdom'),
            (       'Species elevated in health (p<0.05)',    'Phylum'),
            (       'Species elevated in health (p<0.05)',     'Class'),
            (       'Species elevated in health (p<0.05)',     'Order'),
            (       'Species elevated in health (p<0.05)',    'Family'),
            (       'Species elevated in health (p<0.05)',     'Genus'),
            (       'Species elevated in health (p<0.05)',   'Species'),
            (       'Species elevated in health (p<0.05)', 'Species.1'),
            ('Species elevated in periodontitis (p<0.05)',    'Domain'),
            ('Species elevated in periodontitis (p<0.05)',   'Kingdom'),
            ('Species elevated in periodontitis (p<

In [23]:
micro_perio = micro_list['Species elevated in periodontitis (p<0.05)']

In [24]:
micro_perio.head()

Unnamed: 0,Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species
0,Bacteria,Bacteria,Bacteroidetes,Bacteroidia,Bacteroidales,Prevotellaceae,Alloprevotella,tannerae
1,Bacteria,Bacteria,Bacteroidetes,Bacteroidia,Bacteroidales,Bacteroidaceae,Bacteroidales,sp.
2,Bacteria,Bacteria,Proteobacteria,Epsilonproteobacteria,Campylobacterales,Campylobacteraceae,Campylobacter,gracilis
3,Bacteria,Bacteria,Bacteroidetes,Flavobacteriia,Flavobacteriales,Flavobacteriaceae,Capnocytophaga,leadbetteri
4,Bacteria,Bacteria,Proteobacteria,Epsilonproteobacteria,Campylobacterales,Campylobacteraceae,Campylobacter,rectus


In [44]:
micro_domain = micro_perio['Domain'].dropna().unique().tolist()

In [58]:
micro_domain

['Bacteria']

#### Cache the NCBI Taxon:

In [57]:
ncbi_taxon = text2term.cache_ontology(ontology_url="http://purl.obolibrary.org/obo/ncbitaxon.owl", 
                                 ontology_acronym="NCBITaxon")

2025-09-03 11:38:58 INFO [text2term.term_collector]: Loading ontology http://purl.obolibrary.org/obo/ncbitaxon.owl...
2025-09-03 11:43:12 INFO [text2term.term_collector]: ...done (ontology loading time: 163.14s)
2025-09-03 11:43:12 INFO [text2term.term_collector]: Collecting ontology term details...
2025-09-03 11:45:41 INFO [text2term.term_collector]: ...done: collected 2650614 ontology terms (collection time: 149.25s)
2025-09-03 11:46:42 INFO [text2term.t2t]: Filtered ontology terms to those of type: OntologyTermType.ANY
2025-09-03 11:46:42 INFO [text2term.t2t]: Caching ontology http://purl.obolibrary.org/obo/ncbitaxon.owl to: .cache/NCBITaxon


#### Use it to map domain terms:

In [59]:
ncbi_taxon.map_terms(source_terms=micro_domain)

2025-09-03 11:52:21 INFO [text2term.t2t]: Loading cached ontology from: .cache/NCBITaxon/NCBITaxon-term-details.pickle
2025-09-03 11:52:31 INFO [text2term.t2t]: Filtered ontology terms to those of type: OntologyTermType.CLASS
2025-09-03 11:52:31 INFO [text2term.t2t]: Mapping 1 source terms to NCBITaxon
2025-09-03 11:53:09 INFO [text2term.t2t]: ...done (mapping time: 37.79s seconds)


Unnamed: 0,Source Term ID,Source Term,Mapped Term Label,Mapped Term CURIE,Mapped Term IRI,Mapping Score,Tags
0,https://text2term.utils/RGjSYbiiqPA,Bacteria,Bacteria,NCBITAXON:2,http://purl.obolibrary.org/obo/NCBITaxon_2,0.994,
1,https://text2term.utils/RGjSYbiiqPA,Bacteria,Candidatus Peribacteria bacterium,NCBITAXON:2053688,http://purl.obolibrary.org/obo/NCBITaxon_2053688,0.77,
2,https://text2term.utils/RGjSYbiiqPA,Bacteria,Candidatus Poribacteria bacterium,NCBITAXON:2026781,http://purl.obolibrary.org/obo/NCBITaxon_2026781,0.741,


#### Use it to map genus terms:

In [60]:
micro_genus = micro_perio['Genus'].dropna().unique()

In [61]:
micro_genus

array(['Alloprevotella', 'Bacteroidales', 'Campylobacter',
       'Capnocytophaga', 'Desulfobulbus', 'Dialister', 'Enterococcus',
       'Filifactor', 'Fretibacterium', 'Fusobacterium', 'Lachnospiracee',
       'Leptotrichia', 'Mycoplasma', 'Parvimonas', 'Peptoniphilaceae',
       'Porphyromonas', 'Prevotella', 'Selenomonas', 'Tannerella',
       'Tessnema', 'Treponema', 'Aggregatibacter', 'Lachnospiraceae G8',
       'Mogibacterium', 'Peptostreptococcaceae XI G4',
       'Peptostreptococcaceae XI G5', 'Peptostreptococcaceae XI G6',
       'Bacteroidetes G-3', 'Lachnospiraceae G-8',
       'Peptostreptococcaceae XIG-1', 'Peptostreptococcaceae XIG-4',
       'Peptostreptococcaceae XIG-5', 'Peptostreptococcus',
       'Saccharibacteria TM7 G-1', 'Saccharibacteria TM7 G-5',
       'Veillonellaceae G-1', 'Bacteroidaceae [G-1]', 'Bacteroides',
       'Actinomyces', 'Bacteroidetes [G-3]', 'Bacteroidetes [G-5]',
       'Eubacterium', 'Lachnospiraceae [G-8]', 'Peptococcus',
       'Peptostrept

In [62]:
ncbi_taxon.map_terms(source_terms=micro_genus)

2025-09-03 12:08:12 INFO [text2term.t2t]: Loading cached ontology from: .cache/NCBITaxon/NCBITaxon-term-details.pickle
2025-09-03 12:08:20 INFO [text2term.t2t]: Filtered ontology terms to those of type: OntologyTermType.CLASS
2025-09-03 12:08:20 INFO [text2term.t2t]: Mapping 74 source terms to NCBITaxon
2025-09-03 12:08:59 INFO [text2term.t2t]: ...done (mapping time: 38.65s seconds)


Unnamed: 0,Source Term ID,Source Term,Mapped Term Label,Mapped Term CURIE,Mapped Term IRI,Mapping Score,Tags
0,https://text2term.utils/RGTetkHL5V6,Alloprevotella,Alloprevotella,NCBITAXON:1283313,http://purl.obolibrary.org/obo/NCBITaxon_1283313,0.982,
1,https://text2term.utils/RGTetkHL5V6,Alloprevotella,Alloprevotella sp.,NCBITAXON:1872471,http://purl.obolibrary.org/obo/NCBITaxon_1872471,0.973,
2,https://text2term.utils/RGTetkHL5V6,Alloprevotella,Alloprevotella rava,NCBITAXON:671218,http://purl.obolibrary.org/obo/NCBITaxon_671218,0.825,
3,https://text2term.utils/RmtdD8ijUgq,Bacteroidales,Bacteroidales,NCBITAXON:171549,http://purl.obolibrary.org/obo/NCBITaxon_171549,0.974,
4,https://text2term.utils/RmtdD8ijUgq,Bacteroidales,Bacteroidales bacterium,NCBITAXON:2030927,http://purl.obolibrary.org/obo/NCBITaxon_2030927,0.874,
...,...,...,...,...,...,...,...
217,https://text2term.utils/REFF4sUTPuB,Peptoniphilaceae [G-1],Peptoniphilaceae bacterium,NCBITAXON:1891242,http://purl.obolibrary.org/obo/NCBITaxon_1891242,0.839,
218,https://text2term.utils/REFF4sUTPuB,Peptoniphilaceae [G-1],unclassified Peptoniphilaceae,NCBITAXON:1689302,http://purl.obolibrary.org/obo/NCBITaxon_1689302,0.748,
219,https://text2term.utils/RJ2WgkGb5in,Cardiobacterium,Cardiobacterium,NCBITAXON:2717,http://purl.obolibrary.org/obo/NCBITaxon_2717,0.992,
220,https://text2term.utils/RJ2WgkGb5in,Cardiobacterium,Cardiobacterium sp.,NCBITAXON:2382124,http://purl.obolibrary.org/obo/NCBITaxon_2382124,0.978,
