# BF ratio
> Use Greengenes taxonomy to calculate the BF ratio for samples

In [1]:
import pandas as pd
import numpy as np
import ete3


In [2]:
# Load the annotated Greengenes tree. Phylum membership is derived further
# down from the subtrees rooted at phylum-labelled nodes.
# format=1 makes ete3 read internal node names from the Newick string.

tree = ete3.Tree(
    "greengenes/data/gg_13_5_otus_99_annotated.tree", format=1, quoted_node_names=True
)


In [3]:
def is_firmicutes(node):
    """Return True if ``node`` is labelled with the Firmicutes phylum.

    A prefix match is used rather than equality, so any node whose name
    begins with ``p__Firmicutes`` counts.

    Args:
        node: Any object exposing a string ``name`` attribute.

    Returns:
        bool: Whether ``node.name`` starts with ``"p__Firmicutes"``.
    """
    return node.name.startswith("p__Firmicutes")

def is_bacteroidetes(node):
    """Return True if ``node`` is labelled with the Bacteroidetes phylum.

    A prefix match is used rather than equality, so any node whose name
    begins with ``p__Bacteroidetes`` counts.

    Args:
        node: Any object exposing a string ``name`` attribute.

    Returns:
        bool: Whether ``node.name`` starts with ``"p__Bacteroidetes"``.
    """
    return node.name.startswith("p__Bacteroidetes")
    
# Collect every node in the tree that carries each phylum label.
f_nodes = [n for n in tree.traverse() if is_firmicutes(n)]
b_nodes = [n for n in tree.traverse() if is_bacteroidetes(n)]

print(len(f_nodes), "Firmicutes nodes")
print(len(b_nodes), "Bacteroidetes nodes")

# Expand each labelled node into the names of the leaves beneath it. Using a
# set means a leaf reachable from more than one labelled node is kept once.
f_leaves = set()
for node in f_nodes:
    f_leaves.update(node.get_leaf_names())

b_leaves = set()
for node in b_nodes:
    b_leaves.update(node.get_leaf_names())

print(len(f_leaves), "Firmicutes leaves")
print(len(b_leaves), "Bacteroidetes leaves")

15 Firmicutes nodes
1 Bacteroidetes nodes
55677 Firmicutes leaves
25811 Bacteroidetes leaves


In [4]:
# Save the leaf names as plain-text lists, one name per line.
# The names are sorted before writing so the files come out identical on every
# run; iterating a set of strings directly yields an order that can differ
# from one interpreter session to the next.
with open("greengenes/data/firmicutes.txt", "w") as f:
    f.write("\n".join(sorted(f_leaves)))

with open("greengenes/data/bacteroidetes.txt", "w") as f:
    f.write("\n".join(sorted(b_leaves)))

In [5]:
# NOTE(review): this import sits below the cell that writes firmicutes.txt and
# bacteroidetes.txt -- presumably src.greengenes depends on those files; confirm
# before hoisting it into the import cell at the top of the notebook.
from src.greengenes import calculate_fb_ratio

# Load the table; the first CSV column is used as the row index.
otu_table = pd.read_csv("ihmp/ibd_data.csv.gz", index_col=0)
otu_table

Unnamed: 0_level_0,patient,visit,sample,1000269,1008348,1009894,1012376,1017181,1017413,1019823,...,964363,968675,968954,971907,975306,976470,979707,988375,988932,999046
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
feces,3001,4,CSM5FZ3N,0.000003,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.000022,0.000041,0.000000,0.000451,0.000691,0.000000,0.000000,0.000011,0.000000
feces,3002,5,CSM5FZ3X,0.000003,0.000006,0.0,0.0,0.000012,0.000000,0.0,...,0.0,0.000062,0.000042,0.000021,0.000665,0.000009,0.000000,0.000128,0.000006,0.000003
feces,3002,6,CSM5FZ3Z,0.000000,0.000000,0.0,0.0,0.000000,0.000012,0.0,...,0.0,0.000061,0.000004,0.000000,0.000210,0.000000,0.000000,0.000000,0.000020,0.000000
feces,3002,8,CSM5FZ44,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
feces,3002,9,CSM5FZ46,0.000000,0.000000,0.0,0.0,0.000000,0.000005,0.0,...,0.0,0.000020,0.000010,0.000005,0.000243,0.000000,0.000000,0.000000,0.000010,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feces,2021,11,MSM5LLIO,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.000031,0.000014,0.000010,0.001298,0.000010,0.000000,0.001417,0.000005,0.000000
feces,2026,4,MSM5LLIQ,0.000162,0.000000,0.0,0.0,0.000019,0.000011,0.0,...,0.0,0.002586,0.000686,0.001035,0.000233,0.000025,0.000000,0.000000,0.000206,0.000008
feces,2027,4,MSM5LLIS,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
feces,2014,9,MSM5ZOJY,0.000031,0.000000,0.0,0.0,0.000017,0.000021,0.0,...,0.0,0.003953,0.000159,0.002219,0.005125,0.000291,0.000000,0.000000,0.000025,0.000002


In [8]:
# Compute one ratio per row of the table, echoing each value as it is produced.
ratios = []

for i, sample in otu_table.iterrows():
    fb = calculate_fb_ratio(sample)
    print(i, fb)
    ratios.append(fb)

# NOTE(review): the index reused here is the table's row index, which prints as
# the same label ("feces") for every row, so rows of the CSV are distinguishable
# only by position -- confirm that downstream readers rely on row order.
# NOTE(review): some ratios come out as nan; presumably a sample with neither
# phylum present -- confirm how calculate_fb_ratio handles that case.
fb_series = pd.Series(ratios, index=otu_table.index)
fb_series.to_csv("ihmp/ibd_fb.csv")

feces 0.18827867019246733
feces 0.1332801771871502
feces 0.11790559908274272
feces 0.10526315789473598
feces 0.05765593141228584
feces 0.0
feces 0.5
feces 1.1642056955236078
feces 5.160771704180081
feces 0.11893921438995349
feces 0.23711623812492463
feces 0.0
feces 1.5898308850954992
feces 0.1975068172964518
feces 0.04664776657722845
feces 0.12783243712869813
feces 0.03435288598125674
feces 0.05263157894736841
feces 0.19999999999999993
feces 78.15508474576612
feces 0.3302627250082159
feces 0.054183008726935974
feces 240.11829652998776
feces 0.03238063770031166
feces 0.5969012279323545
feces 0.3401950162513513
feces 0.0
feces 1.4341048820161515
feces 0.0
feces 1.5326649915388895
feces nan
feces 0.8938959909929102
feces 0.44927209907216364
feces 0.7719298245614027
feces 1.4042678659883696
feces 0.5714285714285715
feces 0.43906810035841953
feces 0.19724731833948125
feces 3.1486349494878683
feces 0.03870056955555011
feces 0.10166288737717015
feces 0.012150894608244986
feces 0.2559969289920