In [14]:
#Bowel Movement Frequency
#Gut Microbiome Analysis
#James Johnson
#Credit to Christian Diener, PhD as noted
#v 12-22-22

# Use this Arivale snapshot path
#frozen_ss_path='/shared-data/snapshots/arivale_snapshot_ISB_2020-03-16_2156'

# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns

# Get the library
from arivale_data_interface import *
import arivale_data_interface as adi

# Set display options
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 1000)
from IPython.display import display

# Apply these settings so the library knows where the data is
adi.apply_isb_config()

# If you want plots to show up in the notebook you need to run this
%matplotlib inline
plt.rcParams["figure.figsize"] = [10, 10]

In [15]:
#Obtain Bowel Movement Frequency (BMF) data:
# Result: one row per participant (public_client_id) holding the bowel-movement
# answer from that participant's earliest assessment, encoded as an ordinal code.
BOWEL_COL = 'assessment:digestion:bowel-movements:enum'

# Questionnaire categories and the ordinal code assigned to each.
# NOTE: these are ordinal codes (1 = least frequent ... 4 = most frequent), NOT
# bowel movements per week. If weekly minimums are wanted, use [1, 3, 7, 28].
bowel_list = ['(1) 2 or fewer times per week', '(2) 3-6 times per week', '(3) 1-3 times daily', '(4) 4+ times daily']
bowel_r = [1,2,3,4]

assessments = adi.get_snapshot('assessments')
bowel = (
    assessments
    .sort_values('days_in_program', ascending=True)  # earliest assessment first
    .drop_duplicates(subset='public_client_id')      # keep only the first timepoint per participant
    .loc[:, ['public_client_id', BOWEL_COL]]
    .rename(columns={BOWEL_COL: 'bowel'})
    # Drop participants whose first assessment has no bowel answer
    # (5764 -> 3955 individuals in the original run). This replaces the earlier
    # fillna(0) over the whole assessments table followed by a `!= 0` filter.
    .dropna(subset=['bowel'])
    .assign(bowel=lambda df: df['bowel'].replace(dict(zip(bowel_list, bowel_r))))
    .reset_index(drop=True)
)
bowel

  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))


Unnamed: 0,public_client_id,bowel
0,01092042,3
1,01568974,3
2,01684342,2
3,01601230,3
4,01124029,3
...,...,...
3950,01395286,3
3951,01232614,3
3952,01181682,3
3953,01380110,3


In [16]:
# Load the processed 16S table (one row per sample id / sequence hash with its
# count, as shown in the preview) and display the first five rows.
asvs = pd.read_csv(
    "/proj/arivale/microbiome/16S_processed/asvs.csv"
)
asvs.head(n=5)

Unnamed: 0,id,hash,count
0,22001612560016|GFM-1079-001,c2d3fc09212e226b3a2c3398a1af9436,1285
1,22001612560023|GFM-1079-001,c2d3fc09212e226b3a2c3398a1af9436,2431
2,22001612560062|GFM-1079-001,c2d3fc09212e226b3a2c3398a1af9436,4908
3,22001612560065|GFM-1079-001,c2d3fc09212e226b3a2c3398a1af9436,3944
4,22001612560067|GFM-1079-001,c2d3fc09212e226b3a2c3398a1af9436,5990


In [17]:
# Report how many distinct sample ids and distinct sequence hashes the table holds.
n_samples = asvs["id"].nunique()
n_sequences = asvs["hash"].nunique()
print("#samples:\t", n_samples)
print("#sequences:\t", n_sequences)

#samples:	 5232
#sequences:	 89022


In [18]:
# Attach per-sample metadata to every ASV count row (inner join on sample id).
samples = pd.read_csv("/proj/arivale/microbiome/16S_processed/metadata.csv")
sample_cols = ["id", "public_client_id", "sex", "age", "days_in_program"]
merged = asvs.merge(samples[sample_cols], on="id")

# Attach the taxonomic assignment of each sequence. The taxonomy file keys
# sequences by "id", which corresponds to "hash" in the count table; the raw
# sequence column is not needed.
tax = (
    pd.read_csv("/proj/arivale/microbiome/16S_processed/taxonomy.csv")
    .drop(columns="sequence")
    .rename(columns={"id": "hash"})
)
merged = merged.merge(tax, on="hash")

#Create taxa genus df
# Taxonomic ranks that make up a taxon name (append 'Species' for species-level names).
TAXA_RANKS = ['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus']
taxa = merged[['public_client_id', 'count'] + TAXA_RANKS]

#Add prefixes to taxa names for consistency
# Name = 'taxa_<Kingdom>.<Phylum>.<Class>.<Order>.<Family>.<Genus>'.
# Unassigned ranks are NaN and become the literal string 'nan' in the name.
taxon_name = 'taxa_' + taxa[TAXA_RANKS[0]].astype(str)
for rank in TAXA_RANKS[1:]:
    taxon_name = taxon_name + '.' + taxa[rank].astype(str)
taxa = taxa.assign(Name=taxon_name)

#pivot df into one with taxa as columns and PCI as index with counts as values
# pivot_table groups (and orders) by public_client_id / Name itself, so no prior
# sort or MultiIndex construction is needed. fill_value=0 leaves no NaN behind,
# which makes a subsequent dropna unnecessary.
# NOTE(review): counts are summed over every sample a participant has (merged
# keeps all days_in_program timepoints), while the bowel table uses only the
# first assessment -- confirm that pooling timepoints is intended.
name_pivot = taxa.pivot_table(index='public_client_id', columns='Name', values='count',
                              aggfunc='sum', fill_value=0)
name_pivot

Name,taxa_Archaea.Euryarchaeota.Methanobacteria.Methanobacteriales.Methanobacteriaceae.Methanobrevibacter,taxa_Archaea.Euryarchaeota.Methanobacteria.Methanobacteriales.Methanobacteriaceae.Methanosphaera,taxa_Archaea.Euryarchaeota.Methanobacteria.Methanobacteriales.Methanobacteriaceae.nan,taxa_Archaea.Euryarchaeota.Thermoplasmata.Methanomassiliicoccales.Methanomassiliicoccaceae.Methanomassiliicoccus,taxa_Archaea.Euryarchaeota.Thermoplasmata.Methanomassiliicoccales.Methanomethylophilaceae.Candidatus_Methanogranum,taxa_Archaea.Euryarchaeota.Thermoplasmata.Methanomassiliicoccales.Methanomethylophilaceae.Candidatus_Methanomethylophilus,taxa_Archaea.Euryarchaeota.Thermoplasmata.Methanomassiliicoccales.Methanomethylophilaceae.nan,taxa_Bacteria.Actinobacteria.Acidimicrobiia.Microtrichales.nan.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Actinobaculum,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Actinomyces,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Actinotignum,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Arcanobacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.F0332,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Mobiluncus,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Trueperella,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.Varibaculum,taxa_Bacteria.Actinobacteria.Actinobacteria.Actinomycetales.Actinomycetaceae.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Aeriscardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Alloscardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Bifidobacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Gardnerella,taxa_Bacteria.Actinobacteri
a.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Neoscardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Parascardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Pseudoscardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Scardovia,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.Corynebacteriaceae.Corynebacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.Corynebacteriaceae.Corynebacterium_1,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.Corynebacteriaceae.Lawsonella,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.Mycobacteriaceae.Mycobacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.Nocardiaceae.Rhodococcus,taxa_Bacteria.Actinobacteria.Actinobacteria.Corynebacteriales.nan.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Frankiales.Geodermatophilaceae.Modestobacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Beutenbergiaceae.Salana,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Bogoriellaceae.Georgenia,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Brevibacteriaceae.Brevibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Cellulomonadaceae.Cellulomonas,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Dermabacteraceae.Brachybacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Dermabacteraceae.Dermabacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.Amnibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.Clavibacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.Curtobacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.Leucobacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.
Microbacteriaceae.Microbacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.Pseudoclavibacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Microbacteriaceae.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Arthrobacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Kocuria,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Paenarthrobacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Pseudarthrobacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Pseudoglutamicibacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Renibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Micrococcaceae.Rothia,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Promicromonosporaceae.Isoptericola,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.Sanguibacteraceae.Sanguibacter,taxa_Bacteria.Actinobacteria.Actinobacteria.Micrococcales.nan.nan,taxa_Bacteria.Actinobacteria.Actinobacteria.Propionibacteriales.Nocardioidaceae.Aeromicrobium,taxa_Bacteria.Actinobacteria.Actinobacteria.Propionibacteriales.Propionibacteriaceae.Acidipropionibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Propionibacteriales.Propionibacteriaceae.Cutibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Propionibacteriales.Propionibacteriaceae.Propionibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Propionibacteriales.Propionibacteriaceae.Pseudopropionibacterium,taxa_Bacteria.Actinobacteria.Actinobacteria.Pseudonocardiales.Pseudonocardiaceae.Saccharopolyspora,taxa_Bacteria.Actinobacteria.Actinobacteria.nan.nan.nan,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.Atopobium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.Coriobacteriaceae_UCG-002,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.Coriobact
eriaceae_UCG-003,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.Libanicoccus,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.Olsenella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Atopobiaceae.nan,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Collinsella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Enorma,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.nan,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriales_Incertae_Sedis.Raoultibacter,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriales_Incertae_Sedis.nan,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Adlercreutzia,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.CHKCI002,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Cryptobacterium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.DNF00809,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Denitrobacterium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Eggerthella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Enterorhabdus,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Gordonibacter,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Paraeggerthella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Parvibacter,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Senegalimassilia,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Slackia,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.nan,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.nan.nan,taxa_Bacteria.Actinobacteria.Thermoleo
philia.Solirubrobacterales.67-14.nan,taxa_Bacteria.Actinobacteria.nan.nan.nan.nan,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidaceae.Bacteroides,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidales_Incertae_Sedis.Phocaeicola,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidales_RF16_group.nan,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Barnesiellaceae.Barnesiella,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Barnesiellaceae.Coprobacter,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Barnesiellaceae.nan,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Dysgonomonadaceae.Dysgonomonas,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Dysgonomonadaceae.Proteiniphilum,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Dysgonomonadaceae.nan,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Marinifilaceae.Butyricimonas,...,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Diaphorobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Herbaspirillum,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Hydrogenophaga,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Janthinobacterium,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Lautropia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Massilia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Oxalobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Parasutterella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Pelomonas,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Ralstonia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkho
lderiaceae.Rhodoferax,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Sutterella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Variovorax,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.nan,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Neisseriaceae.Eikenella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Neisseriaceae.Kingella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Neisseriaceae.Neisseria,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Rhodocyclaceae.Methyloversatilis,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.nan.nan,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Cardiobacteriales.Cardiobacteriaceae.Cardiobacterium,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Atlantibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Buttiauxella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Citrobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Cosenzaea,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Cronobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Dickeya,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Enterobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Erwinia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Escherichia/Shigella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Franconibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Hafnia-Obesumbacterium,taxa_Bacteria.Proteobact
eria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Klebsiella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Kluyvera,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Kosakonia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Lelliottia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Mangrovibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Moellerella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Morganella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Pantoea,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Plesiomonas,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Pluralibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Proteus,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Providencia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Pseudocitrobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Rahnella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Raoultella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Rosenbergiella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Salmonella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Serratia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Shimwellia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacteriaceae.Yersinia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Enterobacteriales.Enterobacte
riaceae.nan,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Gammaproteobacteria_Incertae_Sedis.Unknown_Family.Wenzhouxiangella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.Actinobacillus,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.Aggregatibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.Cricetibacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.Haemophilus,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.Mannheimia,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pasteurellales.Pasteurellaceae.nan,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pseudomonadales.Moraxellaceae.Acinetobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pseudomonadales.Moraxellaceae.Enhydrobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pseudomonadales.Moraxellaceae.Moraxella,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pseudomonadales.Moraxellaceae.Psychrobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Pseudomonadales.Pseudomonadaceae.Pseudomonas,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Xanthomonadales.Rhodanobacteraceae.Rhodanobacter,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Xanthomonadales.Xanthomonadaceae.Pseudoxanthomonas,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Xanthomonadales.Xanthomonadaceae.Stenotrophomonas,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.nan.nan.nan,taxa_Bacteria.Proteobacteria.nan.nan.nan.nan,taxa_Bacteria.Spirochaetes.Brachyspirae.Brachyspirales.Brachyspiraceae.Brachyspira,taxa_Bacteria.Spirochaetes.Spirochaetia.Spirochaetales.Spirochaetaceae.Sphaerochaeta,taxa_Bacteria.Spirochaetes.Spirochaetia.Spirochaetales.Spirochaetaceae.Treponema_2,taxa_Bacteria.Synergistetes.Synergistia.Synergistales.Synergistaceae.Cloacibacillus,taxa_Bacteria.Synergistetes.Synergistia.Synergistales.Synergistaceae.Fretibacterium,taxa_Bacteria.Synergistetes.Synergis
tia.Synergistales.Synergistaceae.Jonquetella,taxa_Bacteria.Synergistetes.Synergistia.Synergistales.Synergistaceae.Pyramidobacter,taxa_Bacteria.Synergistetes.Synergistia.Synergistales.Synergistaceae.Synergistes,taxa_Bacteria.Synergistetes.Synergistia.Synergistales.Synergistaceae.nan,taxa_Bacteria.Tenericutes.Mollicutes.Anaeroplasmatales.Anaeroplasmataceae.Anaeroplasma,taxa_Bacteria.Tenericutes.Mollicutes.EMP-G18.nan.nan,taxa_Bacteria.Tenericutes.Mollicutes.Izimaplasmatales.nan.nan,taxa_Bacteria.Tenericutes.Mollicutes.Mollicutes_RF39.nan.nan,taxa_Bacteria.Tenericutes.Mollicutes.Mycoplasmatales.Mycoplasmataceae.Mycoplasma,taxa_Bacteria.Tenericutes.Mollicutes.nan.nan.nan,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Chthoniobacterales.Xiphinematobacteraceae.Candidatus_Xiphinematobacter,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Opitutales.Puniceicoccaceae.Cerasicoccus,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Opitutales.Puniceicoccaceae.nan,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Pedosphaerales.Pedosphaeraceae.nan,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Verrucomicrobiales.Akkermansiaceae.Akkermansia,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Verrucomicrobiales.nan.nan,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.nan.nan.nan,taxa_Bacteria.nan.nan.nan.nan.nan,taxa_Eukaryota.Euglenozoa.Euglenida.Aphagea.Aphagea_fa.Distigma,taxa_Eukaryota.Euglenozoa.Euglenida.Aphagea.Aphagea_fa.nan,taxa_Eukaryota.Euglenozoa.Euglenida.nan.nan.nan,taxa_Eukaryota.Euglenozoa.nan.nan.nan.nan,taxa_Eukaryota.Incertae_Sedis.Incertae_Sedis_cl.Incertae_Sedis_or.Incertae_Sedis_fa.Blastocystis,taxa_Eukaryota.Parabasalia.Tritrichomonadea.Tritrichomonadea_or.Tritrichomonadea_fa.Dientamoeba,taxa_Eukaryota.nan.nan.nan.nan.nan,taxa_nan.nan.nan.nan.nan.nan
public_client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1
01000261,71,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14866,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,522,0,0,0,67,0,0,0,0,0,0,0,0,0,0,101,0,0,69,0,0,10461,0,0,0,49,0,0,0,0,0,...,0,0,0,0,0,0,0,43,0,0,0,861,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,136,0,0,29,0,0,0,0,0,0,0,0
01001298,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,578,0,0,0,0,49,0,0,0,0,28,0,0,0,0,218,0,421,92,0,0,107017,0,0,5388,0,0,0,0,0,0,...,0,0,0,0,0,0,0,57,0,0,0,8079,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01001621,0,0,0,0,0,0,0,0,0,139,0,0,0,0,0,0,0,0,0,812,11,0,0,0,7,0,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6857,0,0,0,0,0,0,0,0,0,2281,0,0,0,0,0,0,0,0,0,0,85495,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
01001661,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42,94,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,31605,0,0,0,0,0,0,0,0,533,...,0,0,0,0,0,0,0,133,0,0,0,541,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0
01001798,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0,0,0,0,2439,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,2657,0,0,0,166,297,0,0,0,0,63,0,8,0,0,0,6,0,16,0,0,54320,0,0,880,1472,0,0,0,0,0,...,0,0,0,0,0,0,0,2565,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,561,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MUMA,0,0,0,0,0,0,0,0,0,142,0,0,0,0,0,0,0,0,0,5150,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,274,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49966,0,0,0,1552,1005,0,0,112,0,3373,0,948,0,0,0,0,0,0,0,0,8604,0,0,0,18,0,0,0,0,4366,...,0,0,0,0,0,0,14,6749,0,0,0,3149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,180,0,316,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30,0,0,0,0,3,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,59632,0,0,0,0,0,0,0,0,0,0,0
QIRL,3269,0,0,0,0,0,0,0,0,156,0,0,6,28,0,0,0,0,0,20953,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87296,0,0,0,25,1905,0,0,0,0,0,0,1891,0,0,0,0,1603,284,0,0,16481,0,0,0,91,0,0,0,0,4087,...,0,0,0,0,0,0,0,13115,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45,0,0,0,0,0,0,0,99,125,0,0,0,0,0,0,7050,0,0,0,0,0,0,0,0,0,0,0
ZU273983,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6853,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,144,0,0,0,46,138,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,43541,0,0,0,887,344,0,0,0,0,...,0,0,0,0,0,0,0,177,0,0,0,1429,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,1973,0,0,0,0,0,0,0,0,0,88,0
ZU621944,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3571,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0,0,86,0,0,0,154921,0,0,1620,0,0,0,0,0,389,...,0,0,0,0,0,0,0,1583,0,0,0,19171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,131,0,0,0,0,0,0,0,0,0,3,0


In [19]:
# Prevalence filter: keep only taxa whose count is zero in at most 30% of
# participants (the earlier comment said 15%, but the applied cut-off is 0.30).
MAX_ZERO_FRACTION = .30

taxa = name_pivot.reset_index()
# Number of zero entries per column. The public_client_id column is included;
# its string ids never compare equal to 0, so it always passes the filter.
taxa_zerocounts = (taxa == 0).sum()
# Number of non-missing entries per column (not used by the filter below).
taxa_nonnancounts = (~taxa.isna()).sum()
taxa = taxa.loc[:, taxa_zerocounts / taxa.shape[0] <= MAX_ZERO_FRACTION]
# public_client_id is intentionally left as a regular column (not the index).
taxa

Name,public_client_id,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Bifidobacterium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Collinsella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Adlercreutzia,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidaceae.Bacteroides,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Marinifilaceae.Odoribacter,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Rikenellaceae.Alistipes,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Tannerellaceae.Parabacteroides,taxa_Bacteria.Firmicutes.Bacilli.Lactobacillales.Streptococcaceae.Streptococcus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.Christensenellaceae_R-7_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiaceae_1.Clostridium_sensu_stricto_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiales_vadinBB60_group.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_AD3011_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Agathobacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Anaerostipes,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Blautia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_3,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Dorea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Fusicatenibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.GCA-900066575,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnoclostri
dium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospira,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_FCS020_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_ND3007_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_NK4A136_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-010,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Marvinbryantia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Roseburia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Intestinibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Romboutsia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Anaerotruncus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Butyricicoccus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Candidatus_Soleaferrea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.DTU089,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Faecalibacterium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Flavonifractor,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.GCA-900066225,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Intestinimonas,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Negativibacillus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillospira,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminic
lostridium_5,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminiclostridium_9,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_NK4A214_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-002,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-003,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-005,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-013,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_2,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Subdoligranulum,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.UBA1819,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.nan,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelatoclostridium,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelotrichaceae_UCG-003,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Holdemania,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.nan,taxa_Bacteria.Firmicutes.Negativicutes.Selenomonadales.Acidaminococcaceae.Phascolarctobacterium,taxa_Bacteria.Proteobacteria.Deltaproteobacteria.Desulfovibrionales.Desulfovibrionaceae.Bilophila,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Parasutterella,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Verrucomicrobiales.Akkermansiaceae.Akkermansia
0,01000261,14866,522,0,10461,94,1293,73,1722,430,12,245,0,11,78,0,13434,1180,5969,644,1282,2150,1631,122,493,2007,37,688,2533,358,347,111,273,9011,5934,25,251,0,1065,0,0,7248,0,0,79,29,182,31,518,81,193,659,275,0,723,327,3310,2223,10315,0,580,100,1866,0,37,516,52,43,136
1,01001298,23,578,49,107017,973,4295,2118,25,841,44,77,105,195,156,0,4290,8476,12679,416,1627,1821,1886,1437,9114,15149,342,0,51,833,1277,709,736,119,9736,0,0,42,671,53,12,32584,444,8,499,106,846,15,222,412,80,1375,1312,247,400,2165,2457,0,8698,78,944,1512,0,166,35,4720,674,57,0
2,01001621,812,6857,0,85495,0,0,5358,2387,45,282,0,48,79,159,505,9281,402,44146,0,65,2555,0,72,5597,3589,16,0,0,384,0,299,0,10042,13198,528,2340,68,436,241,340,17397,1840,424,0,0,1873,1656,12018,2902,0,0,0,904,486,105,3490,0,786,670,4376,4699,0,224,491,4543,1850,0,0
3,01001661,18,0,94,31605,0,1289,853,308,1594,16,51,32,54,71,0,6077,1316,5601,0,0,502,0,71,795,4015,114,0,1695,873,791,215,93,1556,3134,0,20,5,1378,0,12,6913,38,13,106,9,61,42,32,71,21,689,351,120,122,1335,352,1414,0,34,221,58,1688,38,9,535,112,133,97
4,01001798,2439,2657,297,54320,751,4798,2228,1284,44,23,0,139,70,28,56,13180,3689,20076,0,470,2659,0,90,2327,6201,492,0,382,0,1381,521,46,15938,4450,52,37,67,212,66,14,37083,127,20,59,358,1082,0,2725,397,132,641,0,308,15,0,2491,7668,1421,0,3039,796,20,101,34,1810,436,2565,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3689,MUMA,5150,49966,1005,8604,3046,5544,7,1631,466,55,30,0,191,189,445,1938,5634,16935,0,1644,659,70,0,3480,952,0,7978,936,826,26,170,113,1415,8988,0,227,0,91,0,27354,5961,166,0,642,85,375,451,85,628,103,7404,85,1977,72,313,0,0,4264,0,1264,663,4546,0,0,0,12790,6749,59632
3690,QIRL,20953,87296,1905,16481,3098,14265,0,2217,23430,333,963,131,671,116,56,4899,2895,14001,823,1378,3986,3768,20,1969,3858,148,4284,816,158,61,152,502,779,5719,155,3434,0,0,0,25,18727,43,0,314,0,556,220,923,139,1406,14750,0,1646,3451,154,840,7800,1978,0,1512,36,4825,0,0,930,4207,13115,7050
3691,ZU273983,6853,144,138,43541,188,3797,3528,106,553,169,86,37,161,76,18,9457,1249,4700,431,633,888,1063,24,2631,2061,0,300,1217,169,287,219,0,3459,5949,28,0,65,382,28,28,12587,177,29,47,0,502,0,803,279,510,635,0,185,499,524,4855,7658,3244,30,2492,81,0,50,22,0,296,177,1973
3692,ZU621944,2204,3571,0,154921,196,3314,5769,118,0,0,25,0,0,0,0,13318,1800,2512,153,0,2319,1306,0,7304,0,17,0,0,34,265,432,0,722,3134,10,18,0,638,11,0,1620,170,0,86,0,336,0,549,162,0,0,0,0,0,424,0,1183,0,11,491,26,1268,0,2,0,1476,1583,131


In [20]:
#Obtain "eGFR" calculated GFR data (credit: Alexandra Ralevski) code chunk below:
################################################################################################
chems = adi.get_snapshot('chemistries', clean=True)
cl = adi.get_snapshot('clients', clean=True)
# join_clients is presumably provided by `from arivale_data_interface import *`;
# the result is used below as chemistries rows carrying the clients' sex and age.
chems_dems = join_clients(cl, chems)
# Relevant column names. They are bound to names because a bare expression in the
# middle of a cell is evaluated and then discarded without being displayed.
creat_cols = [col for col in chems.columns if 'CREAT' in col]
gfr_cols = [col for col in chems.columns if 'GFR' in col]
def f(sex, age, creatinine):
    """Estimate GFR from serum creatinine with the CKD-EPI 2021 creatinine equation.

        eGFR = 142 * min(Scr/k, 1)**alpha * max(Scr/k, 1)**-1.200
                   * 0.9938**age * (1.012 if female)

    with k = 0.7, alpha = -0.241 for females and k = 0.9, alpha = -0.302 for males.

    Parameters
    ----------
    sex : str
        'F' or 'M'. Any other value (including missing) yields None.
    age : number
        Age in years.
    creatinine : number
        Serum creatinine (the equation assumes mg/dL -- TODO confirm the units
        of the column passed in).

    Returns
    -------
    float or None
        The estimated GFR, or None when `sex` is neither 'F' nor 'M'.
        A NaN creatinine propagates to a NaN result.
    """
    if sex == 'F':
        kappa, alpha, sex_factor = 0.7, -0.241, 1.012
    elif sex == 'M':
        # The male branch must use kappa = 0.9 on BOTH sides of the threshold;
        # the previous code divided by 0.7 when creatinine/0.9 > 1.
        kappa, alpha, sex_factor = 0.9, -0.302, 1.0
    else:
        return None

    ratio = creatinine / kappa
    # Below (or at) the threshold the sex-specific exponent applies, above it -1.200.
    exponent = alpha if ratio <= 1 else -1.200
    return 142 * (ratio ** exponent) * (0.9938 ** age) * sex_factor

# .copy() gives an independent frame, so adding the eGFR column below does not
# write into a slice of chems_dems (avoids pandas' SettingWithCopyWarning).
creat_calc = chems_dems[['public_client_id','sex', 'age', 'CREATININE_ENZ__SER']].copy()

# Row-wise eGFR from sex, age and serum creatinine using f() defined above.
creat_calc['eGFR'] = creat_calc.apply(lambda x: f(x.sex, x.age, x.CREATININE_ENZ__SER), axis=1)
creat_calc.to_csv('eGFR.csv',index = False)
################################################################################################

#prepare eGFR dataframe:
#keep only the id and eGFR columns, then drop duplicate entries to reduce
#12316 entries across all individuals to 6133 individuals
# NOTE(review): rows are not sorted by date before de-duplication, so the row kept
# per participant is simply the first one in chems_dems order -- confirm that this
# is the intended (baseline) measurement.
eGFR = (
    creat_calc[['public_client_id', 'eGFR']]
    .reset_index(drop=True)
    .drop_duplicates('public_client_id')
)
eGFR

  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))
  x.columns = x.columns.str.replace("[^a-zA-Z0-9]","_").str.replace("^([0-9])","_\\1")
  x.columns = x.columns.str.replace("[^a-zA-Z0-9]","_").str.replace("^([0-9])","_\\1")
  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "

Unnamed: 0,public_client_id,eGFR
0,01000552,
1,01000978,
2,01001181,
3,01001298,99.024207
4,01001467,100.959522
...,...,...
12307,QIRL,111.687296
12311,ZU273983,118.179626
12312,ZU612255,70.256126
12313,ZU621944,120.472213


In [21]:
#Code borrowed with permission from Annie Levine, MD:
#Get stress data (four-item Perceived Stress Scale answers from the assessments snapshot)
assessments = adi.get_snapshot('assessments', clean=True)  # not used in this cell; kept in case later cells rely on it
stress = adi.get_snapshot("assessments")
stress = stress.sort_values('days_in_program',ascending=True)
stress = stress.drop_duplicates(subset='public_client_id')  # first row per participant after the sort above
stress = stress.set_index('public_client_id')
pss_columns = {
    "assessment:pss-four-item:out_of_control:enum": "pss_1",
    "assessment:pss-four-item:handle_problems:enum": "pss_2",
    "assessment:pss-four-item:going_your_way:enum": "pss_3",
    "assessment:pss-four-item:insurmountable:enum": "pss_4",
}
stress = stress[list(pss_columns)].rename(columns=pss_columns)

# Coding question values and replacing with numeric values.
# The number in each answer label is the item score; items 2 and 3 use the
# opposite wording order, so their labels are listed separately.
pss_1_list = ['(0) Never', '(1) Almost Never', '(2) Sometimes', '(3) Fairly Often', '(4) Very Often']
pss_2_list = ['(0) Very Often', '(1) Fairly Often', '(2) Sometimes', '(3) Almost Never', '(4) Never']
pss_3_list = ['(0) Very Often', '(1) Fairly Often', '(2) Sometimes', '(3) Almost Never', '(4) Never']
pss_4_list = ['(0) Never', '(1) Almost Never', '(2) Sometimes', '(3) Fairly Often', '(4) Very Often']
pss_1_r = pss_2_r = pss_3_r = pss_4_r = [0, 1, 2, 3, 4]
pss_coding = {
    "pss_1": (pss_1_list, pss_1_r),
    "pss_2": (pss_2_list, pss_2_r),
    "pss_3": (pss_3_list, pss_3_r),
    "pss_4": (pss_4_list, pss_4_r),
}
for item, (labels, scores) in pss_coding.items():
    # .map leaves missing answers (and any unexpected label) as NaN instead of
    # relying on a numeric placeholder value.
    stress[item] = stress[item].map(dict(zip(labels, scores)))

#Calculating the total stress score
def stress_add (row):
    """Return pss_1 + pss_2 + pss_3 + pss_4 for `row`.

    `row` may be a single row or a whole DataFrame (the addition is then
    element-wise); a missing item makes the corresponding total NaN.
    """
    stress_score = row["pss_1"] + row["pss_2"] + row["pss_3"] + row["pss_4"]
    return stress_score
stress["stress_score"] = stress_add(stress)

#Remove participants without a complete score.
# The previous approach filled missing answers with 0.001 and removed only rows
# whose total was 0.004 (all four items missing); participants with some items
# missing kept a distorted total such as 5.001. Any missing item now drops the row.
stress = stress.dropna(subset=["stress_score"])

  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))
  x.columns = x.columns.str.replace("[^a-zA-Z0-9]","_").str.replace("^([0-9])","_\\1")
  x.columns = x.columns.str.replace("[^a-zA-Z0-9]","_").str.replace("^([0-9])","_\\1")
  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "

In [22]:
# Obtain vendor from snapshot
dash = adi.get_snapshot('microbiome_diversity')
dash = (
    dash
    .sort_values('days_in_program', ascending=True)   # Sort by days in program first
    .drop_duplicates(subset='public_client_id')       # drop duplicate PCIs, keeping first day
    .sort_values('public_client_id', ascending=True)
    [['public_client_id', 'vendor_dashboard']]
    # Actually remove participants without vendor information. The previous
    # `dash[dash.notna()]` only masked values (missing stays missing) and
    # never dropped a row.
    .dropna(subset=['vendor_dashboard'])
    .reset_index(drop=True)
)
dash
#3698 individuals with vendor information (count from the original run, before rows with a missing vendor were dropped)

  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))


Unnamed: 0,public_client_id,vendor_dashboard
0,01000261,Second Genome
1,01001298,research-microbiome
2,01001621,research-microbiome
3,01001661,research-microbiome
4,01001798,research-microbiome
...,...,...
3693,MUMA,Second Genome
3694,QIRL,Second Genome
3695,ZU273983,research-microbiome
3696,ZU621944,research-microbiome


In [23]:
#Demographics (sex, age) from the clients snapshot
bio = adi.get_snapshot('clients')
biodf = bio[['sex','age','public_client_id']].copy() #6133 individuals

#Get BMI data
bmi = adi.get_snapshot('weight_monthly')
bmi = bmi[['public_client_id','days_in_program','BMI_CALC']]
# A single two-key sort guarantees that the first row of every participant is the
# earliest measurement. Sorting by days and then re-sorting by id (as before) does
# not, because the default sort algorithm does not preserve the earlier ordering.
bmi = bmi.sort_values(['public_client_id','days_in_program'], ascending=True)
bmi = bmi.drop('days_in_program', axis=1)
bmi = bmi.drop_duplicates('public_client_id')
bmi = bmi.dropna() #5567 individuals in the original run

# Inner-join every table on the participant id; only participants present in all
# of demographics, BMI, vendor, bowel, eGFR and taxa remain.
output_df = biodf
for table in (bmi, dash, bowel, eGFR, taxa):
    output_df = pd.merge(output_df, table, on='public_client_id', how='inner')
output_df = output_df.set_index('public_client_id')
# NOTE(review): 'gut.csv' is written again, with different content, by the export
# cell further down -- confirm which version downstream code expects.
output_df.to_csv('gut.csv')
micro = output_df
micro

  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))
  t = pandas.to_datetime(x.str.replace("^arivale_snapshot.*(\\d{4}-\\d{2}-\\d{2}_\\d{4})$", "\\1"), format="%Y-%m-%d_%H%M")
  colnames = hdr.loc[~hdr.str.contains("^#")].head(1).str.replace("\n$","")
  hdr = hdr.loc[hdr.str.contains("^#")].str.replace("\n$","")
  fields = hdr.str.replace("^# [^:]+: *(.*)$",r"\1")
  fields.index = hdr.str.replace("^# ([^:]+):.*$",r"\1")
  return "\n".join(list(hdr.loc[hdr.index >= i].str.replace("^# ","")))


Unnamed: 0_level_0,sex,age,BMI_CALC,vendor_dashboard,bowel,eGFR,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Bifidobacterium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Collinsella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Adlercreutzia,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidaceae.Bacteroides,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Marinifilaceae.Odoribacter,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Rikenellaceae.Alistipes,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Tannerellaceae.Parabacteroides,taxa_Bacteria.Firmicutes.Bacilli.Lactobacillales.Streptococcaceae.Streptococcus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.Christensenellaceae_R-7_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiaceae_1.Clostridium_sensu_stricto_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiales_vadinBB60_group.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_AD3011_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Agathobacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Anaerostipes,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Blautia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_3,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Dorea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Fusicatenibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.GCA-900066575,taxa_Bacteria.Firmicutes.Clostridia.C
lostridiales.Lachnospiraceae.Lachnoclostridium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospira,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_FCS020_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_ND3007_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_NK4A136_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-010,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Marvinbryantia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Roseburia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Intestinibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Romboutsia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Anaerotruncus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Butyricicoccus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Candidatus_Soleaferrea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.DTU089,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Faecalibacterium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Flavonifractor,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.GCA-900066225,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Intestinimonas,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Negativibacillus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillospira,taxa_Bacteria.Firmicutes.Clostr
idia.Clostridiales.Ruminococcaceae.Ruminiclostridium_5,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminiclostridium_9,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_NK4A214_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-002,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-003,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-005,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-013,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_2,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Subdoligranulum,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.UBA1819,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.nan,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelatoclostridium,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelotrichaceae_UCG-003,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Holdemania,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.nan,taxa_Bacteria.Firmicutes.Negativicutes.Selenomonadales.Acidaminococcaceae.Phascolarctobacterium,taxa_Bacteria.Proteobacteria.Deltaproteobacteria.Desulfovibrionales.Desulfovibrionaceae.Bilophila,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Parasutterella,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Verrucomicrobiales.Akkermansiaceae.Akkermansia
public_client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1
01001298,F,61.0,25.948604,research-microbiome,3,99.024207,23,578,49,107017,973,4295,2118,25,841,44,77,105,195,156,0,4290,8476,12679,416,1627,1821,1886,1437,9114,15149,342,0,51,833,1277,709,736,119,9736,0,0,42,671,53,12,32584,444,8,499,106,846,15,222,412,80,1375,1312,247,400,2165,2457,0,8698,78,944,1512,0,166,35,4720,674,57,0
01001621,F,54.0,23.627456,research-microbiome,2,69.439409,812,6857,0,85495,0,0,5358,2387,45,282,0,48,79,159,505,9281,402,44146,0,65,2555,0,72,5597,3589,16,0,0,384,0,299,0,10042,13198,528,2340,68,436,241,340,17397,1840,424,0,0,1873,1656,12018,2902,0,0,0,904,486,105,3490,0,786,670,4376,4699,0,224,491,4543,1850,0,0
01001661,F,30.0,21.031478,research-microbiome,3,120.947576,18,0,94,31605,0,1289,853,308,1594,16,51,32,54,71,0,6077,1316,5601,0,0,502,0,71,795,4015,114,0,1695,873,791,215,93,1556,3134,0,20,5,1378,0,12,6913,38,13,106,9,61,42,32,71,21,689,351,120,122,1335,352,1414,0,34,221,58,1688,38,9,535,112,133,97
01001798,F,55.0,29.579491,research-microbiome,1,93.963151,2439,2657,297,54320,751,4798,2228,1284,44,23,0,139,70,28,56,13180,3689,20076,0,470,2659,0,90,2327,6201,492,0,382,0,1381,521,46,15938,4450,52,37,67,212,66,14,37083,127,20,59,358,1082,0,2725,397,132,641,0,308,15,0,2491,7668,1421,0,3039,796,20,101,34,1810,436,2565,0
01002192,M,26.0,33.858776,research-microbiome,3,125.172870,826,742,200,21014,201,1238,1305,124,799,275,0,361,129,42,0,1848,774,4394,200,0,911,381,33,1219,2865,78,989,1043,1079,407,231,114,1763,2440,0,43,18,448,0,7,17142,19,0,56,69,138,20,92,193,367,943,269,235,399,1742,1739,1715,720,38,1758,71,1368,41,29,0,132,1178,429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HX884201,F,67.0,29.571540,research-microbiome,3,56.932265,613,870,180,76450,259,3620,6453,443,1617,118,3426,184,294,80,251,4438,125,2449,0,811,1592,936,31,4330,6880,20,174,1148,833,11,405,11,3786,7431,2146,613,129,1411,67,217,23551,1425,189,916,1172,1865,456,485,2026,772,783,21,124,1203,1125,3670,1512,5195,459,3998,166,371,127,162,2119,775,272,1703
HX981293,M,45.0,24.806553,research-microbiome,3,107.698203,630,511,65,65715,230,1949,2363,370,186,108,0,269,67,14,180,4906,272,923,95,614,233,498,4,815,1315,39,72,94,112,202,52,21,467,884,34,0,579,159,19,65,15117,75,22,283,96,343,33,189,192,3272,3900,1277,71,566,448,3842,230,3210,117,1008,13,71,28,0,252,320,0,2561
ZU273983,F,32.0,32.042139,research-microbiome,2,118.179626,6853,144,138,43541,188,3797,3528,106,553,169,86,37,161,76,18,9457,1249,4700,431,633,888,1063,24,2631,2061,0,300,1217,169,287,219,0,3459,5949,28,0,65,382,28,28,12587,177,29,47,0,502,0,803,279,510,635,0,185,499,524,4855,7658,3244,30,2492,81,0,50,22,0,296,177,1973
ZU621944,F,37.0,51.577264,research-microbiome,3,120.472213,2204,3571,0,154921,196,3314,5769,118,0,0,25,0,0,0,0,13318,1800,2512,153,0,2319,1306,0,7304,0,17,0,0,34,265,432,0,722,3134,10,18,0,638,11,0,1620,170,0,86,0,336,0,549,162,0,0,0,0,0,424,0,1183,0,11,491,26,1268,0,2,0,1476,1583,131


In [24]:
# Replace F's with W in sex column so that R Studio does not interpret as boolean
# (restricted to the sex column so no other column can be altered by accident)
micro = micro.assign(sex=micro['sex'].replace('F', 'W'))

# Taxonomy metadata
# Algorithm provided by Christian Diener, PhD:
###################################################
taxa_prefix = "taxa_"
taxa_table = taxa.iloc[:,1:].columns  # to get only the taxa columns
taxa_table = taxa_table[taxa_table.str.startswith(taxa_prefix)]
taxa_table = taxa_table.str[len(taxa_prefix):]  # to remove only the leading `taxa_`
# Split on the dot into one column per rank; n=5 caps the result at six columns
# so a dot inside the last field cannot create a seventh one.
taxa_table = taxa_table.to_series().str.split(".", n=5, expand=True)
taxa_table.columns = ["kingdom", "phylum", "class", "order", "family", "genus"
                     ]# set the column names
taxa_table.to_csv("taxa.csv", sep="\t")
####################################################

taxa = taxa.dropna()
micro = micro.dropna()

# Select the count columns by name instead of by position, so the export does not
# depend on exactly six metadata columns preceding the taxa.
taxa_counts = micro.loc[:, micro.columns.str.startswith(taxa_prefix)].reset_index()
# Taxa count data:
taxa_counts.to_csv('gut.csv',index = False)
# Heat map data (same table, written with the row index as first column):
taxa_counts.to_csv('gut_hm.csv')
# Full data:
micro.reset_index().to_csv('gut_full.csv', index = False)

In [25]:
# List every column label of the merged table as a plain NumPy array.
merged_columns = micro.columns
merged_columns.values

array(['sex', 'age', 'BMI_CALC', 'vendor_dashboard', 'bowel', 'eGFR',
       'taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Bifidobacterium',
       'taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Collinsella',
       'taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Adlercreutzia',
       'taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidaceae.Bacteroides',
       'taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Marinifilaceae.Odoribacter',
       'taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Rikenellaceae.Alistipes',
       'taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Tannerellaceae.Parabacteroides',
       'taxa_Bacteria.Firmicutes.Bacilli.Lactobacillales.Streptococcaceae.Streptococcus',
       'taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.Christensenellaceae_R-7_group',
       'taxa_Bacteria.Firmicutes.Clostridia.Clostridiale

In [26]:
# Display the filtered taxa count table (rows containing NaN were dropped by the earlier `taxa = taxa.dropna()`).
taxa

Name,public_client_id,taxa_Bacteria.Actinobacteria.Actinobacteria.Bifidobacteriales.Bifidobacteriaceae.Bifidobacterium,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Coriobacteriaceae.Collinsella,taxa_Bacteria.Actinobacteria.Coriobacteriia.Coriobacteriales.Eggerthellaceae.Adlercreutzia,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Bacteroidaceae.Bacteroides,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Marinifilaceae.Odoribacter,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Rikenellaceae.Alistipes,taxa_Bacteria.Bacteroidetes.Bacteroidia.Bacteroidales.Tannerellaceae.Parabacteroides,taxa_Bacteria.Firmicutes.Bacilli.Lactobacillales.Streptococcaceae.Streptococcus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.Christensenellaceae_R-7_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Christensenellaceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiaceae_1.Clostridium_sensu_stricto_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Clostridiales_vadinBB60_group.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_AD3011_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.Family_XIII_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Family_XIII.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Agathobacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Anaerostipes,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Blautia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Coprococcus_3,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Dorea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Fusicatenibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.GCA-900066575,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnoclostri
dium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospira,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_FCS020_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_ND3007_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_NK4A136_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-001,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Lachnospiraceae_UCG-010,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Marvinbryantia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.Roseburia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Lachnospiraceae.nan,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Intestinibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Peptostreptococcaceae.Romboutsia,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Anaerotruncus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Butyricicoccus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Candidatus_Soleaferrea,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.DTU089,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Faecalibacterium,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Flavonifractor,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.GCA-900066225,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Intestinimonas,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Negativibacillus,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillibacter,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Oscillospira,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminic
lostridium_5,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminiclostridium_9,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_NK4A214_group,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-002,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-003,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-004,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-005,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcaceae_UCG-013,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_1,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Ruminococcus_2,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.Subdoligranulum,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.UBA1819,taxa_Bacteria.Firmicutes.Clostridia.Clostridiales.Ruminococcaceae.nan,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelatoclostridium,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Erysipelotrichaceae_UCG-003,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.Holdemania,taxa_Bacteria.Firmicutes.Erysipelotrichia.Erysipelotrichales.Erysipelotrichaceae.nan,taxa_Bacteria.Firmicutes.Negativicutes.Selenomonadales.Acidaminococcaceae.Phascolarctobacterium,taxa_Bacteria.Proteobacteria.Deltaproteobacteria.Desulfovibrionales.Desulfovibrionaceae.Bilophila,taxa_Bacteria.Proteobacteria.Gammaproteobacteria.Betaproteobacteriales.Burkholderiaceae.Parasutterella,taxa_Bacteria.Verrucomicrobia.Verrucomicrobiae.Verrucomicrobiales.Akkermansiaceae.Akkermansia
0,01000261,14866,522,0,10461,94,1293,73,1722,430,12,245,0,11,78,0,13434,1180,5969,644,1282,2150,1631,122,493,2007,37,688,2533,358,347,111,273,9011,5934,25,251,0,1065,0,0,7248,0,0,79,29,182,31,518,81,193,659,275,0,723,327,3310,2223,10315,0,580,100,1866,0,37,516,52,43,136
1,01001298,23,578,49,107017,973,4295,2118,25,841,44,77,105,195,156,0,4290,8476,12679,416,1627,1821,1886,1437,9114,15149,342,0,51,833,1277,709,736,119,9736,0,0,42,671,53,12,32584,444,8,499,106,846,15,222,412,80,1375,1312,247,400,2165,2457,0,8698,78,944,1512,0,166,35,4720,674,57,0
2,01001621,812,6857,0,85495,0,0,5358,2387,45,282,0,48,79,159,505,9281,402,44146,0,65,2555,0,72,5597,3589,16,0,0,384,0,299,0,10042,13198,528,2340,68,436,241,340,17397,1840,424,0,0,1873,1656,12018,2902,0,0,0,904,486,105,3490,0,786,670,4376,4699,0,224,491,4543,1850,0,0
3,01001661,18,0,94,31605,0,1289,853,308,1594,16,51,32,54,71,0,6077,1316,5601,0,0,502,0,71,795,4015,114,0,1695,873,791,215,93,1556,3134,0,20,5,1378,0,12,6913,38,13,106,9,61,42,32,71,21,689,351,120,122,1335,352,1414,0,34,221,58,1688,38,9,535,112,133,97
4,01001798,2439,2657,297,54320,751,4798,2228,1284,44,23,0,139,70,28,56,13180,3689,20076,0,470,2659,0,90,2327,6201,492,0,382,0,1381,521,46,15938,4450,52,37,67,212,66,14,37083,127,20,59,358,1082,0,2725,397,132,641,0,308,15,0,2491,7668,1421,0,3039,796,20,101,34,1810,436,2565,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3689,MUMA,5150,49966,1005,8604,3046,5544,7,1631,466,55,30,0,191,189,445,1938,5634,16935,0,1644,659,70,0,3480,952,0,7978,936,826,26,170,113,1415,8988,0,227,0,91,0,27354,5961,166,0,642,85,375,451,85,628,103,7404,85,1977,72,313,0,0,4264,0,1264,663,4546,0,0,0,12790,6749,59632
3690,QIRL,20953,87296,1905,16481,3098,14265,0,2217,23430,333,963,131,671,116,56,4899,2895,14001,823,1378,3986,3768,20,1969,3858,148,4284,816,158,61,152,502,779,5719,155,3434,0,0,0,25,18727,43,0,314,0,556,220,923,139,1406,14750,0,1646,3451,154,840,7800,1978,0,1512,36,4825,0,0,930,4207,13115,7050
3691,ZU273983,6853,144,138,43541,188,3797,3528,106,553,169,86,37,161,76,18,9457,1249,4700,431,633,888,1063,24,2631,2061,0,300,1217,169,287,219,0,3459,5949,28,0,65,382,28,28,12587,177,29,47,0,502,0,803,279,510,635,0,185,499,524,4855,7658,3244,30,2492,81,0,50,22,0,296,177,1973
3692,ZU621944,2204,3571,0,154921,196,3314,5769,118,0,0,25,0,0,0,0,13318,1800,2512,153,0,2319,1306,0,7304,0,17,0,0,34,265,432,0,722,3134,10,18,0,638,11,0,1620,170,0,86,0,336,0,549,162,0,0,0,0,0,424,0,1183,0,11,491,26,1268,0,2,0,1476,1583,131
