In [1]:
import os
import pandas as pd
from collections import Counter

In [2]:
os.chdir('/Volumes/AHN/captive_ape_microbiome')
%run scripts/analyses/functions.ipynb

In [3]:
# Input files for the 16S analysis (relative to the current working directory).
INDIR = 'results/16s/inputs'
metadata_file = f'{INDIR}/16S_metadata.txt'
tax_table_file = f'{INDIR}/ASVs_taxonomy.txt'
asv_table_file = f'{INDIR}/ASV_tab.txt'

# Output locations. Created with os.makedirs instead of a shell `mkdir -pv`
# so the cell does not depend on a POSIX shell; exist_ok makes re-runs harmless.
OUTDIR = 'results/16s/analyses'
os.makedirs(os.path.join(OUTDIR, 'tables'), exist_ok=True)
os.makedirs(os.path.join(OUTDIR, 'figures'), exist_ok=True)

#### Designate 16S ASVs as host-restricted, mixed-host, or unique to captive apes

In [4]:
# asv_hr_table is not defined in this notebook; it is expected to come from
# functions.ipynb.
asv_16s = asv_hr_table(asv_table_file, metadata_file, tax_table_file)
print(f'{len(asv_16s)} total 16S ASVs')

# Keep only the ASVs flagged as present in captive apes.
in_captive = asv_16s['CP_pres'] == True
asv_16s_captive = asv_16s[in_captive]
print(f'{len(asv_16s_captive)} 16S ASVs found in captive apes')

# Tally the captive-ape ASVs by their HR_type value.
hr_type_counts = asv_16s_captive['HR_type'].value_counts()
print(hr_type_counts)
asv_16s.head()

2042 total 16S ASVs
872 16S ASVs found in captive apes
MX_human_wild_apes    486
HR_human              230
Unique_CP             104
MX_wild_apes           36
HR_wild_chimp           9
HR_wild_gorilla         7
Name: HR_type, dtype: int64


Unnamed: 0,ASV,sampleNames,sampleNum,HR_sampleTypes,HR_sampleNum,HR_cat,HR_type,CP_pres,CP_sampleNum,CP_sampleTypes,captiveNames,Phylum,Order,Family,Genus
0,ASV_2425,"[cp.bon.COLZ.2.16s, cp.ora.COLZ.1.16s, cp.ora....",7,[],0,Unique_CP,Unique_CP,True,7,"[captive_orangutan, captive_bonobo, captive_ch...","[cp.bon.COLZ.2.16s, cp.ora.COLZ.1.16s, cp.ora....",Firmicutes,Clostridiales,Lachnospiraceae,Lachnospiraceae_NK4A136_group
1,ASV_905,"[cp.ora.ATLZ.121.16s, wd.chi.GM.1.16s, wd.chi....",101,"[human, wild_gorilla, wild_bonobo, wild_chimp]",99,MX,MX_human_wild_apes,True,2,"[captive_orangutan, captive_gorilla]","[cp.ora.ATLZ.121.16s, cp.gor.COLZ.5.16s]",Firmicutes,Clostridiales,Lachnospiraceae,Lachnospiraceae_AC2044_group
2,ASV_1085,"[wd.chi.GM.106.16s, wd.chi.GM.111.16s, wd.chi....",31,"[human, wild_chimp, wild_gorilla]",31,MX,MX_human_wild_apes,False,0,[],[],Firmicutes,Clostridiales,Lachnospiraceae,Lachnospiraceae_NK4A136_group
3,ASV_1261,"[cp.ora.ATLZ.121.16s, cp.chi.PC.A2.16S, cp.chi...",15,[human],11,HR,HR_human,True,4,"[captive_orangutan, captive_chimp]","[cp.ora.ATLZ.121.16s, cp.chi.PC.A2.16S, cp.chi...",Firmicutes,Clostridiales,Lachnospiraceae,Lachnospiraceae_NK4A136_group
4,ASV_1111,"[cp.ora.ATLZ.114.16s, cp.ora.ATLZ.117.16s, cp....",39,"[human, wild_gorilla, wild_bonobo, wild_chimp]",26,MX,MX_human_wild_apes,True,13,"[captive_orangutan, captive_bonobo, captive_go...","[cp.ora.ATLZ.114.16s, cp.ora.ATLZ.117.16s, cp....",Firmicutes,Clostridiales,Lachnospiraceae,unclassified


In [21]:
def multi_site_sp(cp_desc):
    """Classify an ASV by how many sites and host species it was observed in.

    cp_desc is a list of strings whose underscore-separated fields are read
    by position: field 0 is taken as the site and field 2 as the host
    species (e.g. 'ATLZ_captive_orangutan').

    Returns '<single_site|multi_site>_<single_sp|multi_sp>', using the
    'multi' form when more than one distinct value is present. An empty
    list therefore yields 'single_site_single_sp'.
    """
    distinct_sites = {desc.split('_')[0] for desc in cp_desc}
    distinct_species = {desc.split('_')[2] for desc in cp_desc}
    site_label = 'single_site' if len(distinct_sites) <= 1 else 'multi_site'
    species_label = 'single_sp' if len(distinct_species) <= 1 else 'multi_sp'
    return f'{site_label}_{species_label}'

# Label every sample as '<site_code>_<Description>' and map sample ID -> label.
metadata = pd.read_csv(metadata_file, sep='\t', index_col=None)
metadata['Description_site'] = metadata['site_code'] + '_' + metadata['Description']
sample_type_site_dict = dict(zip(metadata['X.SampleID'], metadata['Description_site']))

# For each ASV, count how many of its samples carry each label
# (one row per ASV, one column per label; labels not seen for an ASV become 0).
description_df = asv_16s['sampleNames'].apply(lambda l: pd.Series(
    [sample_type_site_dict[name] for name in l]).value_counts())
description_df = description_df.fillna(0)

# Labels belonging to captive samples. Sorted so that the column order of the
# written table is the same on every run (set iteration order is not stable
# across kernel sessions).
captive_mask = metadata['captivity_status'] == 'captive'
capt_desc = sorted(metadata.loc[captive_mask, 'Description_site'].dropna().unique())

# reindex keeps only the captive columns and adds a zero column for any captive
# label that no ASV was found in (plain column selection would raise KeyError).
# It also returns a new frame, so the column assignments below do not write
# into a slice of the previous one.
description_df = description_df.reindex(columns=capt_desc, fill_value=0.0)

# Captive labels in which each ASV has at least one sample, how many there are,
# and the resulting single/multi site and species classification.
description_df['CP_sp_loc'] = description_df.apply(lambda row: list(row.index[row > 0]), axis=1)
description_df['numEnclosure'] = description_df['CP_sp_loc'].apply(len)
description_df['multi_site_sp'] = description_df['CP_sp_loc'].apply(multi_site_sp)

# Attach the per-label counts to the ASV table (joined on the row index) and save.
asv_16s_description = asv_16s.merge(description_df, left_index=True, right_index=True)
asv_16s_description.to_csv(f'{OUTDIR}/tables/16S_ASVs_summary.txt', sep='\t', index=False)
asv_16s_description.head()

Unnamed: 0,ASV,sampleNames,sampleNum,HR_sampleTypes,HR_sampleNum,HR_cat,HR_type,CP_pres,CP_sampleNum,CP_sampleTypes,...,ATLZ_captive_orangutan,HOUZ_captive_gorilla,COLZ_captive_gorilla,HOUZ_captive_orangutan,PC_captive_chimp,HOUZ_captive_chimp,COMZ_captive_gorilla,CP_sp_loc,numEnclosure,multi_site_sp
0,ASV_617,"[wd.gor.BI.a172.16s, wd.gor.BI.a173.16s, wd.go...",108,[wild_gorilla],108,HR,HR_wild_gorilla,False,0,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[],0,single_site_single_sp
1,ASV_982,"[wd.gor.BI.a175.16s, wd.gor.BI.a177.16s, wd.go...",64,[wild_gorilla],64,HR,HR_wild_gorilla,False,0,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[],0,single_site_single_sp
2,ASV_453,"[wd.chi.GM.10.16s, wd.chi.GM.100.16s, wd.chi.G...",86,"[wild_gorilla, wild_chimp]",86,MX,MX_2_wild_apes,False,0,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[],0,single_site_single_sp
3,ASV_932,"[wd.bon.ML.a207.16s, wd.bon.ML.a208.16s, wd.bo...",48,[wild_bonobo],48,HR,HR_wild_bonobo,False,0,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[],0,single_site_single_sp
4,ASV_1398,"[wd.bon.LA.a187.16s, wd.bon.LA.a189.16s, wd.bo...",18,"[wild_bonobo, wild_gorilla]",18,MX,MX_2_wild_apes,False,0,[],...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[],0,single_site_single_sp
