### Subsetting feature table into pre- and post-HCT samples

In [1]:
import os
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
from qiime2 import Artifact

data_dir = '../data'
    
%matplotlib inline

In [2]:
table = q2.Artifact.load(f'{data_dir}/taxonomy/table-filtered.qza').view(pd.DataFrame)

In [3]:
table.head()

Unnamed: 0,fcefd2cc81e6b698cf230b94cf6bc046,83172f4983ceb57d25b013054fe23f9b,6ac82139a801c5eebb1de1f3dbe9201e,a1d275645f04fa46722204cd3c58af06,98aaa6625a97f3a8f5c9e6de2e6c7793,2ab405deaaef0bfbbd12a429f8bfc546,3ca8918bada3f369cb1f38393023a803,66686afa383d4cacea68c0e707c710eb,600d2ea1a4e523b5b7945662ccd0e1e4,8be23c0cfcbf439adb0d15266d1ab612,...,3ca83d359e01a68bb545574fce557397,07c705f17b004dec460060e39758e64a,d905d941b896d68338bd96cd1742eeda,284f730edec0c74c6cffe9b5aa2256df,39b1dcf9fc20454834d9b6f7d19a98eb,6b7e3f645d9eb0cb3b61dcc79459db1b,38088a4e6f67d35ca0327d02d681e969,02f5bc7340fe680bd81ce3b3551396f2,8f7d1ebfef43038bb9e2a9def65cc069,ff76204ace4734146de4d938d2589df1
0DOSLC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0
0KB68F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0TR6O2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0UWI7J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0WG67D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0


In [4]:
meta = pd.read_csv(f'{data_dir}/metadata/sample_metadata.tsv', sep = "\t")
meta.head()

Unnamed: 0,sampleid,stool_consistency,hct_source,disease,categorical_time_relative_to_engraftment,week_relative_to_hct,timepoint_of_transplant,day_relative_to_nearest_hct,alleged_abduction
0,N4VICF,formed,cord,Myelodysplastic Syndromes,pre,-1.0,6.0,-6.0,0
1,8A0F9A,formed,cord,Leukemia,pre,-2.0,7.0,-7.0,0
2,5Y49IM,semi-formed,cord,Leukemia,peri,-1.0,7.0,0.0,1
3,ZKJI45,semi-formed,cord,Leukemia,post,1.0,7.0,8.0,0
4,2I7SIQ,liquid,cord,Leukemia,peri,-1.0,0.0,0.0,1


In [5]:
pre_ids = meta.loc[meta['day_relative_to_nearest_hct'] <= 0].sampleid
pre_ids

0      N4VICF
1      8A0F9A
2      5Y49IM
4      2I7SIQ
6      XO59R8
        ...  
156    HCMB0Y
157    P0A2X1
158    F89RLS
159    ZQT8ZN
160    62ARKK
Name: sampleid, Length: 80, dtype: object

In [6]:
post_ids = meta.loc[meta['day_relative_to_nearest_hct'] >= 0].sampleid
post_ids

2      5Y49IM
3      ZKJI45
4      2I7SIQ
5      PCUMU7
7      AFG7YZ
        ...  
144    RE2G74
145    K613DJ
148    0WG67D
151    95NCMG
152    4NU0Q9
Name: sampleid, Length: 87, dtype: object

In [7]:
pre_table = table.loc[pre_ids]
pre_table.head()

Unnamed: 0,fcefd2cc81e6b698cf230b94cf6bc046,83172f4983ceb57d25b013054fe23f9b,6ac82139a801c5eebb1de1f3dbe9201e,a1d275645f04fa46722204cd3c58af06,98aaa6625a97f3a8f5c9e6de2e6c7793,2ab405deaaef0bfbbd12a429f8bfc546,3ca8918bada3f369cb1f38393023a803,66686afa383d4cacea68c0e707c710eb,600d2ea1a4e523b5b7945662ccd0e1e4,8be23c0cfcbf439adb0d15266d1ab612,...,3ca83d359e01a68bb545574fce557397,07c705f17b004dec460060e39758e64a,d905d941b896d68338bd96cd1742eeda,284f730edec0c74c6cffe9b5aa2256df,39b1dcf9fc20454834d9b6f7d19a98eb,6b7e3f645d9eb0cb3b61dcc79459db1b,38088a4e6f67d35ca0327d02d681e969,02f5bc7340fe680bd81ce3b3551396f2,8f7d1ebfef43038bb9e2a9def65cc069,ff76204ace4734146de4d938d2589df1
N4VICF,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8A0F9A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5Y49IM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2I7SIQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
XO59R8,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
post_table = table.loc[post_ids]
post_table.head()

Unnamed: 0,fcefd2cc81e6b698cf230b94cf6bc046,83172f4983ceb57d25b013054fe23f9b,6ac82139a801c5eebb1de1f3dbe9201e,a1d275645f04fa46722204cd3c58af06,98aaa6625a97f3a8f5c9e6de2e6c7793,2ab405deaaef0bfbbd12a429f8bfc546,3ca8918bada3f369cb1f38393023a803,66686afa383d4cacea68c0e707c710eb,600d2ea1a4e523b5b7945662ccd0e1e4,8be23c0cfcbf439adb0d15266d1ab612,...,3ca83d359e01a68bb545574fce557397,07c705f17b004dec460060e39758e64a,d905d941b896d68338bd96cd1742eeda,284f730edec0c74c6cffe9b5aa2256df,39b1dcf9fc20454834d9b6f7d19a98eb,6b7e3f645d9eb0cb3b61dcc79459db1b,38088a4e6f67d35ca0327d02d681e969,02f5bc7340fe680bd81ce3b3551396f2,8f7d1ebfef43038bb9e2a9def65cc069,ff76204ace4734146de4d938d2589df1
5Y49IM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZKJI45,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2I7SIQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCUMU7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AFG7YZ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
pre_table_artifact = Artifact.import_data("FeatureTable[Frequency]", pre_table)

In [10]:
post_table_artifact = Artifact.import_data("FeatureTable[Frequency]", post_table)

In [11]:
pre_table_artifact.save('table-filtered-pre-hct.qza')
post_table_artifact.save('table-filtered-post-hct.qza')

'table-filtered-post-hct.qza'