### Subsetting feature table into pre- and post-HCT samples

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import seaborn as sns
from scipy.stats import shapiro, kruskal, f_oneway
from qiime2 import Artifact


data_dir = 'Alien_data'
    
%matplotlib inline

In [3]:
table = q2.Artifact.load(f'{data_dir}/table-filtered.qza').view(pd.DataFrame)

In [4]:
table.head()

Unnamed: 0,3c63d8484c63ea7146f6fe93a4205e7e,0c04118a12f674ad0547f15c1c44e8da,ef53d9e1ef0b393a6a4f41f8dfe9124b,6e40232a52b1de392b9d7be3fd01308e,e91391066147399757ef892640f3203c,e0da32b74ec6285a835af1e7e5bb06e5,d4b67b45eb34ce9a1ad48ef1d3e8a8c7,9f1cf1b2ea8846178d1cc2a9c6f64ed2,39b1dcf9fc20454834d9b6f7d19a98eb,a3b262705104c91b68054cdda3fbc1eb,...,01b5b00f6380652e7e30b255de5fe6d3,d1e8ff7d1318c7e4aa9b53f21b310bd5,9543abb453f1445202169e08d372441d,aa49db5cc37fec090a1d5ec84934f30a,a7ad30f3079ad8fce1bd9f22c5d08c3f,dc159beed16e7948f8f998310c869774,2e34fd3c31282715d7917cea0af62bf7,5646514a4af36a054f6deddd550055e6,52e72c231581700241e8eee8d5eff714,cdd8698ef0a1bdc1944cf885f60324da
0DOSLC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0KB68F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0TR6O2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0UWI7J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0WG67D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0


In [5]:
meta = pd.read_csv(f'{data_dir}/sample_metadata.tsv', sep = "\t")
meta.head()

Unnamed: 0,sampleid,stool_consistency,hct_source,disease,categorical_time_relative_to_engraftment,week_relative_to_hct,timepoint_of_transplant,day_relative_to_nearest_hct,alleged_abduction
0,N4VICF,formed,cord,Myelodysplastic Syndromes,pre,-1.0,6.0,-6.0,0
1,8A0F9A,formed,cord,Leukemia,pre,-2.0,7.0,-7.0,0
2,5Y49IM,semi-formed,cord,Leukemia,peri,-1.0,7.0,0.0,1
3,ZKJI45,semi-formed,cord,Leukemia,post,1.0,7.0,8.0,0
4,2I7SIQ,liquid,cord,Leukemia,peri,-1.0,0.0,0.0,1


In [6]:
pre_ids = meta.loc[meta['day_relative_to_nearest_hct'] <= 0].sampleid
pre_ids

0      N4VICF
1      8A0F9A
2      5Y49IM
4      2I7SIQ
6      XO59R8
        ...  
156    HCMB0Y
157    P0A2X1
158    F89RLS
159    ZQT8ZN
160    62ARKK
Name: sampleid, Length: 80, dtype: object

In [7]:
post_ids = meta.loc[meta['day_relative_to_nearest_hct'] >= 0].sampleid
post_ids

2      5Y49IM
3      ZKJI45
4      2I7SIQ
5      PCUMU7
7      AFG7YZ
        ...  
144    RE2G74
145    K613DJ
148    0WG67D
151    95NCMG
152    4NU0Q9
Name: sampleid, Length: 87, dtype: object

In [8]:
pre_table = table.loc[pre_ids]
pre_table.head()

Unnamed: 0,3c63d8484c63ea7146f6fe93a4205e7e,0c04118a12f674ad0547f15c1c44e8da,ef53d9e1ef0b393a6a4f41f8dfe9124b,6e40232a52b1de392b9d7be3fd01308e,e91391066147399757ef892640f3203c,e0da32b74ec6285a835af1e7e5bb06e5,d4b67b45eb34ce9a1ad48ef1d3e8a8c7,9f1cf1b2ea8846178d1cc2a9c6f64ed2,39b1dcf9fc20454834d9b6f7d19a98eb,a3b262705104c91b68054cdda3fbc1eb,...,01b5b00f6380652e7e30b255de5fe6d3,d1e8ff7d1318c7e4aa9b53f21b310bd5,9543abb453f1445202169e08d372441d,aa49db5cc37fec090a1d5ec84934f30a,a7ad30f3079ad8fce1bd9f22c5d08c3f,dc159beed16e7948f8f998310c869774,2e34fd3c31282715d7917cea0af62bf7,5646514a4af36a054f6deddd550055e6,52e72c231581700241e8eee8d5eff714,cdd8698ef0a1bdc1944cf885f60324da
N4VICF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
8A0F9A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5Y49IM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2I7SIQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
XO59R8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [9]:
post_table = table.loc[post_ids]
post_table.head()

Unnamed: 0,3c63d8484c63ea7146f6fe93a4205e7e,0c04118a12f674ad0547f15c1c44e8da,ef53d9e1ef0b393a6a4f41f8dfe9124b,6e40232a52b1de392b9d7be3fd01308e,e91391066147399757ef892640f3203c,e0da32b74ec6285a835af1e7e5bb06e5,d4b67b45eb34ce9a1ad48ef1d3e8a8c7,9f1cf1b2ea8846178d1cc2a9c6f64ed2,39b1dcf9fc20454834d9b6f7d19a98eb,a3b262705104c91b68054cdda3fbc1eb,...,01b5b00f6380652e7e30b255de5fe6d3,d1e8ff7d1318c7e4aa9b53f21b310bd5,9543abb453f1445202169e08d372441d,aa49db5cc37fec090a1d5ec84934f30a,a7ad30f3079ad8fce1bd9f22c5d08c3f,dc159beed16e7948f8f998310c869774,2e34fd3c31282715d7917cea0af62bf7,5646514a4af36a054f6deddd550055e6,52e72c231581700241e8eee8d5eff714,cdd8698ef0a1bdc1944cf885f60324da
5Y49IM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZKJI45,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2I7SIQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PCUMU7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AFG7YZ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
pre_table_artifact = Artifact.import_data("FeatureTable[Frequency]", pre_table)

In [15]:
post_table_artifact = Artifact.import_data("FeatureTable[Frequency]", post_table)

In [16]:
pre_table_artifact.save('table-filtered-pre-hct.qza')
post_table_artifact.save('table-filtered-post-hct.qza')

'table-filtered-post-hct.qza'