Skip to content

Commit

Permalink
Merge pull request #169 from wasade/8-collapse_samples
Browse files Browse the repository at this point in the history
8 collapse samples
  • Loading branch information
squirrelo committed Oct 24, 2015
2 parents a3fc767 + 323c9f4 commit dd16cf8
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ script:
- runipy ipynb/primary-processing/5-alpha_diversity.ipynb
- runipy ipynb/primary-processing/6-beta-diversity.ipynb
- runipy ipynb/primary-processing/7-taxonomy-summaries.ipynb
# runipy ipynb/primary-processing/7.5-collapse_samples.ipynb
- runipy ipynb/primary-processing/8-collapse_samples.ipynb
44 changes: 22 additions & 22 deletions americangut/notebook_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,30 +150,30 @@
'ag-L6-taxa-fecal-biom': '7/taxa/otu_table_fecal_L6.biom',

# collapsed samples
'ag-100nt-1k-biom': '7.5/ag-100nt-1k.biom',
'ag-100nt-1k-fecal-biom': '7.5/ag-100nt-1k-fecal.biom',
'ag-100nt-1k-skin-biom': '7.5/ag-100nt-1k-oral.biom',
'ag-100nt-1k-oral-biom': '7.5/ag-100nt-1k-skin.biom',

'ag-100nt-1k-fecal-sex-biom': '7.5/ag-100nt-1k-fecal-sex.biom',
'ag-100nt-1k-fecal-diet-biom': '7.5/ag-100nt-1k-fecal-diet.biom',
'ag-100nt-1k-fecal-age-biom': '7.5/ag-100nt-1k-fecal-age.biom',
'ag-100nt-1k-fecal-bmi-biom': '7.5/ag-100nt-1k-fecal-bmi.biom',

'ag-100nt-1k-oral-sex-biom': '7.5/ag-100nt-1k-oral-sex.biom',
'ag-100nt-1k-oral-diet-biom': '7.5/ag-100nt-1k-oral-diet.biom',
'ag-100nt-1k-oral-age-biom': '7.5/ag-100nt-1k-oral-age.biom',
'ag-100nt-1k-oral-flossing-biom': '7.5/ag-100nt-1k-oral-flossing.biom',

'ag-100nt-1k-skin-sex-biom': '7.5/ag-100nt-1k-skin-sex.biom',
'ag-100nt-1k-skin-cosmetics-biom': '7.5/ag-100nt-1k-skin-cosmetics.biom',
'ag-100nt-1k-skin-age-biom': '7.5/ag-100nt-1k-skin-age.biom',
'ag-100nt-1k-skin-hand-biom': '7.5/ag-100nt-1k-skin-hand.biom',
'ag-100nt-1k-biom': '8/ag-100nt-1k.biom',
'ag-100nt-1k-fecal-biom': '8/ag-100nt-1k-fecal.biom',
'ag-100nt-1k-skin-biom': '8/ag-100nt-1k-oral.biom',
'ag-100nt-1k-oral-biom': '8/ag-100nt-1k-skin.biom',

'ag-100nt-1k-fecal-sex-biom': '8/ag-100nt-1k-fecal-sex.biom',
'ag-100nt-1k-fecal-diet-biom': '8/ag-100nt-1k-fecal-diet.biom',
'ag-100nt-1k-fecal-age-biom': '8/ag-100nt-1k-fecal-age.biom',
'ag-100nt-1k-fecal-bmi-biom': '8/ag-100nt-1k-fecal-bmi.biom',

'ag-100nt-1k-oral-sex-biom': '8/ag-100nt-1k-oral-sex.biom',
'ag-100nt-1k-oral-diet-biom': '8/ag-100nt-1k-oral-diet.biom',
'ag-100nt-1k-oral-age-biom': '8/ag-100nt-1k-oral-age.biom',
'ag-100nt-1k-oral-flossing-biom': '8/ag-100nt-1k-oral-flossing.biom',

'ag-100nt-1k-skin-sex-biom': '8/ag-100nt-1k-skin-sex.biom',
'ag-100nt-1k-skin-cosmetics-biom': '8/ag-100nt-1k-skin-cosmetics.biom',
'ag-100nt-1k-skin-age-biom': '8/ag-100nt-1k-skin-age.biom',
'ag-100nt-1k-skin-hand-biom': '8/ag-100nt-1k-skin-hand.biom',

# per-sample results
'successful-ids': '8/successful_ids.txt',
'unsuccessful-ids': '8/unsuccessful_ids.txt',
'per-sample-results': '8/per-sample-results',
'successful-ids': '9/successful_ids.txt',
'unsuccessful-ids': '9/unsuccessful_ids.txt',
'per-sample-results': '9/per-sample-results',
}


Expand Down
211 changes: 211 additions & 0 deletions ipynb/primary-processing/8-collapse_samples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
Some of the per-sample results figures require various slices and perspectives of the data. This notebook performs these necessary additional summarizations.

```python
>>> # os is used for the os.stat() size checks in the final cell.
>>> import os
...
>>> # agenv provides the path table (agenv.paths), the rarefaction depth and
>>> # activate(); agu provides get_existing_path() and get_new_path().
>>> import americangut.notebook_environment as agenv
>>> import americangut.util as agu
...
>>> # NOTE(review): qdr is not referenced in any cell visible in this notebook.
>>> import qiime_default_reference as qdr
...
>>> # Activate chapter '8' and keep whatever activate() returns in chp_path.
>>> # presumably this prepares the chapter's working directory -- confirm in agenv.activate
>>> chp_path = agenv.activate('8')
```

Let's make sure we have the paths we need.

```python
>>> # Inputs read by this notebook: the 100nt OTU table (passed as -i to
>>> # single_rarefaction.py) and the cleaned metadata (passed as -m below).
>>> # presumably get_existing_path fails when the file is missing -- confirm in americangut.util
>>> ag_100nt_biom = agu.get_existing_path(agenv.paths['ag-100nt-biom'])
>>> ag_cleaned_md = agu.get_existing_path(agenv.paths['ag-cleaned-md'])
```

And let's set up all the paths that we're going to create.

```python
>>> # Output locations, one per agenv.paths key. Each variable is later
>>> # interpolated into a shell command as $name, so the names must not change.
>>> # Rarefied table and its per-body-site subsets.
>>> ag_100nt_1k_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-biom'])
>>> ag_100nt_1k_fecal_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-fecal-biom'])
>>> ag_100nt_1k_skin_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-skin-biom'])
>>> ag_100nt_1k_oral_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-oral-biom'])
...
>>> # Paths for the fecal table collapsed over sex, diet, age and BMI.
>>> ag_100nt_1k_fecal_sex_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-fecal-sex-biom'])
>>> ag_100nt_1k_fecal_diet_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-fecal-diet-biom'])
>>> ag_100nt_1k_fecal_age_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-fecal-age-biom'])
>>> ag_100nt_1k_fecal_bmi_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-fecal-bmi-biom'])
...
>>> # Paths for the oral table collapsed over sex, diet, age and flossing.
>>> ag_100nt_1k_oral_sex_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-oral-sex-biom'])
>>> ag_100nt_1k_oral_diet_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-oral-diet-biom'])
>>> ag_100nt_1k_oral_age_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-oral-age-biom'])
>>> ag_100nt_1k_oral_flossing_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-oral-flossing-biom'])
...
>>> # Paths for the skin table collapsed over sex, cosmetics, age and hand.
>>> ag_100nt_1k_skin_sex_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-skin-sex-biom'])
>>> ag_100nt_1k_skin_cosmetics_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-skin-cosmetics-biom'])
>>> ag_100nt_1k_skin_age_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-skin-age-biom'])
>>> ag_100nt_1k_skin_hand_biom = agu.get_new_path(agenv.paths['ag-100nt-1k-skin-hand-biom'])
...
>>> # Throwaway path handed to collapse_samples.py as --output_mapping_fp in
>>> # every call below; the mapping file written there is not read again in
>>> # this notebook.
>>> # NOTE(review): 'foo' is a relative path, so it resolves against the
>>> # working directory in effect when each command runs.
>>> ignored = 'foo'
```

First, we're going to operate on rarefied data again.

```python
>>> # Rarefaction depth comes from the shared notebook environment; it is
>>> # passed to single_rarefaction.py as -d in the next cell.
>>> depth = agenv.get_rarefaction_depth()
```

```python
>>> # Rarefy the 100nt table ($ag_100nt_biom) at $depth and write the result
>>> # to $ag_100nt_1k_biom, which every later cell uses as its starting table.
>>> !single_rarefaction.py -i $ag_100nt_biom \
... -o $ag_100nt_1k_biom \
... -d $depth
```

Next, we're going to partition the data into per-body site tables.

```python
>>> # Fecal subset: filter the rarefied table using the metadata selector
>>> # SIMPLE_BODY_SITE:FECAL and write $ag_100nt_1k_fecal_biom.
>>> !filter_samples_from_otu_table.py -i $ag_100nt_1k_biom \
... -o $ag_100nt_1k_fecal_biom \
... -m $ag_cleaned_md \
... -s "SIMPLE_BODY_SITE:FECAL"
```

```python
>>> # Skin subset: filter the rarefied table using the metadata selector
>>> # SIMPLE_BODY_SITE:SKIN and write $ag_100nt_1k_skin_biom.
>>> !filter_samples_from_otu_table.py -i $ag_100nt_1k_biom \
... -o $ag_100nt_1k_skin_biom \
... -m $ag_cleaned_md \
... -s "SIMPLE_BODY_SITE:SKIN"
```

```python
>>> # Oral subset: filter the rarefied table using the metadata selector
>>> # SIMPLE_BODY_SITE:ORAL and write $ag_100nt_1k_oral_biom.
>>> !filter_samples_from_otu_table.py -i $ag_100nt_1k_biom \
... -o $ag_100nt_1k_oral_biom \
... -m $ag_cleaned_md \
... -s "SIMPLE_BODY_SITE:ORAL"
```

Finally, within each body site, we're going to collapse over categories of interest.

```python
>>> # Collapse the fecal table over the SEX metadata field with --normalize;
>>> # the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_fecal_sex_biom \
... --normalize \
... -b $ag_100nt_1k_fecal_biom \
... --collapse_fields "SEX" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the fecal table over the DIET_TYPE metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_fecal_diet_biom \
... --normalize \
... -b $ag_100nt_1k_fecal_biom \
... --collapse_fields "DIET_TYPE" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the fecal table over the AGE_CAT metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_fecal_age_biom \
... --normalize \
... -b $ag_100nt_1k_fecal_biom \
... --collapse_fields "AGE_CAT" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the fecal table over the BMI_CAT metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_fecal_bmi_biom \
... --normalize \
... -b $ag_100nt_1k_fecal_biom \
... --collapse_fields "BMI_CAT" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the oral table over the SEX metadata field with --normalize;
>>> # the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_oral_sex_biom \
... --normalize \
... -b $ag_100nt_1k_oral_biom \
... --collapse_fields "SEX" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the oral table over the DIET_TYPE metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_oral_diet_biom \
... --normalize \
... -b $ag_100nt_1k_oral_biom \
... --collapse_fields "DIET_TYPE" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the oral table over the AGE_CAT metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_oral_age_biom \
... --normalize \
... -b $ag_100nt_1k_oral_biom \
... --collapse_fields "AGE_CAT" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the oral table over the FLOSSING_FREQUENCY metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_oral_flossing_biom \
... --normalize \
... -b $ag_100nt_1k_oral_biom \
... --collapse_fields "FLOSSING_FREQUENCY" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the skin table over the SEX metadata field with --normalize;
>>> # the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_skin_sex_biom \
... --normalize \
... -b $ag_100nt_1k_skin_biom \
... --collapse_fields "SEX" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the skin table over the COSMETICS_FREQUENCY metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_skin_cosmetics_biom \
... --normalize \
... -b $ag_100nt_1k_skin_biom \
... --collapse_fields "COSMETICS_FREQUENCY" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the skin table over the AGE_CAT metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_skin_age_biom \
... --normalize \
... -b $ag_100nt_1k_skin_biom \
... --collapse_fields "AGE_CAT" \
... --output_mapping_fp $ignored
```

```python
>>> # Collapse the skin table over the DOMINANT_HAND metadata field with
>>> # --normalize; the mapping output goes to the throwaway $ignored path.
>>> !collapse_samples.py -m $ag_cleaned_md \
... --output_biom_fp $ag_100nt_1k_skin_hand_biom \
... --normalize \
... -b $ag_100nt_1k_skin_biom \
... --collapse_fields "DOMINANT_HAND" \
... --output_mapping_fp $ignored
```

As usual, let's make sure we have files.

```python
>>> assert os.stat(ag_100nt_1k_fecal_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_skin_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_oral_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_fecal_sex_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_fecal_diet_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_fecal_age_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_fecal_bmi_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_oral_sex_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_oral_diet_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_oral_age_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_oral_flossing_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_skin_sex_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_skin_hand_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_skin_age_biom).st_size > 0
>>> assert os.stat(ag_100nt_1k_skin_cosmetics_biom).st_size > 0
```

0 comments on commit dd16cf8

Please sign in to comment.