In [8]:
import calour as ca
import pandas as pd
# Log level 20 is the numeric value of logging.INFO in the Python standard
# library; the INFO lines in the cell outputs come from this setting.
ca.set_log_level(20)
# Use the interactive "notebook" matplotlib backend for the figures below.
%matplotlib notebook

In [9]:
# Load the BIOM feature table together with its sample mapping file.
# NOTE(review): `normalize` / `min_reads` are presumably the per-sample read
# normalization target and the minimum-reads sample filter — confirm against
# the installed calour version (the log shows 370 samples loaded, 340 kept).
dat = ca.read_amplicon(
    './all.fixids.biom',
    './map.txt',
    normalize=10000,
    min_reads=1000,
)

2017-08-25 11:57:41 INFO loaded 370 samples, 2442 observations
2017-08-25 11:57:41 INFO 340 remaining


In [10]:
# Cluster the features of the full experiment.
# NOTE(review): the positional 10 is presumably a minimum-abundance filter
# (the log reports 2404 features remaining) — confirm.
datc = dat.cluster_features(10)

2017-08-25 11:57:41 INFO 2404 remaining


In [11]:
# Display the experiment summary (sample and feature counts).
datc

AmpliconExperiment all.fixids.biom with 340 samples, 2404 features

In [12]:
# List the sample metadata fields available for sorting and filtering.
datc.sample_metadata.columns

Index(['#SampleID', 'extraction', 'BarcodeSequence', 'LinkerPrimerSequence',
       'Country', 'Date', 'Species', 'Location', 'Study.ID', 'Survey.Habitat',
       'Start.Loc', 'End.Loc', 'Experimental.Cat', 'startloc', 'endloc',
       'Experiment.ID', 'Life.Stage', 'Pond.Ind', 'Individual_ID', 'Swab_Type',
       'Extraction_ID', 'SequenceRun_ID', 'Description',
       '_calour_original_abundance'],
      dtype='object')

In [13]:
# Order the samples by their extraction identifier.
datc = datc.sort_samples('Extraction_ID')

In [14]:
# Label samples that have no end location as 'Prior'.
# Item assignment is used (rather than attribute assignment) so the write
# always targets the 'endloc' metadata column, and the vectorized fillna
# replaces the per-element pd.isnull loop with the same result.
datc.sample_metadata['endloc'] = datc.sample_metadata['endloc'].fillna('Prior')

# pond<->stream transfer experiment

In [15]:
# Keep only the samples whose Study.ID is 'Transplant'.
trans = datc.filter_samples('Study.ID', 'Transplant')

In [16]:
# Re-cluster the features on the transplant subset
# (the log reports 2054 features remaining).
trans = trans.cluster_features(10)

2017-08-25 11:57:54 INFO 2054 remaining


In [17]:
# Order the transplant samples by experimental category.
trans = trans.sort_samples('Experimental.Cat')

## Skin

In [18]:
# Skin subset: filter to skin swabs, re-cluster the features, then normalize.
# NOTE(review): normalize(100, axis='s') presumably rescales each sample to a
# total of 100 — confirm against the calour docs.
tskin = (
    trans
    .filter_samples('Swab_Type', 'skin')
    .cluster_features(10)
    .normalize(100, axis='s')
)

2017-08-25 11:57:56 INFO 1650 remaining


## All features (fig1D)

In [183]:
# Heatmap of all skin features with samples sorted by experimental category;
# color bars show the end and start location of each sample.
f = tskin.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
    xticklabel_rot=None,
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x147970e48>

In [184]:
# Save the figure currently held in `f` as a PDF (figure 1D).
f.save_figure('figure-1D-skin-all.pdf')

## pond/stream cluster (fig S1A)

In [210]:
# Same skin heatmap, restricted via `rect` to the pond/stream feature cluster.
# NOTE(review): `rect` presumably sets the initial zoom window of the heatmap —
# confirm the coordinate order against the calour heatmap docs.
f = tskin.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
    xticklabel_rot=None,
    rect=[-0.5, 118.5, 1572, 1505],
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x14dbb64a8>

In [211]:
# Save the figure currently held in `f` as a PDF (figure S1A).
f.save_figure('figure-S1A-skin-source.pdf')

## extraction plate cluster (fig1E, zoom 2)

In [187]:
# Skin heatmap restricted via `rect` to the extraction-plate feature cluster,
# drawn with a wider color range (0-50) and the legend colorbar enabled.
# NOTE(review): `rect` presumably sets the initial zoom window — confirm.
f = tskin.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    show_legend_colorbar=True,
    clim=[0, 50],
    xticklabel_rot=None,
    rect=[-0.5, 118.5, 1648, 1598],
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x1485d5630>

In [188]:
# Save the figure currently held in `f` as a PDF (figure 1E).
f.save_figure('figure-1E-skin-extraction.pdf')

## extraction cluster ordered by extraction (fig1F)

In [219]:
# Re-order the skin samples by extraction plate for the next figure.
ttskin = tskin.sort_samples('extraction')

In [227]:
# Plot the extraction-ordered skin samples over the same `rect` window as the
# extraction-plate cluster figure, labelling samples by extraction plate.
f = ttskin.plot(
    sample_field='extraction',
    gui='jupyter',
    feature_field=None,
    clim=[0, 20],
    xticklabel_rot=None,
    rect=[-0.5, 118.5, 1648, 1598],
)
f.axes.set_xlabel('extraction plate')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x1537379e8>

In [229]:
# Save the figure currently held in `f` as a PDF (figure 1F, third revision).
f.save_figure('figure-1F-skin-extraction-ordered-v3.pdf')

## supervised difference between pond and stream (fig S1B)

In [192]:
# Differential abundance on skin samples: Experimental.Cat in {P, P>P}
# versus Experimental.Cat in {S, S>S}.
dd = tskin.diff_abundance(
    'Experimental.Cat',
    ['P', 'P>P'],
    ['S', 'S>S'],
)

2017-08-21 14:40:22 INFO 69 samples with both values
2017-08-21 14:40:22 INFO 1472 remaining
2017-08-21 14:40:22 INFO 39 samples with value 1 (['P', 'P>P'])
2017-08-21 14:40:23 INFO method meandiff. number of higher in ['P', 'P>P'] : 75. number of higher in ['S', 'S>S'] : 39. total 114


In [193]:
# Heatmap of the differentially abundant skin features, samples sorted by
# experimental category; the legend colorbar is turned off for this panel.
f = dd.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    show_legend_colorbar=False,
    clim=[0, 20],
    xticklabel_rot=None,
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x14b217518>

In [194]:
# Save the figure currently held in `f` as a PDF (figure S1B).
f.save_figure('figure-S1B-supervised-pond-stream.pdf')

## Gut

In [19]:
# Gut subset: filter to intestine samples, re-cluster the features, then
# normalize; the trailing expression displays the experiment summary.
# NOTE(review): normalize(100, axis='s') presumably rescales each sample to a
# total of 100 — confirm against the calour docs.
tgut = (
    trans
    .filter_samples('Swab_Type', 'Intestines')
    .cluster_features(10)
    .normalize(100, axis='s')
)
tgut

2017-08-25 11:58:53 INFO 975 remaining


AmpliconExperiment all.fixids.biom with 154 samples, 975 features

## All features (fig1A)

In [20]:
# Heatmap of all gut features with samples sorted by experimental category;
# color bars show the end and start location of each sample.
f = tgut.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
    xticklabel_rot=None,
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11302eda0>

In [207]:
# Save the figure currently held in `f` as a PDF (figure 1A).
f.save_figure('figure-1A-gut-all.pdf')

## pond/stream cluster (fig1B)

In [21]:
# Gut heatmap restricted via `rect` to the pond/stream feature cluster, with
# the legend colorbar enabled.
# NOTE(review): `rect` presumably sets the initial zoom window — confirm the
# coordinate order against the calour heatmap docs.
f = tgut.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    show_legend_colorbar=True,
    clim=[0, 20],
    xticklabel_rot=None,
    rect=[-0.5, 153.5, 415, 300],
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11782cb70>

In [209]:
# Save the figure currently held in `f` as a PDF (figure 1B).
f.save_figure('figure-1B-gut-zoom.pdf')

## supervised pond/stream bacteria (fig1C)

In [22]:
# Rebuild the gut subset for the supervised analysis, this time without
# re-clustering and using compositional normalization to a total of 100.
# NOTE(review): this rebinds `tgut`, which an earlier cell defined with
# cluster_features(10).normalize(100, axis='s'); re-running the earlier gut
# heatmap cells after this one would plot different data. Consider a
# distinct name if the notebook is meant to be re-runnable out of order.
tgut = (
    trans
    .filter_samples('Swab_Type', 'Intestines')
    .normalize_compositional(total=100)
)

2017-08-25 12:00:36 INFO 2 remaining
2017-08-25 12:00:36 INFO ignoring 2 features


In [23]:
# Differential abundance on gut samples: Experimental.Cat in {P, P>P}
# versus Experimental.Cat in {S, S>S}.
dd = tgut.diff_abundance(
    'Experimental.Cat',
    ['P', 'P>P'],
    ['S', 'S>S'],
)

2017-08-25 12:00:37 INFO 91 samples with both values
2017-08-25 12:00:37 INFO 1070 remaining
2017-08-25 12:00:37 INFO 47 samples with value 1 (['P', 'P>P'])
2017-08-25 12:00:37 INFO method meandiff. number of higher in ['P', 'P>P'] : 176. number of higher in ['S', 'S>S'] : 87. total 263


In [24]:
# Heatmap of the differentially abundant gut features, samples sorted by
# experimental category; the legend colorbar is turned off for this panel.
f = dd.plot_sort(
    'Experimental.Cat',
    gui='jupyter',
    sample_color_bars=['endloc', 'startloc'],
    feature_field=None,
    show_legend_colorbar=False,
    clim=[0, 20],
    xticklabel_rot=None,
)
f.axes.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11369d470>

In [203]:
# Save the figure currently held in `f` as a PDF (figure 1C).
f.save_figure('figure-1C-gut-supervised-pond-stream.pdf')