In [6]:
%matplotlib notebook
import calour as ca
import matplotlib.pyplot as plt
import numpy as np
from calour import Experiment

In [8]:
# Switch matplotlib to interactive mode.
plt.ion()

# Simple Cases

In [9]:
# Load the amplicon test experiment: table, sample metadata and feature
# metadata, with filter_reads=100 and normalize=1000.
a = ca.read_amplicon(
    '../calour/tests/data/test2.biom',
    '../calour/tests/data/test2.sample',
    feature_metadata_file='../calour/tests/data/test2.feature',
    filter_reads=100,
    normalize=1000,
)
# Plot it with the jupyter GUI; the return value is kept in `b`.
b = a.plot(
    sample_field='group',
    feature_field='taxonomy',
    feature_color_bars=['oxygen'],
    gui='jupyter',
)

<IPython.core.display.Javascript object>

In [3]:
# Load the time-series table together with its sample metadata file.
x = ca.read('../calour/tests/data/timeseries.biom', '../calour/tests/data/timeseries.sample')

In [4]:
# Sort the features (axis=1) in place, using np.sum as the sort key.
x.sort_by_data(axis=1, key=np.sum, inplace=True)

Experiment timeseries.biom with 95 samples, 4788 features

In [5]:
# List the sample metadata column names in sorted order.
x.sample_metadata.columns.sort_values()

Index(['#SampleID', 'AGE', 'AGE_UNIT', 'ALTITUDE', 'ANONYMIZED_NAME',
       'ASSIGNED_FROM_GEO', 'Activity', 'BODY_HABITAT', 'BODY_MASS_INDEX',
       'BODY_PRODUCT', 'BODY_SITE', 'BarcodeSequence', 'COLLECTION_DATE',
       'COMMENTS', 'COUNTRY', 'DAY', 'DEPTH', 'DIET', 'Description',
       'ELEVATION', 'ENV_BIOME', 'ENV_FEATURE', 'ENV_MATTER',
       'EXPERIMENT_CENTER', 'EXPERIMENT_DESIGN_DESCRIPTION',
       'EXPERIMENT_TITLE', 'EXTRACTIONKIT_LOT', 'EXTRACTION_ROBOT',
       'HEIGHT_OR_LENGTH', 'HOST_COMMON_NAME', 'HOST_SUBJECT_ID', 'HOST_TAXID',
       'HOUR', 'KEY_SEQ', 'LATITUDE', 'LIBRARY_CONSTRUCTION_PROTOCOL',
       'LIFE_STAGE', 'LONGITUDE', 'LinkerPrimerSequence', 'MASTERMIX_LOT',
       'MF_SAMPLE_NUMBER', 'MINUTES', 'PCR_PRIMERS', 'PLATFORM', 'PLATING',
       'PRIMER_DATE', 'PRIMER_PLATE', 'PROCESSING_ROBOT', 'PUBLIC', 'REGION',
       'RUN_CENTER', 'RUN_DATE', 'RUN_PREFIX', 'SAMPLE_CENTER', 'SAMPLE_PLATE',
       'SAMP_SIZE', 'SEQUENCING_METH', 'SEX', 'SMOKER', 'STUD

In [6]:
# List the feature metadata column names.
x.feature_metadata.columns

Index(['taxonomy', 'id'], dtype='object')

In [7]:
# Show the DAY / HOUR / MINUTES columns before the samples are sorted.
x.sample_metadata[['DAY', 'HOUR', 'MINUTES']]

Unnamed: 0_level_0,DAY,HOUR,MINUTES
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
40.M11.saliva.1168503,1,17,17
46.M11.saliva.1168574,1,18,49
7.M11.saliva.1168517,1,9,33
60.M11.saliva.1168546,1,22,19
85.M11.saliva.1168506,2,11,19
41.M11.saliva.1168551,1,17,32
69.M11.saliva.1168557,2,7,0
38.M11.saliva.1168521,1,16,46
73.M11.saliva.1168533,2,8,15
88.M11.saliva.1168591,2,12,5


In [8]:
# Sort the samples (axis=0) in place by each field in turn, least
# significant field first, so that DAY ends up as the primary key.
# NOTE(review): this relies on sort_by_metadata being a stable sort - confirm.
for field in ['MINUTES', 'HOUR', 'DAY']:
    x.sort_by_metadata(field, axis=0, inplace=True)

In [9]:
# Show the DAY / HOUR / MINUTES columns again, now that the samples are sorted.
x.sample_metadata[['DAY', 'HOUR', 'MINUTES']]

Unnamed: 0_level_0,DAY,HOUR,MINUTES
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.M11.saliva.1168567,1,8,25
2.M11.saliva.1168561,1,8,38
3.M11.saliva.1168562,1,9,17
4.M11.saliva.1168558,1,9,29
5.M11.saliva.1168507,1,9,30
6.M11.saliva.1168502,1,9,31
7.M11.saliva.1168517,1,9,33
8.M11.saliva.1168543,1,9,34
9.M11.saliva.1168569,1,9,49
10.M11.saliva.1168573,1,10,4


In [10]:
# Plot the sorted experiment with the jupyter GUI, using DAY as the sample
# field and taxonomy as the feature field (yticklabels_max=20).
y = x.plot(sample_field='DAY', feature_field='taxonomy', yticklabels_max=20, gui='jupyter')

<IPython.core.display.Javascript object>

# Decomposition

# Moving Pictures

In [7]:
# Load the moving-pictures table with its sample metadata, passing normalize=10000.
mov = ca.read('data/moving_pic.biom', 'data/moving_pic.sample.txt', normalize=10000)

In [7]:
# List the sample metadata column names of the moving-pictures experiment.
mov.sample_metadata.columns

Index(['#SampleID', '#SampleID.1', 'BarcodeSequence', 'LinkerPrimerSequence',
       'DAYS_SINCE_EPOCH', 'TARGET_SUBFRAGMENT', 'ASSIGNED_FROM_GEO',
       'EXPERIMENT_CENTER', 'TITLE', 'COMMON_SAMPLE_SITE', 'RUN_PREFIX',
       'HOST_COMMON_NAME', 'DEPTH', 'COMMON_NAME', 'BODY_SITE', 'ELEVATION',
       'RUN_DATE', 'DAYS_SINCE_EXPERIMENT_START', 'COLLECTION_DATE',
       'ALTITUDE', 'ENV_BIOME', 'SEX', 'PLATFORM', 'STUDY_CENTER', 'COUNTRY',
       'HOST_SUBJECT_ID', 'ANONYMIZED_NAME', 'TAXON_ID', 'SAMPLE_CENTER',
       'SAMP_SIZE', 'MISLABELED', 'LONGITUDE', 'STUDY_ID',
       'EXPERIMENT_DESIGN_DESCRIPTION', 'Description_duplicate',
       'BODY_HABITAT', 'SEQUENCING_METH', 'ENV_MATTER', 'TARGET_GENE',
       'ENV_FEATURE', 'KEY_SEQ', 'BODY_PRODUCT', 'AGE_IN_YEARS', 'RUN_CENTER',
       'LIBRARY_CONSTRUCTION_PROTOCOL', 'LATITUDE', 'REGION',
       'HOST_INDIVIDUAL', 'Description', '_calour_original_abundance'],
      dtype='object')

In [8]:
# Count samples (len of '#SampleID') for every BODY_SITE row and
# HOST_SUBJECT_ID column; combinations with no samples are filled with 0.
mov.sample_metadata.pivot_table(values='#SampleID', columns=['HOST_SUBJECT_ID'], index=['BODY_SITE'], aggfunc=len, fill_value=0)

HOST_SUBJECT_ID,F4,M3
BODY_SITE,Unnamed: 1_level_1,Unnamed: 2_level_1
UBERON:feces,131,336
UBERON:saliva,135,374
UBERON:sebum,268,724


In [8]:
def explore(host, site=None, min_abundance=10):
    """Filter, sort, cluster and plot the global ``mov`` experiment for one host.

    Parameters
    ----------
    host
        Value passed to ``filter_samples`` for the 'HOST_SUBJECT_ID' field.
    site : optional
        Value passed to ``filter_samples`` for the 'BODY_SITE' field. When
        ``None`` (the default) no body-site filtering is done.
    min_abundance : optional
        Forwarded to ``cluster_features``. Defaults to 10, the value that
        used to be hard-coded here.

    Returns
    -------
    None
        The function is called for the plot it produces.
    """
    # NOTE(review): presumably filter_samples returns a new experiment when
    # inplace is not requested, so the in-place steps below leave `mov`
    # untouched - confirm.
    exp = mov.filter_samples('HOST_SUBJECT_ID', host)
    if site is not None:
        exp.filter_samples('BODY_SITE', site, inplace=True)
    # Secondary key first, primary key (BODY_SITE) last.
    # NOTE(review): assumes sort_samples is a stable sort - confirm.
    for field in ('DAYS_SINCE_EXPERIMENT_START', 'BODY_SITE'):
        exp.sort_samples(field, inplace=True)
    exp.cluster_features(min_abundance=min_abundance, inplace=True)
    exp.plot(gui='jupyter', sample_color_bars=['HOST_SUBJECT_ID', 'BODY_SITE'])

In [9]:
# Subject M3 with no body-site filter (site defaults to None).
explore('M3')

<IPython.core.display.Javascript object>

In [4]:
# Subject M3, restricted to BODY_SITE 'UBERON:feces'.
explore('M3', 'UBERON:feces')

<IPython.core.display.Javascript object>

In [5]:
def explore2(host, site=None, min_abundance=10):
    """Variant of ``explore`` that filters by abundance and sorts by center of mass.

    Works on the global ``mov`` experiment. Instead of clustering features,
    it calls ``filter_min_abundance`` before sorting the samples and
    ``sort_center_mass`` afterwards, then plots.

    Parameters
    ----------
    host
        Value passed to ``filter_samples`` for the 'HOST_SUBJECT_ID' field.
    site : optional
        Value passed to ``filter_samples`` for the 'BODY_SITE' field. When
        ``None`` (the default) no body-site filtering is done.
    min_abundance : optional
        Forwarded to ``filter_min_abundance``. Defaults to 10, the value
        that used to be hard-coded here.

    Returns
    -------
    None
        The function is called for the plot it produces.
    """
    # NOTE(review): presumably filter_samples returns a new experiment when
    # inplace is not requested, so the in-place steps below leave `mov`
    # untouched - confirm.
    exp = mov.filter_samples('HOST_SUBJECT_ID', host)
    if site is not None:
        exp.filter_samples('BODY_SITE', site, inplace=True)
    exp.filter_min_abundance(min_abundance, inplace=True)
    # Secondary key first, primary key (BODY_SITE) last.
    # NOTE(review): assumes sort_samples is a stable sort - confirm.
    for field in ('DAYS_SINCE_EXPERIMENT_START', 'BODY_SITE'):
        exp.sort_samples(field, inplace=True)
    exp.sort_center_mass(inplace=True)
    exp.plot(gui='jupyter', sample_color_bars=['HOST_SUBJECT_ID', 'BODY_SITE'])
# Subject M3, restricted to BODY_SITE 'UBERON:feces', using the explore2 variant.
explore2('M3', 'UBERON:feces')

<IPython.core.display.Javascript object>

In [15]:
# Subject M3, restricted to BODY_SITE 'UBERON:sebum'.
explore('M3', 'UBERON:sebum')

<IPython.core.display.Javascript object>