## Step 4: Summarize Beta Diversity Across Groups
**Goal: To create list of all combination of beta diversities: RS210/WoLR2, Unifraq Weighted, Unifraq Unweighted, RPCA at the genome level**

Note: This currently only works in the comad env due to deciode errors

### Imports

In [2]:
import pandas as pd

from qiime2.plugins.diversity.visualizers import alpha_rarefaction
from qiime2 import Artifact, Metadata, Visualization
import biom
from skbio.stats.distance import permanova

from python_scripts.beta_diversity import *

### Functions

In [3]:
def call_beta(fn, metric, rarefaction=2000, zebra_level='0.1', numRares=10, decoide_min_feature_count=10):
    
    #Metadata
    metadata = pd.read_csv('processed_data/metadata/metadata_' + fn + '.tsv', sep='\t')
    
    #biom tables in df format
    df_rs210_genome = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_rs210_scrubbed_zebraFilter' + zebra_level + '.tsv' , sep='\t', index_col = 0)
    df_wol2_genome = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_wol2_scrubbed_zebraFilter' + zebra_level + '.tsv' , sep='\t', index_col = 0)
   
    rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = all_beta(
        df_rs210_genome, 
        df_wol2_genome, 
        metadata, 
        rarefaction,
        metric,
        numRares= numRares,
        decoide_min_feature_count= decoide_min_feature_count)
    
    #return(rs210_rpca_genome, wol2_uwUni_genome, wol2_wUni_genome, wol2_rpca_genome, wol2_rpca_rare_genome, rs210_rpca_rare_genome)
    return(rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome)


In [4]:
def call_rare(fn, zebra_level='0.1'):
    
    #Metadata
    metadata = pd.read_csv('processed_data/metadata/metadata_' + fn + '.tsv', sep='\t').set_index('sample_name', drop = True)
    q2_meta = Metadata(metadata)
    
    #import tables
    df_rs210_genome = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_rs210_scrubbed_zebraFilter' + zebra_level + '.tsv' , sep='\t', index_col = 0)
    df_wol2_genome = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_wol2_scrubbed_zebraFilter' + zebra_level + '.tsv' , sep='\t', index_col = 0)
        
    #Convert table to q2 object
    ft_rs210_genome = Artifact.import_data("FeatureTable[Frequency]", df_rs210_genome.T) 
    ft_wol2_genome = Artifact.import_data("FeatureTable[Frequency]", df_wol2_genome.T) 
    
    #Calculate alpha rarefaction curves
    ar_rs210_genome = alpha_rare_curve(ft_rs210_genome, 2300, metadata)
    ar_wol2_genome = alpha_rare_curve(ft_wol2_genome, 1900, metadata)
    
    #Save objects
    ar_rs210_genome.visualization.save('processed_data/alpha_rarefaction/' + fn + '_qiita15336_prep16181_pangenome_rs210_scrubbed_zebraFilter' + zebra_level + '.qzv')
    ar_wol2_genome.visualization.save('processed_data/alpha_rarefaction/' + fn + '_qiita15336_prep16181_pangenome_wol2_scrubbed_zebraFilter' + zebra_level + '.qzv')
    
    return(ar_rs210_genome, ar_wol2_genome)
    

In [5]:
def call_beta_species(fn, metric, rarefaction=2000, zebra_level='0.1', numRares=10, decoide_min_feature_count=10):
    
    #Metadata
    metadata = pd.read_csv('processed_data/metadata/metadata_' + fn + '.tsv', sep='\t')
    
    #Convert biom tables from genome to species
    #Path to taxonomic tress
    taxonomy_path='/Users/cguccion/Dropbox/Storage/HelpfulLabDocs/taxonomy_trees'
    
     #biom tables in df format
    df_rs210_species = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_rs210_scrubbed_zebraFilter' + zebra_level + '_species.tsv' , sep='\t', index_col = 0)
    df_wol2_species = pd.read_csv('processed_data/pandas_df/' + fn + '_qiita15336_prep16181_pangenome_wol2_scrubbed_zebraFilter' + zebra_level + '_species.tsv' , sep='\t', index_col = 0)
    
    rs210_rpca_species, wol2_rpca_species, df_wol2_rpca_rare_species, df_rs210_rpca_rare_species = all_beta_species(
        df_rs210_species, 
        df_wol2_species, 
        metadata, 
        rarefaction,
        metric,
        numRares= numRares,
        decoide_min_feature_count= decoide_min_feature_count)
    
    return(rs210_rpca_species, wol2_rpca_species, df_wol2_rpca_rare_species, df_rs210_rpca_rare_species)



### Datasets

### Pangenome Host Depletion
Note: alpha rarefaction was estimated using this Qiita analysis https://qiita.ucsd.edu/analysis/description/60472/

#### HCC Tissue Tumor vs. CRC Tissue Tumor

In [17]:
ar_rs210_genome, ar_wol2_genome = call_rare('tumor_HCC_v_CRC')


In [266]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta('tumor_HCC_v_CRC', 'tumor_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza

Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.072,2.157234,16.0
1,0.07,1.933433,16.0
2,0.088,1.900041,16.0
3,0.046,2.072226,16.0
4,0.053,2.022096,16.0
5,0.071,2.067461,16.0
6,0.108,1.763417,16.0
7,0.038,2.138,16.0
8,0.028,2.28117,16.0
9,0.045,2.231027,16.0


p-value         0.054520
pseudo-F        2.082573
Sample Size    16.000000
dtype: float64

Wol2 Weighted Unifrac Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.601,0.928525,16.0
1,0.48,1.002315,16.0
2,0.28,1.106058,16.0
3,0.252,1.116132,16.0
4,0.096,1.379418,16.0
5,0.236,1.1377,16.0
6,0.244,1.144675,16.0
7,0.09,1.325283,16.0
8,0.378,1.053358,16.0
9,0.274,1.122825,16.0


p-value         0.258360
pseudo-F        1.170328
Sample Size    16.000000
dtype: float64

Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.362,1.1675,16.0
1,0.067,2.37962,16.0
2,0.379,1.099309,16.0
3,0.283,1.292327,16.0
4,0.442,1.075515,16.0
5,0.448,0.999942,16.0
6,0.433,1.056672,16.0
7,0.234,1.527832,16.0
8,0.499,0.84913,16.0
9,0.336,1.290751,16.0


p-value         0.341680
pseudo-F        1.308776
Sample Size    16.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.498,0.716937,17.0
1,0.353,1.095126,17.0
2,0.358,1.09205,17.0
3,0.047,3.394353,17.0
4,0.061,3.25699,17.0
5,0.405,0.89982,17.0
6,0.2,1.522156,17.0
7,0.178,1.797084,17.0
8,0.096,2.413264,17.0
9,0.024,4.254241,17.0


p-value         0.193460
pseudo-F        2.123473
Sample Size    17.000000
dtype: float64


In [8]:

s210_rpca_species, wol2_rpca_species, df_wol2_rpca_rare_species, df_rs210_rpca_rare_species = call_beta_species('tumor_HCC_v_CRC', 'tumor_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)



Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.006,2.850214,16.0
1,0.602,0.743509,16.0
2,0.398,1.038486,16.0
3,0.766,0.420353,16.0
4,0.593,0.738668,16.0
5,0.61,0.735651,16.0
6,0.483,0.916755,16.0
7,0.626,0.663548,16.0
8,0.677,0.535754,16.0
9,0.636,0.644519,16.0


p-value         0.466740
pseudo-F        1.145572
Sample Size    16.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.214,1.666694,17.0
1,0.172,1.95547,17.0
2,0.198,1.779702,17.0
3,0.057,3.094248,17.0
4,0.924,0.159532,17.0
5,0.415,0.952458,17.0
6,0.42,0.838308,17.0
7,0.272,1.398552,17.0
8,0.079,2.613051,17.0
9,0.116,2.452865,17.0


p-value         0.234680
pseudo-F        1.824473
Sample Size    17.000000
dtype: float64


#### CRC Adj Tissue vs. CRC Tumor Tissue

In [272]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta('CRC_adj_v_tumor', 'host_sample_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza

Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.038,1.90215,17.0
1,0.049,1.806452,17.0
2,0.055,1.902125,17.0
3,0.041,1.87635,17.0
4,0.055,1.728472,17.0
5,0.042,1.893145,17.0
6,0.045,1.919976,17.0
7,0.038,2.024745,17.0
8,0.017,2.152957,17.0
9,0.053,1.800288,17.0


p-value         0.054560
pseudo-F        1.840409
Sample Size    17.000000
dtype: float64

Wol2 Weighted Unifrac Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.022,1.728196,17.0
1,0.013,1.765365,17.0
2,0.013,1.666741,17.0
3,0.05,1.517841,17.0
4,0.017,1.586594,17.0
5,0.011,1.850826,17.0
6,0.015,1.86393,17.0
7,0.011,1.718778,17.0
8,0.018,1.705832,17.0
9,0.146,1.257916,17.0


p-value         0.031040
pseudo-F        1.642417
Sample Size    17.000000
dtype: float64

Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.024,3.025058,17.0
1,0.24,1.580555,17.0
2,0.061,2.473991,17.0
3,0.334,1.128896,17.0
4,0.049,2.617075,17.0
5,0.065,2.641859,17.0
6,0.531,0.806794,17.0
7,0.281,1.387879,17.0
8,0.063,2.387799,17.0
9,0.005,4.842948,17.0


p-value         0.172900
pseudo-F        2.151001
Sample Size    17.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.146,2.015176,17.0
1,0.271,1.427479,17.0
2,0.308,1.149146,17.0
3,0.003,6.030982,17.0
4,0.1,2.473829,17.0
5,0.008,4.737215,17.0
6,0.123,2.290803,17.0
7,0.002,7.004421,17.0
8,0.006,5.989445,17.0
9,0.149,1.960214,17.0


p-value         0.123540
pseudo-F        3.549345
Sample Size    17.000000
dtype: float64


In [9]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta_species('CRC_adj_v_tumor', 'host_sample_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.468,0.967496,17.0
1,0.265,1.505341,17.0
2,0.037,2.66106,17.0
3,0.035,2.52082,17.0
4,0.04,2.482225,17.0
5,0.816,0.398574,17.0
6,0.171,1.816181,17.0
7,0.164,1.749293,17.0
8,0.148,1.755105,17.0
9,0.012,3.385102,17.0


p-value         0.164060
pseudo-F        2.413212
Sample Size    17.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.107,2.369635,17.0
1,0.619,0.519463,17.0
2,0.219,1.589784,17.0
3,0.016,5.063055,17.0
4,0.021,4.425584,17.0
5,0.008,4.793345,17.0
6,0.953,0.049172,17.0
7,0.93,0.085404,17.0
8,0.044,3.150444,17.0
9,0.191,1.711014,17.0


p-value         0.195580
pseudo-F        2.884673
Sample Size    17.000000
dtype: float64


ValueError: not enough values to unpack (expected 6, got 4)

#### HCC Tissue Tumor + HCC Tissue Adj.

In [274]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta('HCC_adj_v_tumor', 'host_sample_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza

Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.018,1.920564,17.0
1,0.026,1.98262,17.0
2,0.015,2.310954,17.0
3,0.026,2.238284,17.0
4,0.011,2.248303,17.0
5,0.016,2.164633,17.0
6,0.008,2.081807,17.0
7,0.005,2.137912,17.0
8,0.008,2.273973,17.0
9,0.009,2.495141,17.0


p-value         0.015400
pseudo-F        2.166899
Sample Size    17.000000
dtype: float64

Wol2 Weighted Unifrac Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.785,0.333615,17.0
1,0.786,0.328001,17.0
2,0.683,0.407743,17.0
3,0.879,0.166824,17.0
4,0.919,0.1114,17.0
5,0.912,0.156218,17.0
6,0.653,0.44644,17.0
7,0.767,0.296913,17.0
8,0.426,0.865159,17.0
9,0.605,0.480728,17.0


p-value         0.64000
pseudo-F        0.52846
Sample Size    17.00000
dtype: float64

Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.553,0.759905,17.0
1,0.025,1.761474,17.0
2,0.028,1.757215,17.0
3,0.035,1.809637,17.0
4,0.042,1.743199,17.0
5,0.596,0.998018,17.0
6,0.662,0.991243,17.0
7,0.608,1.002343,17.0
8,0.035,1.770961,17.0
9,0.031,1.740328,17.0


p-value         0.176640
pseudo-F        1.517393
Sample Size    17.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.435,1.016817,18.0
1,0.618,0.486542,18.0
2,0.911,0.097323,18.0
3,0.972,0.02233,18.0
4,0.987,0.01718,18.0
5,0.266,1.285053,18.0
6,0.548,0.607553,18.0
7,0.646,0.489456,18.0
8,0.632,0.417893,18.0
9,0.854,0.154202,18.0


p-value         0.642300
pseudo-F        0.606434
Sample Size    18.000000
dtype: float64


In [10]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta_species('HCC_adj_v_tumor', 'host_sample_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.035,1.721158,17.0
1,0.059,1.464186,17.0
2,0.034,1.693199,17.0
3,0.03,1.735805,17.0
4,0.035,1.701113,17.0
5,0.034,1.702916,17.0
6,0.032,1.773644,17.0
7,0.043,1.730103,17.0
8,0.06,1.628096,17.0
9,0.003,1.493609,17.0


p-value         0.053580
pseudo-F        1.657806
Sample Size    17.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.987,0.022015,18.0
1,0.332,1.070241,18.0
2,0.914,0.101913,18.0
3,0.806,0.289176,18.0
4,0.859,0.177543,18.0
5,0.552,0.651527,18.0
6,0.216,1.574507,18.0
7,0.961,0.036588,18.0
8,0.903,0.133051,18.0
9,0.685,0.403224,18.0


p-value         0.628180
pseudo-F        0.612283
Sample Size    18.000000
dtype: float64


#### HCC Tissue Adj. vs. CRC Tissue Adj.

In [276]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta('adj_HCC_v_CRC', 'tumor_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza

Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.014,1.9921,18.0
1,0.034,1.852715,18.0
2,0.041,1.759698,18.0
3,0.036,1.772074,18.0
4,0.03,1.716138,18.0
5,0.113,1.584295,18.0
6,0.032,1.744215,18.0
7,0.027,1.851551,18.0
8,0.032,1.563862,18.0
9,0.082,1.68643,18.0


p-value         0.039340
pseudo-F        1.781762
Sample Size    18.000000
dtype: float64

Wol2 Weighted Unifrac Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.384,0.980427,18.0
1,0.164,1.648662,18.0
2,0.149,1.715323,18.0
3,0.187,1.543764,18.0
4,0.204,1.594321,18.0
5,0.324,1.155212,18.0
6,0.335,1.198787,18.0
7,0.57,0.67271,18.0
8,0.321,1.148831,18.0
9,0.558,0.750657,18.0


p-value         0.306380
pseudo-F        1.388946
Sample Size    18.000000
dtype: float64

Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.002,3.566008,18.0
1,0.014,3.145418,18.0
2,0.001,3.580575,18.0
3,0.114,1.80659,18.0
4,0.005,2.689622,18.0
5,0.001,2.747252,18.0
6,0.02,2.077882,18.0
7,0.01,3.698857,18.0
8,0.212,1.215039,18.0
9,0.005,3.782608,18.0


p-value         0.018620
pseudo-F        2.963164
Sample Size    18.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.233,1.579617,18.0
1,0.019,4.022438,18.0
2,0.074,2.589632,18.0
3,0.069,2.692016,18.0
4,0.049,3.366297,18.0
5,0.049,3.210847,18.0
6,0.097,2.765712,18.0
7,0.126,2.19007,18.0
8,0.028,4.266125,18.0
9,0.018,4.775981,18.0


p-value         0.077020
pseudo-F        3.221415
Sample Size    18.000000
dtype: float64


In [11]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta_species('adj_HCC_v_CRC', 'tumor_type', rarefaction=2000, numRares=50, decoide_min_feature_count=1)


Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.001,2.330154,18.0
1,0.002,2.599941,18.0
2,0.002,3.276865,18.0
3,0.003,3.527529,18.0
4,0.003,3.191829,18.0
5,0.005,2.903672,18.0
6,0.005,3.112804,18.0
7,0.002,3.357094,18.0
8,0.002,3.066347,18.0
9,0.002,2.588289,18.0


p-value         0.007640
pseudo-F        3.032403
Sample Size    18.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.585,0.59305,18.0
1,0.67,0.416354,18.0
2,0.16,2.061191,18.0
3,0.656,0.470095,18.0
4,0.239,1.554627,18.0
5,0.386,0.958453,18.0
6,0.702,0.34012,18.0
7,0.322,1.253773,18.0
8,0.074,2.7829,18.0
9,0.421,0.940915,18.0


p-value         0.335600
pseudo-F        1.426953
Sample Size    18.000000
dtype: float64


#### HCC Blood vs. CRC Blood

In [18]:
ar_rs210_genome, ar_wol2_genome = call_rare('blood_HCC_v_CRC')

In [269]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_uwUni_genome, df_wol2_wUni_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta('blood_HCC_v_CRC', 'tumor_type', rarefaction=550, numRares=50, decoide_min_feature_count=1)


[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza[0m
[32mSaved DistanceMatrix to: deicode_distance_test.qza[0m
[32mSaved PCoAResults % Properties('biplot') to: deicode_biplot.qza

Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.089,1.294869,20.0
1,0.069,1.345532,20.0
2,0.156,1.194296,20.0
3,0.177,1.171275,20.0
4,0.064,1.331654,20.0
5,0.094,1.290853,20.0
6,0.16,1.203698,20.0
7,0.015,1.55645,20.0
8,0.209,1.148073,20.0
9,0.078,1.297615,20.0


p-value         0.112200
pseudo-F        1.310071
Sample Size    20.000000
dtype: float64

Wol2 Weighted Unifrac Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.004,5.814796,20.0
1,0.004,5.663464,20.0
2,0.001,6.252267,20.0
3,0.003,6.321391,20.0
4,0.003,5.447233,20.0
5,0.005,5.946132,20.0
6,0.004,5.812468,20.0
7,0.006,6.304807,20.0
8,0.004,5.774779,20.0
9,0.004,6.116241,20.0


p-value         0.003980
pseudo-F        5.935341
Sample Size    20.000000
dtype: float64

Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.004,6.164315,20.0
1,0.005,5.592596,20.0
2,0.004,5.880849,20.0
3,0.003,6.440016,20.0
4,0.014,5.148596,20.0
5,0.005,5.728225,20.0
6,0.004,5.92007,20.0
7,0.004,5.868993,20.0
8,0.009,5.413322,20.0
9,0.005,6.119361,20.0


p-value         0.004800
pseudo-F        5.886985
Sample Size    20.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.005,5.819296,20.0
1,0.003,6.396957,20.0
2,0.002,6.014491,20.0
3,0.003,6.152466,20.0
4,0.006,6.385845,20.0
5,0.002,6.035287,20.0
6,0.002,6.649175,20.0
7,0.006,5.87842,20.0
8,0.008,4.617218,20.0
9,0.009,5.002805,20.0


p-value         0.004700
pseudo-F        5.965188
Sample Size    20.000000
dtype: float64


In [12]:

rs210_rpca_genome, wol2_rpca_genome, df_wol2_rpca_rare_genome, df_rs210_rpca_rare_genome = call_beta_species('blood_HCC_v_CRC', 'tumor_type', rarefaction=550, numRares=50, decoide_min_feature_count=1)


Wol2 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.007,5.181617,20.0
1,0.027,3.607091,20.0
2,0.005,5.233764,20.0
3,0.003,5.62282,20.0
4,0.002,5.60642,20.0
5,0.002,5.71599,20.0
6,0.036,3.717181,20.0
7,0.024,4.287934,20.0
8,0.017,4.932993,20.0
9,0.007,5.786845,20.0


p-value         0.014600
pseudo-F        4.835801
Sample Size    20.000000
dtype: float64

RS210 RPCA Rare Genome


Unnamed: 0,p-value,pseudo-F,Sample Size
0,0.003,6.675081,20.0
1,0.058,3.199895,20.0
2,0.006,4.967959,20.0
3,0.015,5.166435,20.0
4,0.003,5.706271,20.0
5,0.015,4.626212,20.0
6,0.013,4.613024,20.0
7,0.017,4.232508,20.0
8,0.012,4.787935,20.0
9,0.021,4.171611,20.0


p-value         0.014640
pseudo-F        4.709296
Sample Size    20.000000
dtype: float64


### Pick one from above to view: Alpha Rarefaction
Can view alpha rarefaction here for ease, or can use the qiime2 view file saved

In [19]:
ar_rs210_genome.visualization

In [8]:
ar_wol2_genome.visualization