# Import libraries

In [None]:
! pip install pandas
! pip install numpy
! pip install scikit-learn

In [1]:
import pandas as pd
import numpy as np
import sys
import warnings

from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator
from sklearn.metrics import balanced_accuracy_score

# Set constants

In [2]:
PRESENCE_CUTOFF = 0.00001

try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

# Create helper functions and classes

In [3]:
if not sys.warnoptions:
    warnings.simplefilter("ignore")

class GMWI(BaseEstimator):

    def __init__(self, use_shannon=False, theta_f=1, theta_d=0):
        self.use_shannon = use_shannon
        self.fitted = False
        self.thresh = 0.00001
        self.health_abundant = None
        self.health_scarce = None
        self.features = None
        self.theta_f = theta_f
        self.theta_d = theta_d

    def fit(self, X, y):
        """
        Identifies health_abundant and health_scarce
        columns/features
        """
        self.features = X.columns
        self.classes_ = np.unique(y)
        if(isinstance(X, pd.DataFrame)):
            X = X.values
        if(isinstance(y, pd.DataFrame)):
            y = y.values
        self.fitted = True
        difference, fold_change = self.get_proportion_comparisons(X, y)
        self.select_features(difference, fold_change)

    def get_proportion_comparisons(self, X, y):
        # get healthy and unhealthy samples
        healthies = X[y.flatten(), :]
        unhealthies = X[~y.flatten(), :]

        # get proportions for each species
        proportion_healthy = self.get_proportions(healthies)
        proportion_unhealthy = self.get_proportions(unhealthies)

        # get differences and fold change
        diff = proportion_healthy - proportion_unhealthy
        fold = proportion_healthy / proportion_unhealthy
        return diff, fold

    def get_proportions(self, samples_of_a_class):
        num_samples = samples_of_a_class.shape[0]
        p = np.sum(samples_of_a_class > self.thresh, axis=0) / num_samples
        return p

    def select_features(self, difference, fold_change):
        # based on proportion differences and fold change, select health abundant
        # and health scarce
        self.health_abundant = self.features[self.cutoff(difference, fold_change)]
        self.health_scarce = self.features[self.cutoff(-1 * difference, 1 / fold_change)]

    def cutoff(self, diff, fold):
        diff_cutoff = diff > self.theta_d
        fold_cutoff = fold > self.theta_f
        both_cutoff = np.bitwise_and(diff_cutoff, fold_cutoff)
        columns = np.where(both_cutoff)
        return columns[0]

    def decision_function(self, X):
        if not self.fitted:
            return None
        if list(X.columns) != list(self.features):
            raise Exception("Model was trained with (different) feature names than input")
        # if(isinstance(X, pd.DataFrame)):
        #    X = X.values
        X_healthy_features = X[self.health_abundant]
        X_unhealthy_features = X[self.health_scarce]
        psi_MH = self.get_psi(X_healthy_features.values) / (
            X_healthy_features.shape[1])
        psi_MN = self.get_psi(X_unhealthy_features.values) / (
            (X_unhealthy_features.shape[1]))
        num = psi_MH + self.thresh
        dem = psi_MN + self.thresh
        return np.log10(num / dem)

    def get_psi(self, X):
        psi = self.richness(X) * 1.0
        if self.use_shannon:
            shan = self.shannon(X)
            psi *= shan
        return psi

    def richness(self, X):
        """
        Returns the number of nonzero values for each sample (row) in X
        """
        rich = np.sum(X > self.thresh, axis=1)
        return rich

    def shannon(self, X):
        logged = np.log(X)
        logged[logged == -np.inf] = 0
        logged[logged == np.inf] = 0
        shan = logged * X * -1
        return np.sum(shan, axis=1)

    def predict(self, X):
        return self.decision_function(X) > 0

# Download and load up data

In [4]:
# download the zipped data if we're in colab
! $IN_COLAB && wget https://raw.githubusercontent.com/danielchang2002/GMWI2/main/manuscript/data.zip
    
# unzip the data
! unzip data.zip

unzip:  cannot find or open data.zip, data.zip.zip or data.zip.ZIP.


In [4]:
training_set = pd.read_csv("data/training_set.csv", index_col=[0, 1])

In [5]:
longitudinal_cases = pd.read_csv("data/longitudinal_cases.csv", index_col=[0, 1])

In [6]:
X = training_set.iloc[:, :-1]
X

Unnamed: 0_level_0,Unnamed: 1_level_0,k__Archaea,k__Archaea|p__Euryarchaeota,k__Archaea|p__Euryarchaeota|c__Methanobacteria,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera|s__Methanosphaera_stadtmanae,k__Archaea|p__Euryarchaeota|c__Thermoplasmata,...,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Hordeivirus|s__Barley_stripe_mosaic_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Cactus_mild_mottle_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Cucumber_green_mottle_mosaic_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Paprika_mild_mottle_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Pepper_mild_mottle_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Tobacco_mild_green_mosaic_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Deep_sea_thermophilic_phage_D6E,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Loktanella_phage_pCB2051_A,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Tetraselmis_viridis_virus_S1
Study_ID,Sample Accession,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Obregon-Tito (2015),SAMN03283239,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Obregon-Tito (2015),SAMN03283266,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Obregon-Tito (2015),SAMN03283281,0.009764,0.009764,0.009764,0.009764,0.009764,0.009764,0.009764,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Obregon-Tito (2015),SAMN03283294,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Obregon-Tito (2015),SAMN03283288,0.011865,0.011865,0.011865,0.011865,0.011865,0.011865,0.011865,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yang (2020),SRR6456373,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Yang (2020),SRR6456374,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Yang (2020),SRR6456375,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Yang (2020),SRR6456376,0.002415,0.002415,0.002415,0.002415,0.002415,0.002415,0.002415,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
y = training_set.iloc[:, [-1]]
y

Unnamed: 0_level_0,Unnamed: 1_level_0,is_healthy
Study_ID,Sample Accession,Unnamed: 2_level_1
Obregon-Tito (2015),SAMN03283239,True
Obregon-Tito (2015),SAMN03283266,True
Obregon-Tito (2015),SAMN03283281,True
Obregon-Tito (2015),SAMN03283294,True
Obregon-Tito (2015),SAMN03283288,True
...,...,...
Yang (2020),SRR6456373,True
Yang (2020),SRR6456374,True
Yang (2020),SRR6456375,True
Yang (2020),SRR6456376,True


# Exploratory analysis!

## Let's take a look at the metadata (Fig. 1a)

In [11]:
# ! pip install rpy2

fig1a.csv                  prebiotics.csv
longitudinal_cases.csv     taxonomic_features.csv
metadata.csv               taxonomic_features_val.csv
metadata_val.csv           training_set.csv


In [21]:
import rpy2.robjects as robjects

r_script = """
install.packages("ggplot2")
install.packages("dplyr")
"""

robjects.r(r_script)

--- Please select a CRAN mirror for use in this session ---
Secure CRAN mirrors 

 1: 0-Cloud [https]
 2: Australia (Canberra) [https]
 3: Australia (Melbourne 1) [https]
 4: Australia (Melbourne 2) [https]
 5: Australia (Perth) [https]
 6: Austria [https]
 7: Belgium (Brussels) [https]
 8: Brazil (PR) [https]
 9: Brazil (SP 1) [https]
10: Brazil (SP 2) [https]
11: Bulgaria [https]
12: Canada (MB) [https]
13: Canada (ON) [https]
14: Chile (Santiago) [https]
15: China (Beijing 2) [https]
16: China (Beijing 3) [https]
17: China (Hefei) [https]
18: China (Hong Kong) [https]
19: China (Guangzhou) [https]
20: China (Jinan) [https]
21: China (Lanzhou) [https]
22: China (Nanjing) [https]
23: China (Shanghai 2) [https]
24: China (Shenzhen) [https]
25: Colombia (Cali) [https]
26: Costa Rica [https]
27: Cyprus [https]
28: Czech Republic [https]
29: Denmark [https]
30: East Asia [https]
31: Ecuador (Cuenca) [https]
32: France (Lyon 1) [https]
33: France (Lyon 2) [https]
34: France (Marseille) [ht

R[write to console]: also installing the dependencies ‘lattice’, ‘colorspace’, ‘utf8’, ‘nlme’, ‘Matrix’, ‘farver’, ‘labeling’, ‘munsell’, ‘R6’, ‘RColorBrewer’, ‘viridisLite’, ‘fansi’, ‘magrittr’, ‘pillar’, ‘pkgconfig’, ‘cli’, ‘glue’, ‘gtable’, ‘isoband’, ‘lifecycle’, ‘MASS’, ‘mgcv’, ‘rlang’, ‘scales’, ‘tibble’, ‘vctrs’, ‘withr’


R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/src/contrib/lattice_0.21-8.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 589330 bytes (575 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to console]: downloaded 1.2 MB


R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/src/contrib/labeling_0.4.2.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 10156 bytes

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to 

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 130057 bytes (127 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to console]: downloaded 744 KB


R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/src/contrib/scales_1.2.1.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 270609 bytes (264 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to co

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c init.c -o init.o


In file included from init.c:2:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/R.h:44:11: fatal error: 'stdlib.h' file not found
# include <stdlib.h> /* Not used by R itself, but widely assumed in packages */
          ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: init.o] Error 1
ERROR: compilation failed for package ‘lattice’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/lattice’
* installing *source* package ‘colorspace’ ...
** package ‘colorspace’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
colorspace.c:2:10: fatal error: 'ctype.h' file not found
#include <ctype.h>
         ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: colorspace.o] Error 1
ERROR: compilation failed for package ‘colorspace’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/colorspace’

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c colorspace.c -o colorspace.o


* installing *source* package ‘utf8’ ...
** package ‘utf8’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
as_utf8.c:17:10: fatal error: 'assert.h' file not found
#include <assert.h>
         ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: as_utf8.o] Error 1
ERROR: compilation failed for package ‘utf8’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/utf8’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include  -Iutf8lite/src -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c as_utf8.c -o as_utf8.o


* installing *source* package ‘farver’ ...
** package ‘farver’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from ColorSpace.cpp:1:
In file included from ./ColorSpace.h:4:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/typeinfo:64:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/cstdlib:87:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:150:34: error: unknown type name 'ldiv_t'
inline _LIBCPP_INLINE_VISIBILITY ldiv_t div(long __x, long __y) _NOEXCEPT {
                                 ^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:151:12: error: no member named 'ldiv' in the global namespace
  return ::ldiv(__x, __y);
         ~~^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:154:34: error: unknown type name 'lldiv_t'
inline _LIBCPP_INLINE_VISIBILITY 

x86_64-apple-darwin13.4.0-clang++ -std=gnu++11 -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -stdlib=libc++ -fvisibility-inlines-hidden -fmessage-length=0 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c ColorSpace.cpp -o ColorSpace.o


In file included from ColorSpace.cpp:3:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/R.h:42:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/cmath:317:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/math.h:388:31: error: use of undeclared identifier 'FP_NAN'
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
                              ^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/math.h:388:39: error: use of undeclared identifier 'FP_INFINITE'
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
                                      ^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/math.h:388:52: error: use of undeclared identifier 'FP_NORMAL'
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x);
                      

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c assumptions.c -o assumptions.o


* installing *source* package ‘magrittr’ ...
** package ‘magrittr’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from pipe.c:3:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: pipe.o] Error 1
ERROR: compilation failed for package ‘magrittr’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/magrittr’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c pipe.c -o pipe.o


* installing *source* package ‘pkgconfig’ ...
** package ‘pkgconfig’ successfully unpacked and MD5 sums checked
** using staged installation
** R
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (pkgconfig)
* installing *source* package ‘cli’ ...
** package ‘cli’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from ansi.c:2:
In file included from ./cli.h:7:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/R.h:44:11: fatal error: 'stdlib.h' file not found
# include <stdlib.h> /* Not used by R itself, but widely assumed in packages */
          ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -I../inst/include -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c ansi.c -o ansi.o


* installing *source* package ‘glue’ ...
** package ‘glue’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from glue.c:3:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: glue.o] Error 1
ERROR: compilation failed for package ‘glue’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/glue’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c glue.c -o glue.o


* installing *source* package ‘isoband’ ...
** package ‘isoband’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from clip-lines.cpp:3:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/R.h:39:
In file included from /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/cstdlib:87:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:150:34: error: unknown type name 'ldiv_t'
inline _LIBCPP_INLINE_VISIBILITY ldiv_t div(long __x, long __y) _NOEXCEPT {
                                 ^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:151:12: error: no member named 'ldiv' in the global namespace
  return ::ldiv(__x, __y);
         ~~^
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/bin/../include/c++/v1/stdlib.h:154:34: error: unknown type name 'lldiv_t'
inline _LIBCPP_INLINE_VISIBILITY lldiv_t div(long long __x,
                        

x86_64-apple-darwin13.4.0-clang++ -std=gnu++11 -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -stdlib=libc++ -fvisibility-inlines-hidden -fmessage-length=0 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c clip-lines.cpp -o clip-lines.o


20 errors generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:178: clip-lines.o] Error 1
ERROR: compilation failed for package ‘isoband’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/isoband’
* installing *source* package ‘MASS’ ...
** package ‘MASS’ successfully unpacked and MD5 sums checked
** using staged installation
** libs


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c MASS.c -o MASS.o


MASS.c:18:10: fatal error: 'stdlib.h' file not found
#include <stdlib.h>
         ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: MASS.o] Error 1
ERROR: compilation failed for package ‘MASS’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/MASS’
* installing *source* package ‘rlang’ ...
** package ‘rlang’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from capture.c:1:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: capture.o] Error 1
ERROR: compilation failed for package ‘rlang’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/rlang’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG -I./rlang/  -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c capture.c -o capture.o


* installing *source* package ‘withr’ ...
** package ‘withr’ successfully unpacked and MD5 sums checked
** using staged installation
** R
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (withr)
ERROR: dependency ‘lattice’ is not available for package ‘nlme’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/nlme’
ERROR: dependency ‘lattice’ is not available for package ‘Matrix’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/Matrix’
ERROR: dependency ‘colorspace’ is not available for package ‘munsell’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/munsell’
ERROR: dependencies

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to console]: downloaded 168 KB


R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/src/contrib/glue_1.6.2.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 106510 bytes (104 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to cons

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: 

R[write to console]: downloaded 552 KB


R[write to console]: trying URL 'https://mirror.las.iastate.edu/CRAN/src/contrib/tidyselect_1.2.0.tar.gz'

R

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c assumptions.c -o assumptions.o


* installing *source* package ‘utf8’ ...
** package ‘utf8’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
as_utf8.c:17:10: fatal error: 'assert.h' file not found
#include <assert.h>
         ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: as_utf8.o] Error 1
ERROR: compilation failed for package ‘utf8’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/utf8’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include  -Iutf8lite/src -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c as_utf8.c -o as_utf8.o


* installing *source* package ‘cli’ ...
** package ‘cli’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from ansi.c:2:
In file included from ./cli.h:7:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/R.h:44:11: fatal error: 'stdlib.h' file not found
# include <stdlib.h> /* Not used by R itself, but widely assumed in packages */
          ^~~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: ansi.o] Error 1
ERROR: compilation failed for package ‘cli’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/cli’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -I../inst/include -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c ansi.c -o ansi.o


* installing *source* package ‘generics’ ...
** package ‘generics’ successfully unpacked and MD5 sums checked
** using staged installation
** R
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (generics)
* installing *source* package ‘glue’ ...
** package ‘glue’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from glue.c:3:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: glue.o] Error 1
ERROR: compilation failed for package ‘glue’
* removing ‘/

x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c glue.c -o glue.o


* installing *source* package ‘magrittr’ ...
** package ‘magrittr’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from pipe.c:3:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: pipe.o] Error 1
ERROR: compilation failed for package ‘magrittr’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/magrittr’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG   -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c pipe.c -o pipe.o


* installing *source* package ‘rlang’ ...
** package ‘rlang’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
In file included from capture.c:1:
/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include/Rinternals.h:39:11: fatal error: 'stdio.h' file not found
# include <stdio.h>
          ^~~~~~~~~
1 error generated.
make: *** [/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/etc/Makeconf:171: capture.o] Error 1
ERROR: compilation failed for package ‘rlang’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/rlang’


x86_64-apple-darwin13.4.0-clang -I"/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/include" -DNDEBUG -I./rlang/  -D_FORTIFY_SOURCE=2 -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -mmacosx-version-min=10.9 -I/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include   -fPIC  -march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE -fstack-protector-strong -O2 -pipe -isystem /Users/daniel/opt/anaconda3/envs/gmwi2_analysis/include -fdebug-prefix-map=/Users/runner/miniforge3/conda-bld/r-base-split_1678912332388/work=/usr/local/src/conda/r-base-4.2.3 -fdebug-prefix-map=/Users/daniel/opt/anaconda3/envs/gmwi2_analysis=/usr/local/src/conda-prefix  -c capture.c -o capture.o


ERROR: dependencies ‘cli’, ‘glue’, ‘rlang’ are not available for package ‘lifecycle’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/lifecycle’
ERROR: dependencies ‘cli’, ‘glue’, ‘lifecycle’, ‘rlang’ are not available for package ‘vctrs’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/vctrs’
ERROR: dependencies ‘cli’, ‘fansi’, ‘glue’, ‘lifecycle’, ‘rlang’, ‘utf8’, ‘vctrs’ are not available for package ‘pillar’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/pillar’
ERROR: dependencies ‘cli’, ‘glue’, ‘lifecycle’, ‘rlang’, ‘vctrs’ are not available for package ‘tidyselect’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/tidyselect’
ERROR: dependencies ‘fansi’, ‘lifecycle’, ‘magrittr’, ‘pillar’, ‘rlang’, ‘vctrs’ are not available for package ‘tibble’
* removing ‘/Users/daniel/opt/anaconda3/envs/gmwi2_analysis/lib/R/library/tibble’
ERROR: dependencies ‘cli’, ‘glue’, ‘lifecycle’, ‘magritt

<rpy2.rinterface_lib.sexp.NULLType object at 0x1a953d510> [0]

In [22]:
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.lib.dplyr import DataFrame
import rpy2.robjects.lib.ggplot2 as ggplot2

# Read CSV using rpy2
pandas2ri.activate()
read_csv = robjects.r['read.csv']
file_path = "data/fig1a.csv"
fig1a = read_csv(file_path, sep=",", header=True, na_strings=["", "NA"], check_names=False, row_names=1)

# Age Histogram
age_hist = ggplot2.ggplot(fig1a) + \
    ggplot2.aes_string(x='as.numeric(Age)') + \
    ggplot2.geom_histogram(binwidth=1) + \
    ggplot2.xlab("Age (years)") + \
    ggplot2.ylab("Sample count") + \
    ggplot2.ggtitle("")  # Put your title here
print(age_hist)

# Mean Age
mean_age = fig1a.rx2('Age').mean(na_rm=True)
print("Mean Age:", mean_age[0])

# Sex Plot
fig1a.rx2('Sex')[robjects.r['is.na'](fig1a.rx2('Sex'))] = "NA"
data_sex = fig1a.rx2('Sex').table()
data_sex = DataFrame(data_sex)
data_sex.colnames = ['category', 'count']
data_sex['fraction'] = data_sex.rx2('count') / sum(data_sex.rx2('count'))
data_sex['ymax'] = robjects.r['cumsum'](data_sex.rx2('fraction'))
data_sex['ymin'] = robjects.r['c'](0, robjects.r['head'](data_sex.rx2('ymax'), n=-1))
data_sex['labelPosition'] = (data_sex.rx2('ymax') + data_sex.rx2('ymin')) / 2
data_sex['label'] = robjects.r['paste0'](data_sex.rx2('category'), ": ", data_sex.rx2('count'))

plot_sex = ggplot2.ggplot(data_sex) + \
    ggplot2.aes_string(ymax='ymax', ymin='ymin', xmax='4', xmin='3', fill='category') + \
    ggplot2.geom_rect() + \
    ggplot2.geom_label(ggplot2.aes_string(x='3.5', y='labelPosition', label='label'), size=4, color="black") + \
    ggplot2.scale_fill_brewer(palette=4) + \
    ggplot2.coord_polar(theta="y") + \
    ggplot2.xlim(robjects.r['c'](2, 4)) + \
    ggplot2.theme_void() + \
    ggplot2.theme(legend_position="none")
print(plot_sex + ggplot2.scale_fill_manual(values=robjects.r['c']("#FFCC66", "#66CCCC", "#CCCCCC")))

# Geography Plot
fig1a.rx2('Continent')[robjects.r['is.na'](fig1a.rx2('Continent'))] = "NA"
data_geo = fig1a.rx2('Continent').table()
data_geo = DataFrame(data_geo)
data_geo.colnames = ['category', 'count']
data_geo['fraction'] = data_geo.rx2('count') / sum(data_geo.rx2('count'))
data_geo['ymax'] = robjects.r['cumsum'](data_geo.rx2('fraction'))
data_geo['ymin'] = robjects.r['c'](0, robjects.r['head'](data_geo.rx2('ymax'), n=-1))
data_geo['labelPosition'] = (data_geo.rx2('ymax') + data_geo.rx2('ymin')) / 2
data_geo['label'] = robjects.r['paste0'](data_geo.rx2('category'), ": ", data_geo.rx2('count'))

plot_geo = ggplot2.ggplot(data_geo) + \
    ggplot2.aes_string(ymax='ymax', ymin='ymin', xmax='4', xmin='3', fill='category') + \
    ggplot2.geom_rect() + \
    ggplot2.geom_label(ggplot2.aes_string(x='4', y='labelPosition', label='label'), size=4, color="black") + \
    ggplot2.scale_fill_brewer(palette=4) + \
    ggplot2.coord_polar(theta="y") + \
    ggplot2.xlim(robjects.r['c'](2, 4)) + \
    ggplot2.theme_void() + \
    ggplot2.theme(legend_position="none")
print(plot_geo + ggplot2.scale_fill_manual(values=robjects.r['c']("#FF6633", "#FFCC00", "#0066CC", "#CCCCCC", "#009933", "#33CCCC", "#996699")))

PackageNotInstalledError: The R package "dplyr" is not installed.

# Train GMWI2 and GMWI and evaluate on training set

In [12]:
# use same parameters as original paper
gmwi = GMWI(use_shannon=True, theta_f=1.4, theta_d=0.1)

# use only non-viral species
X_GMWI = X.copy()
X_GMWI = X_GMWI[list(filter(lambda x : "s__" in x and "virus" not in x, X_GMWI.columns))]

gmwi.fit(X_GMWI, y)
GMWI_scores = pd.DataFrame(gmwi.decision_function(X_GMWI), index=y.index, columns=["GMWI"])
display(GMWI_scores)
print("GMWI balanced_accuracy on training set:", balanced_accuracy_score(y, GMWI_scores > 0))

Unnamed: 0_level_0,Unnamed: 1_level_0,GMWI
Study_ID,Sample Accession,Unnamed: 2_level_1
Obregon-Tito (2015),SAMN03283239,3.832094
Obregon-Tito (2015),SAMN03283266,1.758548
Obregon-Tito (2015),SAMN03283281,3.341858
Obregon-Tito (2015),SAMN03283294,1.429055
Obregon-Tito (2015),SAMN03283288,1.064592
...,...,...
Yang (2020),SRR6456373,-4.472053
Yang (2020),SRR6456374,-2.158801
Yang (2020),SRR6456375,0.280615
Yang (2020),SRR6456376,1.132266


GMWI balanced_accuracy on training set: 0.7172501242714733


In [13]:
gmwi2 = LogisticRegression(random_state=42, penalty="l1", solver="liblinear", C=REGULARIZATION, class_weight="balanced")
X_GMWI2 = X > PRESENCE_CUTOFF
gmwi2.fit(X_GMWI2, y.values.flatten())
GMWI2_scores = pd.DataFrame(gmwi2.decision_function(X_GMWI2), index=y.index, columns=["GMWI2"])
display(GMWI2_scores)
print("GMWI2 balanced_accuracy on training set:", balanced_accuracy_score(y, GMWI2_scores > 0))

Unnamed: 0_level_0,Unnamed: 1_level_0,GMWI2
Study_ID,Sample Accession,Unnamed: 2_level_1
Obregon-Tito (2015),SAMN03283239,1.835237
Obregon-Tito (2015),SAMN03283266,0.103722
Obregon-Tito (2015),SAMN03283281,1.116329
Obregon-Tito (2015),SAMN03283294,1.758818
Obregon-Tito (2015),SAMN03283288,1.675791
...,...,...
Yang (2020),SRR6456373,-1.029233
Yang (2020),SRR6456374,-0.941967
Yang (2020),SRR6456375,-0.030621
Yang (2020),SRR6456376,0.973798


GMWI2 balanced_accuracy on training set: 0.7988526637127441


# Apply GMWI2 to longitudinal case studies

### Tanes *et al.* 2021 (dietary fiber)

In [14]:
tanes = longitudinal_cases.loc["Tanes (2021)"]
tanes

Unnamed: 0_level_0,k__Archaea,k__Archaea|p__Euryarchaeota,k__Archaea|p__Euryarchaeota|c__Methanobacteria,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera,k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanosphaera|s__Methanosphaera_stadtmanae,k__Archaea|p__Euryarchaeota|c__Thermoplasmata,...,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Cucumber_green_mottle_mosaic_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Paprika_mild_mottle_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Pepper_mild_mottle_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Virgaviridae|g__Tobamovirus|s__Tobacco_mild_green_mosaic_virus,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Deep_sea_thermophilic_phage_D6E,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Loktanella_phage_pCB2051_A,k__Viruses|p__Viruses_unclassified|c__Viruses_unclassified|o__Viruses_unclassified|f__Viruses_unclassified|g__Viruses_unclassified|s__Tetraselmis_viridis_virus_S1,Subject_ID,health_status/host_diet,timepoint
Sample Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SAMEA7082340,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,11,Irritable bowel syndrome (IBS),0
SAMEA7082341,0.018776,0.018776,0.018776,0.018776,0.018776,0.018776,0.018776,0.0,0.0,0.0,...,0,0,0,0,0,0,0,11,Irritable bowel syndrome (IBS),12
SAMEA7082342,0.013714,0.013714,0.013714,0.013714,0.013714,0.013714,0.013714,0.0,0.0,0.0,...,0,0,0,0,0,0,0,11,Irritable bowel syndrome (IBS),6
SAMEA7082343,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,12,Irritable bowel syndrome (IBS),0
SAMEA7082344,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,12,Irritable bowel syndrome (IBS),12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAMEA7082429,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,D-6Fresk_S5,Healthy,
SAMEA7082430,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,D-6Fryst_S4,Healthy,
SAMEA7082431,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,D-7Fryst_S7,Healthy,
SAMEA7082432,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,...,0,0,0,0,0,0,0,D-9Feresk_S9,Healthy,
