In [9]:
import pandas as pd
import numpy as np 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
%matplotlib inline

### Does tissue-associated microbiota provide greater predictive accuracy of phenotype than caecal content microbiota?


In [2]:
def read_merge(otu, alpha_div, mapping, alpha_metric="Shannon.effective", mapping_classes="Phenotype"):
    """Load one sample site's tables and join them column-wise into a single frame.

    Parameters
    ----------
    otu : str
        Path to a tab-separated OTU table. The first row is the header and the
        first column the index; the table is transposed after reading
        (presumably OTUs are rows and samples are columns on disk -- verify
        against the input files).
    alpha_div : str
        Path to a tab-separated alpha-diversity table, first column as index.
    mapping : str
        Path to a tab-separated mapping file, first column as index.
    alpha_metric : str, default "Shannon.effective"
        Column of the alpha-diversity table to keep.
    mapping_classes : str, default "Phenotype"
        Column of the mapping file to keep (used downstream as the label).

    Returns
    -------
    pandas.DataFrame
        Columns are, in order: ``alpha_metric``, every column of the
        transposed OTU table, ``mapping_classes``. Rows are the union of the
        three indexes, sorted.

    Raises
    ------
    KeyError
        If ``alpha_metric`` or ``mapping_classes`` is not a column of its table.

    Notes
    -----
    ``pd.concat`` performs an outer join here, so a row label missing from any
    of the three tables yields NaN in that table's columns.
    """
    otu_table = pd.read_table(otu, header=0, index_col=0).T
    mapping_file = pd.read_table(mapping, header=0, index_col=0)
    alpha_diversity = pd.read_table(alpha_div, header=0, index_col=0)
    # sort=True pins the row order explicitly: older pandas sorted a
    # misaligned index by default (with a FutureWarning), newer pandas does
    # not. Row order feeds the seeded model downstream, so it must not depend
    # on the installed pandas version.
    merged = pd.concat(
        [alpha_diversity[alpha_metric], otu_table, mapping_file[mapping_classes]],
        axis=1,
        sort=True,
    )
    return merged

In [3]:
# Both sample sites use the same three-file layout; load each into one merged frame.
cc_merged = read_merge(
    otu="CC/OTUs_Table-norm-rel.tab",
    alpha_div="CC/alpha-diversity.tab",
    mapping="CC/mapping_file.tab",
)
tissue_merged = read_merge(
    otu="tissue/OTUs_Table-norm-rel.tab",
    alpha_div="tissue/alpha-diversity.tab",
    mapping="tissue/mapping_file.tab",
)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """


In [4]:
# Split each merged frame into a feature matrix (every column except the
# label) and the label vector taken from the "Phenotype" column.
y_cc = cc_merged["Phenotype"]
X_cc = cc_merged.drop(columns="Phenotype")

y_tissue = tissue_merged["Phenotype"]
X_tissue = tissue_merged.drop(columns="Phenotype")

### Classification 

In [8]:
# One set of hyperparameters for both sample sites, so the two models differ
# only in the data they are given.
rf_params = dict(n_estimators=500, random_state=42, criterion='entropy', n_jobs=-1)
clf_cc = RandomForestClassifier(**rf_params)
clf_tissue = RandomForestClassifier(**rf_params)

# Leave-one-out cross-validation: each sample is held out exactly once, so
# every per-fold score is the accuracy on a single sample.
loocv = LeaveOneOut()
results_cc = cross_val_score(clf_cc, X_cc, y_cc, cv=loocv)
results_tissue = cross_val_score(clf_tissue, X_tissue, y_tissue, cv=loocv)

# Mean accuracy with the standard deviation across folds in parentheses.
# The first printed line is the cc data set, the second the tissue data set.
for fold_scores in (results_cc, results_tissue):
    print("Accuracy: %.2f%% (%.2f%%)" % (fold_scores.mean()*100.0, fold_scores.std()*100.0))

Accuracy: 90.74% (28.99%)
Accuracy: 98.39% (12.60%)
