In [20]:
from matplotlib import pyplot as plt

%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'svg')

import numpy as np
import pandas as pd
pd.options.display.mpl_style = 'default'

from mpltools import style
from mpltools import layout

# Apply the 'ggplot' plotting style through the mpltools style module.
style.use('ggplot')

## see: https://stackoverflow.com/questions/19536817/manipulate-html-module-font-size-in-ipython-notebook
class sizeme():
    """Display wrapper that scales the HTML representation of an object.

    The wrapped object must provide ``_repr_html_()``. Its markup is placed
    inside a ``<span>`` whose CSS ``font-size`` and ``line-height`` are given
    as percentages.

    Parameters
    ----------
    ob : object
        Object exposing ``_repr_html_()`` (for example a pandas DataFrame).
    size : number, optional
        Font size in percent. Defaults to 30.
    height : number, optional
        Line height in percent. Defaults to 100.
    """
    def __init__(self, ob, size=30, height=100):
        self.ob, self.size, self.height = ob, size, height

    def _repr_html_(self):
        """Return the wrapped object's HTML enclosed in the styled ``<span>``."""
        template = u'<span style="font-size:{0}%; line-height:{1}%">{2}</span>'
        font_pct = self.size
        line_pct = self.height
        inner_html = self.ob._repr_html_()
        return template.format(font_pct, line_pct, inner_html)
    
## Background on exchanging pandas DataFrames / numpy arrays with R through rpy2:
##   https://stackoverflow.com/questions/14656852/how-to-use-pandas-dataframes-and-numpy-arrays-in-rpy2
##   http://ipython.org/ipython-doc/rel-0.13/config/extensions/rmagic.html
## There is also a ri2pandas() to convert in the other direction (R -> pandas),
## but rpy2 2.4.0 and later translates data frames automatically:
##   https://stackoverflow.com/questions/20630121/pandas-how-to-convert-r-dataframe-back-to-pandas
# Load the rpy2 IPython extension that provides the %R / %%R / %Rpull magics.
%load_ext rpy2.ipython
# Ask the R magics to render R graphics as SVG.
%Rdevice svg
# Manual conversion kept for reference only (disabled; see the rpy2 >= 2.4.0 note above):
#import rpy2.robjects.pandas2ri as p2r
#rdf = p2r.pandas2ri(info)
#%Rpush rdf

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [2]:
# Restore the saved R workspace into the embedded R session. The value shown
# as this cell's output is the list of object names returned by load().
%R load("9. try Boruta and then randomForest on left-out replicates, ranked data, varying number of features, new growth values.RData")

array(['x', 'xx', 'inp_quantiles', 'Y', 'qqq', 'getY', '.Random.seed',
       'tmp_lst', 'do_it', 'b', 'mns', 'features', 'col_groups', 'nlev',
       'info', 'NOTEBOOK_FULL_PATH', 'imp_genes', 'n_genes',
       'b_trees_list', 'my.util', 'good_gene', 'cond_type', 'tmp',
       'n_features', 'expr_levels', 'x.ranks', 'genes', 'results',
       'imp_genes_list', 'results_noleaveout'], 
      dtype='|S18')

In [58]:
%%R
# Build the table for the 'electron_donor' gene list.
# Gene selection stored under 'electron_donor' in imp_genes_list.
imp_genes <- imp_genes_list$electron_donor
# Copy x as a matrix and name its columns after names(Y), with '.' replaced by '-'.
# NOTE(review): names(Y) is read BEFORE Y is reassigned on the next line, so the
# column names come from whatever Y is already in the R session (the loaded
# workspace or a previously run cell) -- confirm that Y always carries the
# column names of x in the same order.
xx <- as.matrix(x); colnames(xx) <- gsub('.','-',names(Y),fixed=T)
# getY comes from the loaded workspace; presumably it returns one class label
# per column of x for the given factor -- verify against its definition.
Y <- getY('electron_donor', info)
# Regroup the columns by label: lactate, then pyruvate, then hydrogen.
# Columns carrying any other label are not included.
xx <- cbind(xx[,Y=='lactate'], xx[,Y=='pyruvate'], xx[,Y=='hydrogen'])
# Keep only the rows selected by imp_genes and convert to a data frame.
xx <- as.data.frame(xx[imp_genes,])

electron_donor 3 


In [59]:
%Rpull xx
%Rpull imp_genes
xx = xx.set_index(imp_genes)
sizeme(xx.head())
writer = pd.ExcelWriter('dvh_features_expression.xlsx')
xx.to_excel( writer, 'electron_donor' )
np.log10(xx+1).to_excel( writer, 'log10_electron_donor' )
#writer.save()

In [60]:
%%R
# Build the table for the 'electron_acceptor' gene list.
# Gene selection stored under 'electron_acceptor' in imp_genes_list.
imp_genes <- imp_genes_list$electron_acceptor
# Copy x as a matrix and name its columns after names(Y), with '.' replaced by '-'.
# NOTE(review): names(Y) is read BEFORE Y is reassigned on the next line, so the
# column names come from the Y left behind by an earlier cell -- confirm that Y
# always carries the column names of x in the same order.
xx <- as.matrix(x); colnames(xx) <- gsub('.','-',names(Y),fixed=T)
# getY comes from the loaded workspace; presumably it returns one class label
# per column of x for the given factor -- verify against its definition.
Y <- getY('electron_acceptor', info)
# Regroup the columns by label: sulfate, then proton/hydrogenotroph, then none.
# Columns carrying any other label are not included.
xx <- cbind(xx[,Y=='sulfate'], xx[,Y=='proton/hydrogenotroph'], xx[,Y=='none'])
# Keep only the rows selected by imp_genes and convert to a data frame.
xx <- as.data.frame(xx[imp_genes,])

electron_acceptor 3 


In [61]:
%Rpull xx
%Rpull imp_genes
xx = xx.set_index(imp_genes)
sizeme(xx.head())
#writer = pd.ExcelWriter('dvh_electron_donor.xlsx')
xx.to_excel( writer, 'electron_acceptor' )
np.log10(xx+1).to_excel( writer, 'log10_electron_acceptor' )
#writer.save()

In [62]:
%%R
# Build the table for the 'growth_rate' gene list.
# Gene selection stored under 'growth_rate' in imp_genes_list.
imp_genes <- imp_genes_list$growth_rate
# Copy x as a matrix and name its columns after names(Y), with '.' replaced by '-'.
# NOTE(review): names(Y) is read BEFORE Y is reassigned on the next line, so the
# column names come from the Y left behind by an earlier cell -- confirm that Y
# always carries the column names of x in the same order.
xx <- as.matrix(x); colnames(xx) <- gsub('.','-',names(Y),fixed=T)
# getY comes from the loaded workspace; presumably it returns one class label
# per column of x for the given factor -- verify against its definition.
Y <- getY('growth_rate', info)
# Regroup the columns by label: med_growth first, then no_growth.
# Columns carrying any other label are not included.
xx <- cbind(xx[,Y=='med_growth'], xx[,Y=='no_growth'])
# Keep only the rows selected by imp_genes and convert to a data frame.
xx <- as.data.frame(xx[imp_genes,])

growth_rate 2 


In [63]:
%Rpull xx
%Rpull imp_genes
xx = xx.set_index(imp_genes)
sizeme(xx.head())
#writer = pd.ExcelWriter('dvh_electron_donor.xlsx')
xx.to_excel( writer, 'growth_rate' )
np.log10(xx+1).to_excel( writer, 'log10_growth_rate' )
#writer.save()

In [64]:
%%R
# Build the table for the 'temperature' gene list.
# Gene selection stored under 'temperature' in imp_genes_list.
imp_genes <- imp_genes_list$temperature
# Copy x as a matrix and name its columns after names(Y), with '.' replaced by '-'.
# NOTE(review): names(Y) is read BEFORE Y is reassigned on the next line, so the
# column names come from the Y left behind by an earlier cell -- confirm that Y
# always carries the column names of x in the same order.
xx <- as.matrix(x); colnames(xx) <- gsub('.','-',names(Y),fixed=T)
# getY comes from the loaded workspace; for 'temperature' the result is compared
# with TRUE/FALSE below, so it is presumably a logical vector -- verify.
Y <- getY('temperature', info)
# Regroup the columns: those where Y is TRUE first, then those where Y is FALSE.
xx <- cbind(xx[,Y==TRUE], xx[,Y==FALSE])
# Keep only the rows selected by imp_genes and convert to a data frame.
xx <- as.data.frame(xx[imp_genes,])

temperature 2 


In [65]:
%Rpull xx
%Rpull imp_genes
xx = xx.set_index(imp_genes)
sizeme(xx.head())
#writer = pd.ExcelWriter('dvh_electron_donor.xlsx')
xx.to_excel( writer, 'temperature' )
np.log10(xx+1).to_excel( writer, 'log10_temperature' )
#writer.save()

In [66]:
%%R
# Restore the second saved workspace into a dedicated environment, so its
# objects are reached as qqq$<name> rather than being placed in the global
# environment.
qqq <- new.env()
load('11. classify dvh mono vs. coculture only.RData', envir = qqq)

In [67]:
%%R
# Build the mono- vs. co-culture table.
# Gene selection taken from the `qqq` environment.
imp_genes <- qqq$imp_genes
# Organism labels from info, coerced to character.
Y <- as.character(info$organisms)
# Keep only the columns of x whose names also occur among the columns of x.ranks.
xx = as.matrix(x[,colnames(x)%in%colnames(x.ranks)])
# Replace '.' with '-' in the column names and use the same names for Y.
# NOTE(review): this requires Y to have exactly one entry per retained column,
# in the same order -- confirm.
colnames(xx) <- names(Y) <-gsub('.','-',colnames(xx),fixed=T)
# Columns labelled 'D vulgaris Hildenborugh' first, then every other column.
# NOTE(review): 'Hildenborugh' looks like a misspelling of 'Hildenborough'; the
# literal has to match the values stored in info$organisms, so check the data
# before changing it.
xx <- cbind(xx[,Y=='D vulgaris Hildenborugh'], xx[,Y!='D vulgaris Hildenborugh'])
# Select rows by imp_genes after the same '.' -> '-' substitution. imp_genes
# itself is left unmodified, so its values may differ from these row names.
xx <- as.data.frame(xx[gsub('.','-',imp_genes,fixed=T),])

In [68]:
%Rpull xx
%Rpull imp_genes
xx = xx.set_index(imp_genes)
sizeme(xx.head())
#writer = pd.ExcelWriter('dvh_electron_donor.xlsx')
xx.to_excel( writer, 'mono_or_coculture' )
np.log10(xx+1).to_excel( writer, 'log10_mono_or_coculture' )
#writer.save()

In [69]:
# Write all sheets added by the export cells to 'dvh_features_expression.xlsx'.
# close() writes the file just as save() does, and unlike save() it is still
# available in current pandas releases.
writer.close()