Using Rpy2
==========

Covers the (very) basics of installing and running rpy2
------

to install:


Installing R packages
------


In [3]:
import rpy2.robjects.packages as rpackages #package functions as python library
from rpy2.robjects.vectors import StrVector #to explicitly create string vector object

packnames = ('ggplot2', 'ddply2')

packnames_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(packnames_to_install) > 0:
    utils.install_packages(StrVector(packnames_to_install))


--- Please select a CRAN mirror for use in this session ---
CRAN mirror 

  1: 0-Cloud                        2: Algeria                    
  3: Argentina (La Plata)           4: Australia (Canberra)       
  5: Australia (Melbourne)          6: Austria                    
  7: Belgium                        8: Brazil (BA)                
  9: Brazil (PR)                   10: Brazil (RJ)                
 11: Brazil (SP 1)                 12: Brazil (SP 2)              
 13: Canada (BC)                   14: Canada (NS)                
 15: Canada (ON)                   16: Canada (QC 1)              
 17: Canada (QC 2)                 18: Chile                      
 19: China (Beijing 1)             20: China (Beijing 2)          
 21: China (Beijing 3)             22: China (Beijing 4)          
 23: China (Hefei)                 24: China (Lanzhou)            
 25: China (Xiamen)                26: Colombia (Cali)            
 27: Czech Republic                28: Denmark         

Run R code
-------- 

In [10]:
from rpy2 import robjects
code = '''
    sqr <- function(x){
        return(x*x)
    }
    sqr(4) #return 16
'''
robjects.r(code) #returns vector and creates function

<FloatVector - Python:0x1078e6cf8 / R:0x7fb1f793f158>
[16.000000]

In [12]:
print robjects.globalenv['sqr'].r_repr()

function (x) 
{
    return(x * x)
}


Function is also runnable

In [14]:
sqr = robjects.globalenv['sqr']
sqr(4)[0]

16

Creating vectors
------

In [20]:
from rpy2.robjects.vectors import StrVector, IntVector, FloatVector
strs = StrVector(['abc', 'def'])
ints = IntVector([1,2,3])
floats = FloatVector([1.3,3.3,10.3])
print '\n\n'.join([strs.r_repr(), ints.r_repr(), floats.r_repr()])

c("abc", "def")

1:3

c(1.3, 3.3, 10.3)


Functions
------

Normal python functions work on R vectors

In [28]:
sorted(floats)

[1.3, 3.3, 10.3]

R functions return R objects

In [31]:
rsort = robjects.r['sort']
print rsort(floats).r_repr()

c(1.3, 3.3, 10.3)


ggplot2 
-----

More detailed instructions [here](http://rpy.sourceforge.net/rpy2/doc-2.5/html/graphics.html#graphics)

In [44]:
import rpy2.robjects.lib.ggplot2 as ggplot2
from rpy2.robjects.packages import importr, data

In [45]:
base, datasets = importr('base'), importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']

In [46]:
grdevices = importr('grDevices')
grdevices.png(file="/Users/bshur/Downloads/file.png", width=512, height=512)

pp = ggplot2.ggplot(mtcars) + \
     ggplot2.aes_string(x='wt', y='mpg', col='factor(cyl)') + \
     ggplot2.geom_point() + \
     ggplot2.geom_smooth(ggplot2.aes_string(group = 'cyl'),
                         method = 'lm')
pp.plot()

grdevices.dev_off()

<IntVector - Python:0x1078d9560 / R:0x7fb1f98b0d48>
[       1]

Linear modeling
------

In [47]:
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr
stats = importr('stats')
base = importr('base')

ctl = FloatVector([4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14])
trt = FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69])
group = base.gl(2, 10, 20, labels = ["Ctl","Trt"])
weight = ctl + trt

robjects.globalenv["weight"] = weight
robjects.globalenv["group"] = group
lm_D9 = stats.lm("weight ~ group")
print(stats.anova(lm_D9))

# omitting the intercept
lm_D90 = stats.lm("weight ~ group - 1")
print(base.summary(lm_D90))

Analysis of Variance Table

Response: weight
          Df Sum Sq Mean Sq F value Pr(>F)
group      1 0.6882 0.68820  1.4191  0.249
Residuals 18 8.7292 0.48496               


Call:
function (formula, data, subset, weights, na.action, method = "qr", 
    model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 
    contrasts = NULL, offset, ...) 
{
    ret.x <- x
    ret.y <- y
    cl <- match.call()
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action", 
        "offset"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- as.name("model.frame")
    mf <- eval(mf, parent.frame())
    if (method == "model.frame") 
        return(mf)
    else if (method != "qr") 
            method), domain = NA)
    mt <- attr(mf, "terms")
    y <- model.response(mf, "numeric")
    w <- as.vector(model.weights(mf))
    if (!is.null(w) && !is.numeric(w)) 
        stop("'weights' must be a numeric vect