In [None]:
# default_exp converter

# Converter

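> Helpers that convert Python objects (dict, tuple, None, datetime, pandas Series) to R objects through rpy2, plus a small wrapper class for pushing values to R, running R code, and getting results back.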

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import collections
import datetime
import numpy as np
import pandas as pd
# For some reason just importing rpy2 is not working, so we need to import each submodule explicitly
import rpy2.robjects
import rpy2.robjects.numpy2ri

## Functions

In [None]:
#export
def dict2ri(D):
    """
    Build an R ListVector from a Python dictionary.

    parameters:
    -----------
    D: dict. Its (key, value) pairs are handed to rpy2.robjects.ListVector

    returns:
    --------
    An rpy2.robjects.ListVector

    raises:
    -------
    ValueError, if D is not a dict
    """
    if isinstance(D, dict):
        return rpy2.robjects.ListVector(D.items())
    #fi
    raise ValueError("Expected dict. Got '%s'." % str(type(D)))
#edef

In [None]:
# Quick check: a one-entry dict is converted to an R list with one named element.
dict2ri({"test":2})

0,1
test,[RTYPES.INTSXP]


In [None]:
#export
def ri2dict(D):
    """
    Turn a named R vector (registered in converter() for ListVector) into a dictionary.

    The keys are taken from D.names, and every element of D is wrapped with list().
    Because of that wrapping this is not the inverse of dict2ri:
    ri2dict(dict2ri({'a': 1})) -> { 'a': [1]}

    parameters:
    -----------
    D: an object exposing `.names` and iteration over its elements

    returns:
    --------
    dict mapping each name to a list of the values of the matching element
    """
    as_dict = {}
    for key, element in zip(D.names, list(D)):
        as_dict[key] = list(element)
    #efor
    return as_dict
#edef

In [None]:
# Round trip: the scalar 2 comes back wrapped in a list.
ri2dict(dict2ri({"test":2}))

{'test': [2]}

In [None]:
#export
def tuple2ri(T):
    """
    Convert a tuple to an R array, going through numpy.

    The tuple is turned into a numpy array first; that array is then handed
    to rpy2's numpy converter.

    parameters:
    -----------
    T: tuple

    returns:
    --------
    The R object produced by rpy2.robjects.numpy2ri.numpy2rpy

    raises:
    -------
    ValueError, if T is not a tuple
    """
    if isinstance(T, tuple):
        as_array = np.array(T)
        return rpy2.robjects.numpy2ri.numpy2rpy(as_array)
    #fi
    raise ValueError("Expected tuple. Got '%s'." % str(type(T)))
#edef

In [None]:
# Quick check: a tuple of ints is converted to an R integer vector.
tuple2ri((2,3))

<rpy2.rinterface.IntSexpVector object at 0x184730140> [RTYPES.INTSXP]

In [None]:
#export
def none2ri(N):
    """
    Map Python's None onto R's NULL.

    parameters:
    -----------
    N: ignored; the argument exists so the function can be registered as a converter

    returns:
    --------
    rpy2.robjects.NULL
    """
    r_null = rpy2.robjects.NULL
    return r_null
#edef

In [None]:
# Quick check: None is converted to R's NULL.
none2ri(None)

<rpy2.rinterface_lib.sexp.NULLType object at 0x110861440> [RTYPES.NILSXP]

In [None]:
#export
def datetime2ri(D):
    """
    Convert a datetime object to an R date-time via R's base as.POSIXlt.

    The value is first rendered as a 'YYYY-mm-dd HH:MM:SS' string and then
    parsed by R with the same format, so anything the format does not carry
    (e.g. fractions of a second) is not passed on to R.
    NOTE(review): time zone information is not part of the string either;
    presumably R applies its own default time zone -- confirm.

    parameters:
    -----------
    D: object with a strftime() method (e.g. datetime.datetime, pandas Timestamp)

    returns:
    --------
    The object returned by R's as.POSIXlt
    """
    from rpy2.robjects.packages import importr

    # One format string shared by the Python side (rendering) and the R side (parsing)
    stamp_format = "%Y-%m-%d %H:%M:%S"
    r_base = importr("base")
    return r_base.as_POSIXlt(D.strftime(stamp_format), format=stamp_format)
#edef

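> Warning: The datetime is passed to R as a `YYYY-mm-dd HH:MM:SS` string and parsed with `as.POSIXlt`, so sub-second precision and time zone information are not carried over. It is not certain whether this is the intended conversion.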

In [None]:
# Quick check: a pandas Timestamp is converted through datetime2ri.
datetime2ri(pd.to_datetime("12. March 2020"))

0,1
[no name],FloatVector with 1 elements.  0.000000
0.000000,
[no name],IntVector with 1 elements.  0
0,
[no name],IntVector with 1 elements.  0
0,
...,...
[no name],IntVector with 1 elements.  0
0,
[no name],StrVector with 1 elements.  'EET'

0
0.0

0
0

0
0

0
0

0
'EET'

0
NA_integer_


In [None]:
#export
def dataframe_non_string_category_wrapper(S):
    """
    Convert a pandas series to an R object, turning category values into strings first.

    This works around the following rpy2 error:
        `Converting pandas "Category" series to R factor is only possible when categories are strings`

    Series whose dtype is not 'category' are handed to rpy2 unchanged.

    parameters:
    -----------
    S: pandas Series

    returns:
    --------
    The R object produced by pandas2ri.py2rpy_pandasseries
    """
    from rpy2.robjects import pandas2ri

    def stringify(value):
        # Missing values stay missing (None) instead of becoming their string form
        return None if pd.isna(value) else str(value)
    #edef

    is_categorical = (S.dtype.name == 'category')
    series = S.apply(stringify).astype('category') if is_categorical else S

    return pandas2ri.py2rpy_pandasseries(series)

In [None]:
# Quick check with an object-dtype series: this takes the non-category path.
dataframe_non_string_category_wrapper(pd.Series(["Thies", "Gehrmann"]).astype('object'))

<rpy2.rinterface_lib.sexp.StrSexpVector object at 0x110f42b00> [RTYPES.STRSXP]

In [None]:
#export
def converter():
    """
    Build the rpy2 converter used by this module.

    Stacked on top of a fresh 'BIU converter', in this order:
    * rpy2's default converter
    * the numpy converter
    * the pandas converter

    Additional rules registered afterwards:
    * R -> python: ListSexpVector (returned unchanged), ListVector (via ri2dict)
    * python -> R: dict, tuple, None, datetime.datetime, pandas Series

    returns:
    --------
    An rpy2 Converter object
    """
    from rpy2.robjects import numpy2ri
    from rpy2.robjects import pandas2ri

    conv = rpy2.robjects.conversion.Converter('BIU converter')
    stacked = (rpy2.robjects.default_converter, numpy2ri.converter, pandas2ri.converter)
    for extra in stacked:
        conv += extra
    #efor

    # R -> python rules
    conv.rpy2py.register(rpy2.rinterface.ListSexpVector, lambda x: x)
    conv.rpy2py.register(rpy2.robjects.ListVector, ri2dict)

    # python -> R rules, one (type, function) pair per supported python type
    to_r_rules = (
        (dict, dict2ri),
        (tuple, tuple2ri),
        (type(None), none2ri),
        (datetime.datetime, datetime2ri),
        (pd.core.series.Series, dataframe_non_string_category_wrapper),
    )
    for py_type, rule in to_r_rules:
        conv.py2rpy.register(py_type, rule)
    #efor

    return conv

In [None]:
class R(object):
    """
    A wrapper for rpy2, which somewhat mimics the ipython magic functions.
    Basically, it handles the automatic conversion of some python objects to R objects.
    Further, it allows you to automatically push python objects, call code and get converted objects back to python.
    
    Example usage:
    --------------
    
    r = biu.R()
    x = pd.DataFrame([[1,2,3],[4,5,6]])
    r.push(x=x)
    r('y = x * 2')
    y = r.get('y')
    
    Or, altogether:
    ---------------
    y = r('y=x*2', push=dict(x=x), get='y')
    
    Doing a lot at the same time:
    -----------------------------
    
    y, z = r('''
        y = x * 2
        z = x + 2
        ''', push=dict(x=x), get=['y', 'z'])
    
    
    """
    # The rpy2 converter used for every push/get; set per instance in __init__
    _converter = None
    
    def __init__(self):
        """
        Initialize the rpy2 wrapper with the converter built by converter()
        """
        self._converter = converter()
    #edef
    
    def add_converter(self, obj_type, convert_func):
        """
        Add a python -> R conversion rule to this object's converter.
        
        parameters:
        -----------
        obj_type: the type of the object that this converter relates to
        convert_func: function. The function that should be applied
        """
        self._converter.py2rpy.register(obj_type, convert_func)
    #edef
        
    def push(self, **kwargs):
        """
        Push values to R, based on the current converter
        
        parameters:
        -----------
        kwargs: name=value pairs; each value is assigned in R under its name
        
        returns:
        --------
        None
        
        Example usage:
        --------------
        
        r.push(x=10, y='pool', ages=[10, 50, 100])
        """
        if not kwargs:
            # Nothing to assign, so there is no need to enter the conversion context
            return None
        #fi
        
        # Enter the conversion context once for all assignments
        with rpy2.robjects.conversion.localconverter(self._converter):
            for (k, v) in kwargs.items():
                rpy2.robjects.r.assign(k, v)
            #efor
        #ewith
    #edef
        
    def get(self, name, *pargs):
        """
        Get a value from R, based on the current converter
        
        parameters:
        -----------
        name: return this variable from the R instance
        *pargs, if specified, return the values of name + those in pargs
        
        returns:
        --------
        Either a single converted R object, or
        if pargs is specified, a list of values (in the order name, *pargs)
        """
        with rpy2.robjects.conversion.localconverter(self._converter):
            if len(pargs) == 0:
                return rpy2.robjects.globalenv.find(name)
            #fi
            return [ rpy2.robjects.globalenv.find(n) for n in (name,) + pargs ]
        #ewith
    #edef
    
    def exec(self, cmd, push=None, get=True):
        """
        Call R code, pushing values, and returning values if necessary
        
        parameters:
        -----------
        cmd: The R code you want to execute
        push: Dictionary of name:value pairs that you want to introduce to R session
        get: True, False, None, a variable name, or a list of variable names
        
        returns:
        ---------
        if get is False or None, it returns nothing.
        If get is True, it returns the (converted) value returned by the R code.
        If get is a string, it returns that variable, as specified by the get() function.
        If get is a list of names, it returns what get(*names) returns
        (an empty list if no names are given).
        """
        if push is None:
            push = {}
        #fi
        
        self.push(**push)
        
        res = rpy2.robjects.r(cmd)
        
        if get is None:
            # Previously this fell through to self.get(*None) and raised a TypeError
            return None
        #fi
        
        if isinstance(get, bool):
            return self._converter.rpy2py(res) if get else None
        #fi
        
        if isinstance(get, str):
            return self.get(get)
        #fi
        
        names = list(get)
        if len(names) == 0:
            # No names requested: get() needs at least one name, so answer directly
            return []
        #fi
        return self.get(*names)
    #edef
    
    def __call__(self, cmd, push=None, get=True):
        """
        Call R code, pushing values, and returning values if necessary.
        Shorthand for exec(); see exec() for the meaning of the parameters.
        
        parameters:
        -----------
        cmd: The R code you want to execute
        push: Dictionary of name:value pairs that you want to introduce to R session
        get: True, False, None, a variable name, or a list of variable names
        
        returns:
        ---------
        Whatever exec(cmd, push, get) returns
        """
        return self.exec(cmd, push, get)
    #edef
    
    def __str__(self):
        """
        Short, fixed description of the wrapper
        """
        return "<BIU Rpy2 wrapper. See usage examples on github.>"
    #edef
    
    def __repr__(self):
        """
        Prepare a string representation of the class (same as str())
        """
        return str(self)
    #edef
#eclass

In [None]:
# Demo: push a few values into R, run a small R snippet that prints its own
# output, and discard the snippet's return value (get=False).
# rnorm's third argument is the standard deviation (not the variance), so the
# values are named mu/sigma and passed by keyword to make that explicit.
r = R()
r.push(n=5000, mu=10, sigma=6, question="How many apples do you eat per day?")
r("""
    dist <- rnorm(n, mean = mu, sd = sigma)
    print(mean(dist))
    print(question)
""", get=False)

[1] 9.991866
[1] "How many apples do you eat per day?"
