In [101]:
from rpy2.robjects import r, pandas2ri
import pandas as pd
def data(name): 
    return pd.DataFrame(pandas2ri.ri2py(r[name]))

# Import data
mtcars = data('mtcars')
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/state.html
states = pd.concat([data("state.name"),
                   data("state.abb"),
                   data('state.x77')],axis=1)
states.columns = ["name","abb","pop","income","illiteracy","lifeExp","murder","HS Grad","frost","area"]

state_arrests = pd.concat([data("state.name"),
                   data("state.abb"),
                   data('USArrests')],axis=1)
state_arrests.columns = ['Name','Abb'] + state_arrests.columns.tolist()[2:]

# Make a frequency count by distinct values of 
# column(s) listed in 'groupbyvars'
# Returns pandas dataframe
def tidy_count(df,groupbyvars):
    return(df.groupby(groupbyvars).size().reset_index().rename(columns={0: "n"}))

In [65]:
states.head(5)

Unnamed: 0,name,abb,pop,income,illiteracy,lifeExp,murder,HS Grad,frost,area
0,Alabama,AL,3615.0,3624.0,2.1,69.05,15.1,41.3,20.0,50708.0
1,Alaska,AK,365.0,6315.0,1.5,69.31,11.3,66.7,152.0,566432.0
2,Arizona,AZ,2212.0,4530.0,1.8,70.55,7.8,58.1,15.0,113417.0
3,Arkansas,AR,2110.0,3378.0,1.9,70.66,10.1,39.9,65.0,51945.0
4,California,CA,21198.0,5114.0,1.1,71.71,10.3,62.6,20.0,156361.0


In [62]:
state_arrests.head(5)

Unnamed: 0,Name,Abb,Murder,Assault,UrbanPop,Rape
0,Alabama,AL,13.2,236,58,21.2
1,Alaska,AK,10.0,263,48,44.5
2,Arizona,AZ,8.1,294,80,31.0
3,Arkansas,AR,8.8,190,50,19.5
4,California,CA,9.0,276,91,40.6


In [5]:
mtcars.head(5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6.0,160.0,110.0,3.9,2.62,16.46,0.0,1.0,4.0,4.0
1,21.0,6.0,160.0,110.0,3.9,2.875,17.02,0.0,1.0,4.0,4.0
2,22.8,4.0,108.0,93.0,3.85,2.32,18.61,1.0,1.0,4.0,1.0
3,21.4,6.0,258.0,110.0,3.08,3.215,19.44,1.0,0.0,3.0,1.0
4,18.7,8.0,360.0,175.0,3.15,3.44,17.02,0.0,0.0,3.0,2.0


In [71]:
tidy_count(mtcars,["cyl","am"])

Unnamed: 0,cyl,am,n
0,4.0,0.0,3
1,4.0,1.0,8
2,6.0,0.0,4
3,6.0,1.0,3
4,8.0,0.0,12
5,8.0,1.0,2


In [74]:
pd.merge(state,state_arrests,left_on="abb",right_on="Abb").head(5)

Unnamed: 0,name,abb,pop,income,illiteracy,lifeExp,murder,HS Grad,frost,area,Name,Abb,Murder,Assault,UrbanPop,Rape
0,Alabama,AL,3615.0,3624.0,2.1,69.05,15.1,41.3,20.0,50708.0,Alabama,AL,13.2,236,58,21.2
1,Alaska,AK,365.0,6315.0,1.5,69.31,11.3,66.7,152.0,566432.0,Alaska,AK,10.0,263,48,44.5
2,Arizona,AZ,2212.0,4530.0,1.8,70.55,7.8,58.1,15.0,113417.0,Arizona,AZ,8.1,294,80,31.0
3,Arkansas,AR,2110.0,3378.0,1.9,70.66,10.1,39.9,65.0,51945.0,Arkansas,AR,8.8,190,50,19.5
4,California,CA,21198.0,5114.0,1.1,71.71,10.3,62.6,20.0,156361.0,California,CA,9.0,276,91,40.6


In [69]:
def print_values(**kwargs):
    for key, value in kwargs.items():
        print("The value of {} is {}".format(key, value))

print_values(left_on="this var", your_name="hulk")

The value of left_on is this var
The value of your_name is hulk


In [99]:
# Experimental wrapper function for pandas merge
# Drops column specified in right_on
# However this isn't always desired behaviour
def tidy_merge(*args,**kwargs):
    try:
        right_on=kwargs['right_on']
    except:
        right_on=''
    if right_on == '':
        return(pd.merge(*args,**kwargs))
    else:
        return(pd.merge(*args,**kwargs).drop(right_on,axis=1))

In [100]:
tidy_merge(states,state_arrests.rename(columns={'Abb':'abb'}),left_on="abb",right_on="abb",how="inner").head(5)

Unnamed: 0,name,pop,income,illiteracy,lifeExp,murder,HS Grad,frost,area,Name,Murder,Assault,UrbanPop,Rape
0,Alabama,3615.0,3624.0,2.1,69.05,15.1,41.3,20.0,50708.0,Alabama,13.2,236,58,21.2
1,Alaska,365.0,6315.0,1.5,69.31,11.3,66.7,152.0,566432.0,Alaska,10.0,263,48,44.5
2,Arizona,2212.0,4530.0,1.8,70.55,7.8,58.1,15.0,113417.0,Arizona,8.1,294,80,31.0
3,Arkansas,2110.0,3378.0,1.9,70.66,10.1,39.9,65.0,51945.0,Arkansas,8.8,190,50,19.5
4,California,21198.0,5114.0,1.1,71.71,10.3,62.6,20.0,156361.0,California,9.0,276,91,40.6


In [106]:
state_arrests.drop('Murder',axis=1).\
    merge(states.drop('name',axis=1),left_on="Abb",right_on='abb').head(5)

Unnamed: 0,Name,Abb,Assault,UrbanPop,Rape,abb,pop,income,illiteracy,lifeExp,murder,HS Grad,frost,area
0,Alabama,AL,236,58,21.2,AL,3615.0,3624.0,2.1,69.05,15.1,41.3,20.0,50708.0
1,Alaska,AK,263,48,44.5,AK,365.0,6315.0,1.5,69.31,11.3,66.7,152.0,566432.0
2,Arizona,AZ,294,80,31.0,AZ,2212.0,4530.0,1.8,70.55,7.8,58.1,15.0,113417.0
3,Arkansas,AR,190,50,19.5,AR,2110.0,3378.0,1.9,70.66,10.1,39.9,65.0,51945.0
4,California,CA,276,91,40.6,CA,21198.0,5114.0,1.1,71.71,10.3,62.6,20.0,156361.0
