Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 136 lines (119 sloc) 3.1 KB
#!/bin/bash
# Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV or PNG on stdout
#
# Example usage:
# $ seq 100 | Rio -nf sum       (same as: seq 100 | Rio -ne 'sum(df)')
#
# $ curl -s 'https://raw.githubusercontent.com/pydata/pandas/master/pandas/tests/data/iris.csv' > iris.csv
# $ < iris.csv Rio -e 'df$SepalLength^2'
# $ < iris.csv Rio -f summary
# $ < iris.csv Rio -se 'sqldf("select Name from df where df.SepalLength > 7")'
# $ < iris.csv Rio -ge 'g+geom_point(aes(x=SepalLength,y=SepalWidth,colour=Name))' > iris.png
#
# Dependency: R (with optionally the R packages ggplot2, dplyr, tidyr, and sqldf)
#
# Author: http://jeroenjanssens.com
# Print the Rio help text on stdout: a one-line summary, the invocation
# form, and one line per supported option.
usage() {
  printf '%s\n' \
    'Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV on stdout' \
    'usage: Rio OPTIONS' \
    'OPTIONS:' \
    '-d Delimiter' \
    '-e Commands to execute' \
    '-f Single command to execute on data.frame' \
    '-h Show this message' \
    '-g Import ggplot2' \
    '-n CSV has no header' \
    '-r Import dplyr and tidyr' \
    '-s Import sqldf' \
    '-b Use same settings as used for book Data Science at the Command Line' \
    '-v Verbose'
}
# Cleanup handler, registered on EXIT just below so it runs on every exit path.
# Globals read: IN, OUT, ERR (any of them may still be unset on an early exit).
# Removes the input and output temp files, plus the suffix-less files that
# mktemp created when the OUT and ERR names were generated. The error log
# itself is removed only when it is empty; a non-empty log is left in place.
# Also removes Rplots.pdf from the current directory (presumably a by-product
# of R's default graphics device - NOTE(review): confirm).
finish() {
  local path
  for path in "$IN" "$OUT" "${OUT%.png}" "${ERR%.err}"; do
    # Skip names that were never assigned so rm never gets an empty operand.
    if [[ -n "$path" ]]; then
      # Quoted and after `--`: safe for paths containing spaces, glob
      # characters, or a leading dash.
      rm -f -- "$path"
    fi
  done
  ## Removes error file if error file is empty.
  if [[ -n "$ERR" && ! -s "$ERR" ]]; then
    rm -f -- "$ERR"
  fi
  rm -f Rplots.pdf
}
trap finish EXIT
# callR: run the user's R code against the CSV stored in the temp input file.
# Globals read: IN, OUT, HEADER, DELIMITER, REQUIRES, SCRIPT, RIO_DPI.
# The whole R program is passed to Rscript as one -e expression. In order it:
#   1. Sets options(scipen=999) and read.csv()s $IN into `df`, using $HEADER
#      for header= and $DELIMITER for sep=, with stringsAsFactors=F.
#   2. Runs $REQUIRES followed by $SCRIPT, then stores .Last.value in `last`.
#   3. Writes `last` to $OUT according to its type:
#        matrix      -> converted to a data.frame first
#        data.frame  -> write.table(): comma-separated, quoted, no row names;
#                       col.names takes the value of $HEADER
#        vector      -> cat() of the elements into $OUT (the sep argument looks
#                       intended to put one element per line -
#                       NOTE(review): confirm the escaping)
#        ggplot      -> ggsave() at 20x15 cm; dpi is $RIO_DPI, or 72 when
#                       RIO_DPI is unset. Only tested when is.ggplot exists.
#        otherwise   -> sink() to $OUT, then print(last)
# NOTE(review): every value is interpolated into the R source without any
# escaping, so the quoting on the Rscript line must be kept exactly as is.
# Nothing here redirects Rscript's own stdout/stderr; the caller decides that.
callR() {
Rscript --vanilla -e "options(scipen=999);df<-read.csv('${IN}',header=${HEADER},sep='${DELIMITER}',stringsAsFactors=F);${REQUIRES}${SCRIPT}last<-.Last.value;if(is.matrix(last)){last<-as.data.frame(last)};if(is.data.frame(last)){write.table(last,'${OUT}',sep=',',quote=T,qmethod='double',row.names=F,col.names=${HEADER});}else if(is.vector(last)){cat(last,sep='\\\n', file='${OUT}')}else if(exists('is.ggplot')&&is.ggplot(last)){ggsave('${OUT}',last,dpi=${RIO_DPI-72},units='cm',width=20,height=15);}else{sink('${OUT}');print(last);}"
}
# Defaults, overridden later by command-line options.
SCRIPT=         # R code to run against df (set by -e / -f)
REQUIRES=       # R library-loading statements (appended to by -g / -r / -s)
DELIMITER=","   # field separator handed to read.csv (set by -d)
HEADER="T"      # R logical: does the CSV have a header row? (-n sets "F")
VERBOSE=false   # when true, R's own output is not captured (set by -v)

# OSX `mktemp' requires a temp file template, but Linux `mktemp' has it as
# optional. This explicitly uses a template, which works for both. TMPDIR
# falls back to /tmp in case it isn't set as an environment variable.
TMPDIR="${TMPDIR:-/tmp}"

# Templates are quoted so a TMPDIR containing whitespace still works, and any
# trailing slash is stripped to avoid a doubled "//" in the generated paths.
# OUT and ERR are the mktemp-generated names with a suffix appended; mktemp
# only creates the suffix-less file, the suffixed path is created later.
# A mktemp failure aborts immediately instead of continuing with empty paths.
IN="$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX")" || {
  IN=
  echo "Rio: cannot create temporary file in ${TMPDIR}" >&2
  exit 1
}
OUT="$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX").png" || {
  OUT=
  echo "Rio: cannot create temporary file in ${TMPDIR}" >&2
  exit 1
}
ERR="$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX").err" || {
  ERR=
  echo "Rio: cannot create temporary file in ${TMPDIR}" >&2
  exit 1
}
# Parse command-line options into the globals consumed by callR.
#   -b  sets RIO_DPI=300            -d  sets DELIMITER
#   -e  sets SCRIPT to the given R code (a trailing ';' is added if missing)
#   -f  sets SCRIPT to "<name>(df);"
#   -g / -r / -s  append library-loading statements to REQUIRES
#   -n  sets HEADER="F"             -v  sets VERBOSE=true
#   -h  prints usage and exits with status 1
# Anything else (including -p, which is accepted by the option string but has
# no handler) prints usage and exits with a non-zero status.
# -e and -f both assign SCRIPT, so whichever comes last wins.

# Matches a string whose last non-whitespace character is ';'. Kept in a
# variable so the regex is not subject to shell quoting rules inside [[ ]].
trailing_semicolon_re=';[[:space:]]*$'

while getopts "d:hgnprsve:f:b" OPTION; do
  case "$OPTION" in
    b)
      RIO_DPI=300
      ;;
    d)
      DELIMITER=$OPTARG
      ;;
    e)
      SCRIPT=$OPTARG
      # callR appends more R code directly after SCRIPT, so it must end in a
      # statement separator. Tested in-shell rather than via `echo | grep` so
      # the user's code is never glob-expanded or misread as echo options.
      if [[ ! "$SCRIPT" =~ $trailing_semicolon_re ]]; then
        SCRIPT="${SCRIPT};"
      fi
      ;;
    f)
      SCRIPT="${OPTARG}(df);"
      ;;
    h)
      usage
      exit 1
      ;;
    g)
      REQUIRES="${REQUIRES}require(ggplot2);g<-ggplot(df);"
      ;;
    n)
      HEADER="F"
      ;;
    r)
      REQUIRES="${REQUIRES}require(dplyr);require(tidyr);"
      ;;
    s)
      REQUIRES="${REQUIRES}require(sqldf);"
      ;;
    v)
      VERBOSE=true
      ;;
    *)
      # Unknown or unhandled option: show help and signal failure.
      usage
      exit 1
      ;;
  esac
done
# Main flow: buffer stdin, run R, then emit either the result or R's log.

# Copy all of stdin into the temp input file that callR hands to read.csv.
cat > "$IN"

# In verbose mode R's stdout/stderr go straight to the caller; otherwise they
# are captured in the error log so they can be shown only when needed.
if [[ "$VERBOSE" == true ]]; then
  callR
else
  callR > "$ERR" 2>&1
fi

# Print the captured R log on stdout, if there is one. In verbose mode the log
# file is never created, so it is skipped instead of making cat fail.
show_log() {
  if [[ -f "$ERR" ]]; then
    cat -- "$ERR"
  fi
}

if [[ ! -f "$OUT" ]]; then
  # R produced no output file: show what it printed instead.
  show_log
else
  # OUT may be a binary PNG. NUL bytes are stripped before the content is
  # captured, because Bash drops them from command substitutions anyway and
  # newer versions print a warning for each affected substitution.
  RESULT="$(tr -d '\000' < "$OUT")"
  if [[ "$RESULT" == "NULL" ]]; then
    # The output is just R's printed NULL, i.e. no usable value: show the log.
    show_log
  else
    cat -- "$OUT"
  fi
fi