From 5eb6a1090f1578535366aeaf78147ee0cc8def84 Mon Sep 17 00:00:00 2001 From: Jason Bryer Date: Tue, 20 Nov 2012 08:24:30 -0500 Subject: [PATCH] Database functions have been moved to the sqlutils package. --- .Rbuildignore | 3 +- DESCRIPTION | 5 +- NAMESPACE | 8 --- R/dbaccess.R | 123 ------------------------------------------ R/irutils-package.R | 6 +-- man/cacheQuery.Rd | 16 ------ man/execQuery.Rd | 19 ------- man/getParameters.Rd | 14 ----- man/getQueries.Rd | 14 ----- man/getQuery.Rd | 13 ----- man/getQueryDesc.Rd | 13 ----- man/getSQLRepos.Rd | 13 ----- man/setSQLRepos.Rd | 13 ----- vignettes/irutils.Rnw | 24 --------- 14 files changed, 6 insertions(+), 278 deletions(-) delete mode 100644 R/dbaccess.R delete mode 100644 man/cacheQuery.Rd delete mode 100644 man/execQuery.Rd delete mode 100644 man/getParameters.Rd delete mode 100644 man/getQueries.Rd delete mode 100644 man/getQuery.Rd delete mode 100644 man/getQueryDesc.Rd delete mode 100644 man/getSQLRepos.Rd delete mode 100644 man/setSQLRepos.Rd diff --git a/.Rbuildignore b/.Rbuildignore index ce91f44..1ff7dbf 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,2 @@ -^\.Rproj\.user$ +^\.Rproj\.user$ +^.*\.Rproj$ diff --git a/DESCRIPTION b/DESCRIPTION index c2d5a76..d30c5a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,10 +13,11 @@ Depends: xtable, psych, reshape, - utils + utils, + stringr, + roxygen2 Collate: 'age.R' - 'dbaccess.R' 'irutils-package.R' 'saveIPEDStoDB.R' 'LocalRepos.R' diff --git a/NAMESPACE b/NAMESPACE index 9d72841..a2857d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,23 +1,15 @@ S3method(xtable,demographics) export(adjustedTableLaTeX) -export(cacheQuery) export(createLocalRepos) export(demographics) -export(execQuery) export(getAge) export(getAgeGroups) export(getAgeGroupsIPEDS) -export(getParameters) -export(getQueries) -export(getQuery) -export(getQueryDesc) -export(getSQLRepos) export(local.available.packages) export(local.install.packages) export(lsos) export(pie) export(saveIPEDStoDB) -export(setSQLRepos) export(updateLocalRepos) export(xtable.demographics) import(ggplot2) diff --git a/R/dbaccess.R b/R/dbaccess.R deleted file mode 100644 index d06202f..0000000 --- a/R/dbaccess.R +++ /dev/null @@ -1,123 +0,0 @@ -#' Returns the current directory containing SQL files. -#' @author Jason Bryer -#' @export -getSQLRepos <- function() { - pkgEnv <- pos.to.env(match('package:irutils', search())) - sqlrepos <- get("sqlrepos", envir=pkgEnv) - if(is.null(sqlrepos)) { - sqlrepos <- paste(system.file(package='irutils'), '/data', sep='') - } - return(sqlrepos) -} - -#' Sets the current directory containing SQL files. -#' @author Jason Bryer -#' @export -setSQLRepos <- function(repos) { - pkgEnv = pos.to.env(match('package:irutils', search())) - assign("sqlrepos", - value=paste(system.file(package='irutils'), '/data', sep=''), - envir=pkgEnv) -} - -#' Returns a list of available queries in the current repository. -#' @author Jason Bryer -#' @export -getQueries <- function() { - files = list.files(path=getSQLRepos(), pattern="*.sql") - return( substr(files, 0, nchar(files)-4) ) -} - -#' Executes the specified query and returns a data frame. This function currently -#' supports RODBC, RSQLite, and RMySQL. For other databases, use getQuery() and -#' execute the SQL statement using the appropriate database connection. -#' @author Jason Bryer -#' @export -execQuery <- function(query=NULL, connection=NULL, ...) { - sql = getQuery(query=query, ...) - df <- NULL - if(class(connection) == 'RODBC' ) { - df <- sqlQuery(connection, sql) - } else if(class(connection) == 'RSQLite') { - df <- dbSendQuery(connection, sql) - } else if(class(connection) == 'RMySQL') { - df <- dbSendQuery(connection, sql) - } else { - stop('Unsupported database connection.') - } - return(df) -} - -#' Returns the query as a string. -#' @author Jason Bryer -#' @export -getQuery <- function(query=NULL, ...) { - sql = scan(paste(getSQLRepos(), "/", query, ".sql", sep=''), what="character", - sep=';', multi.line=FALSE, comment.char=c("#"), quiet=TRUE, quote=NULL) - sql = paste(sql, collapse=" ") - parmvals = unlist(list(...)) - if(length(parmvals)>0) { - for(i in 1:length(parmvals)) { - sql = gsub(paste(":", names(parmvals)[i], ":", sep=''), parmvals[i], sql) - } - } - return(sql) -} - -#' Returns the query as a string. For internal use only. -#' @author Jason Bryer -#' @export -getQueryDesc <- function(query=NULL, ...) { - desc = '' - sql = scan(paste(getSQLRepos(), "/", query, ".sql", sep=''), what="character", - sep=';', multi.line=FALSE, comment.char=c(""), quiet=TRUE, quote=NULL) - for(i in 1:length(sql)) { - tmp = strsplit(sql[i], '#') - if(length(tmp[[1]]) > 1) { - desc = paste(desc, tmp[[1]][length(tmp[[1]])], sep=' ') - } - } - return(desc) -} - -#' This will first look in the given directory for a CSV version of the file, if -#' it exists, that will be read and returned. Otherwise it will execute the query -#' and then saves a CSV file. -#' @export -cacheQuery <- function(query=NULL, dir=getwd(), filename=NULL, ...) { - parms = getParameters(query) - parmvals = unlist(list(...)) - if(is.null(filename)) { - filename = paste(dir, '/', query, sep='') - if(length(parms) > 0) { - for(i in 1:length(parms)) { - filename = paste(filename, parms[i], parmvals[parms[i]], sep='.') - } - } - filename = paste(filename, 'csv', sep='.') - } - message(paste("Cached query file:", filename)) - if(file.exists(filename)) { - df = read.csv(filename) - } else { - df = execQuery(query=query, ...) - write.csv(df, filename, row.names=FALSE) - } - return(df) -} - - - -#' Returns the parameters that must be set for the given query. -#' @author Jason Bryer -#' @export -getParameters <- function(query) { - sql = getQuery(query) - pos = gregexpr(":", sql) - results = c() - for(i in seq(1, length(pos[[1]]), by=2)) { - results = c(results, (substr(sql, pos[[1]][i]+1, pos[[1]][i+1]-1)) ) - } - return(unique(results)) -} - diff --git a/R/irutils-package.R b/R/irutils-package.R index 337c949..37d7811 100644 --- a/R/irutils-package.R +++ b/R/irutils-package.R @@ -17,9 +17,5 @@ cranMain <- 'http://cran.r-project.org' #Main CRAN cranExtra <- 'http://www.stats.ox.ac.uk/pub/RWin' #Windows Binaries for some packages .onAttach <- function(libname, pkgname) { - pkgEnv = pos.to.env(match('package:irutils', search())) - assign("sqlrepos", - value=paste(system.file(package='irutils'), '/data', sep=''), - envir=pkgEnv) - #sqlrepos <<- NULL + pkgEnv = pos.to.env(match('package:irutils', search())) } diff --git a/man/cacheQuery.Rd b/man/cacheQuery.Rd deleted file mode 100644 index d25a9aa..0000000 --- a/man/cacheQuery.Rd +++ /dev/null @@ -1,16 +0,0 @@ -\name{cacheQuery} -\alias{cacheQuery} -\title{This will first look in the given directory for a CSV version of the file, if -it exists, that will be read and returned. Otherwise it will execute the query -and then saves a CSV file.} -\usage{ - cacheQuery(query = NULL, dir = getwd(), filename = NULL, - ...) -} -\description{ - This will first look in the given directory for a CSV - version of the file, if it exists, that will be read and - returned. Otherwise it will execute the query and then - saves a CSV file. -} - diff --git a/man/execQuery.Rd b/man/execQuery.Rd deleted file mode 100644 index 7e0bfe1..0000000 --- a/man/execQuery.Rd +++ /dev/null @@ -1,19 +0,0 @@ -\name{execQuery} -\alias{execQuery} -\title{Executes the specified query and returns a data frame. This function currently -supports RODBC, RSQLite, and RMySQL. For other databases, use getQuery() and -execute the SQL statement using the appropriate database connection.} -\usage{ - execQuery(query = NULL, connection = NULL, ...) -} -\description{ - Executes the specified query and returns a data frame. - This function currently supports RODBC, RSQLite, and - RMySQL. For other databases, use getQuery() and execute - the SQL statement using the appropriate database - connection. -} -\author{ - Jason Bryer -} - diff --git a/man/getParameters.Rd b/man/getParameters.Rd deleted file mode 100644 index adbc5bd..0000000 --- a/man/getParameters.Rd +++ /dev/null @@ -1,14 +0,0 @@ -\name{getParameters} -\alias{getParameters} -\title{Returns the parameters that must be set for the given query.} -\usage{ - getParameters(query) -} -\description{ - Returns the parameters that must be set for the given - query. -} -\author{ - Jason Bryer -} - diff --git a/man/getQueries.Rd b/man/getQueries.Rd deleted file mode 100644 index 5aad4d4..0000000 --- a/man/getQueries.Rd +++ /dev/null @@ -1,14 +0,0 @@ -\name{getQueries} -\alias{getQueries} -\title{Returns a list of available queries in the current repository.} -\usage{ - getQueries() -} -\description{ - Returns a list of available queries in the current - repository. -} -\author{ - Jason Bryer -} - diff --git a/man/getQuery.Rd b/man/getQuery.Rd deleted file mode 100644 index 8849f8d..0000000 --- a/man/getQuery.Rd +++ /dev/null @@ -1,13 +0,0 @@ -\name{getQuery} -\alias{getQuery} -\title{Returns the query as a string.} -\usage{ - getQuery(query = NULL, ...) -} -\description{ - Returns the query as a string. -} -\author{ - Jason Bryer -} - diff --git a/man/getQueryDesc.Rd b/man/getQueryDesc.Rd deleted file mode 100644 index 72b5fd4..0000000 --- a/man/getQueryDesc.Rd +++ /dev/null @@ -1,13 +0,0 @@ -\name{getQueryDesc} -\alias{getQueryDesc} -\title{Returns the query as a string. For internal use only.} -\usage{ - getQueryDesc(query = NULL, ...) -} -\description{ - Returns the query as a string. For internal use only. -} -\author{ - Jason Bryer -} - diff --git a/man/getSQLRepos.Rd b/man/getSQLRepos.Rd deleted file mode 100644 index 83dc682..0000000 --- a/man/getSQLRepos.Rd +++ /dev/null @@ -1,13 +0,0 @@ -\name{getSQLRepos} -\alias{getSQLRepos} -\title{Returns the current directory containing SQL files.} -\usage{ - getSQLRepos() -} -\description{ - Returns the current directory containing SQL files. -} -\author{ - Jason Bryer -} - diff --git a/man/setSQLRepos.Rd b/man/setSQLRepos.Rd deleted file mode 100644 index 2a574fd..0000000 --- a/man/setSQLRepos.Rd +++ /dev/null @@ -1,13 +0,0 @@ -\name{setSQLRepos} -\alias{setSQLRepos} -\title{Sets the current directory containing SQL files.} -\usage{ - setSQLRepos(repos) -} -\description{ - Sets the current directory containing SQL files. -} -\author{ - Jason Bryer -} - diff --git a/vignettes/irutils.Rnw b/vignettes/irutils.Rnw index 09ce9b3..4400841 100644 --- a/vignettes/irutils.Rnw +++ b/vignettes/irutils.Rnw @@ -63,30 +63,6 @@ ls('package:irutils') -\section{Database Access} - -For many Institutional Research offices the institutions student information system (SIS) is the most common source of data. Since virtually all SIS systems are backed by a database, extracting data requires extracting data using queries. Typically the language used to extract data is called structured query language (SQL) regardless if the database is provided by Oracle, Microsoft, or an open source options such as MySQL and PostgreSQL. There are a number of functions in this package that will faciliate extracting data from these databases directly into R. - -The database access functions provide an interface to a directory of SQL scripts. SQL scripts are simply a plain text file containing the query. The directory containing these files can be determined or set using the \texttt{getSQLRepos} and \texttt{setSQLRepos} functions, repsectively. - -<>= -getSQLRepos() -@ - -By convention, all SQL files must use a \texttt{.sql} file extension. The \texttt{getQueries} function will return a list of all the queries available in the current reposistory. - -<>= -getQueries() -@ - -The \texttt{getQueryDesc} and \texttt{getParameters} functions will provide some details about the query in question. In particular, the latter will return the parameters that are required for the query to execute. - -<>= -getQueryDesc('TestQuery') -getParameters('TestQuery') -@ - -There are two functions available for executing the query. The \texttt{execQuery} will execute the query and return a data frame. The \texttt{cacheQuery} however, will first look in the specified directory (by default the \texttt{dir} parameter is set to \texttt{getwd()}) for a CSV file that matches the currently request query. That is, the file name (which is returned when this function is executed) is built using a combination of the query name and parameters to uniquely identify it. This is useful when using Sweave and \LaTeX for document preparation where the function may be executed multiple times but the data does not change. It is considerably faster to read data from a flat file then it is to query the database each time. \subsection{Creating Your Own Query}