version 2.2.7
trinker authored and cran-robot committed Aug 27, 2017
1 parent 9ccd7a3 commit 9c9f3ef
Showing 463 changed files with 770 additions and 1,132 deletions.
37 changes: 16 additions & 21 deletions DESCRIPTION
@@ -2,42 +2,37 @@ Package: qdap
Type: Package
Title: Bridging the Gap Between Qualitative Data and Quantitative
Analysis
Version: 2.2.5
Date: 2016-06-15
Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason",
"Kurkiewicz", role = "ctb"), person("Tyler", "Rinker", email =
"tyler.rinker@gmail.com", role = c("aut", "cre")))
Version: 2.2.7
Date: 2017-08-26
Authors@R: c(person("Bryan", "Goodrich", role = "ctb"), person("Dason", "Kurkiewicz", role = "ctb"),
person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut", "cre")))
Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
Depends: R (>= 3.1.0), qdapDictionaries (>= 1.0.2), qdapRegex (>=
0.1.2), qdapTools (>= 1.3.1), RColorBrewer
Imports: chron, dplyr (>= 0.3), gdata, gender (>= 0.5.1), ggplot2 (>=
0.9.3.1), grid, gridExtra, igraph, methods, NLP, openNLP (>=
2.1.0), grid, gridExtra, igraph, methods, NLP, openNLP (>=
0.2-1), parallel, plotrix, RCurl, reports, reshape2, scales,
stringdist, tidyr, tm (>= 0.6.2), tools, venneuler, wordcloud,
xlsx, XML
Suggests: koRpus, knitr, lda, proxy, stringi, SnowballC, testthat
LazyData: TRUE
VignetteBuilder: knitr
Description: Automates many of the tasks associated with quantitative
discourse analysis of transcripts containing discourse
including frequency counts of sentence types, words, sentences,
turns of talk, syllables and other assorted analysis tasks. The
package provides parsing tools for preparing transcript data.
Many functions enable the user to aggregate data by any number
of grouping variables, providing analysis and seamless
integration with other R packages that undertake higher level
analysis and visualization of text. This affords the user a
more efficient and targeted analysis. 'qdap' is designed for
transcript analysis, however, many functions are applicable to
other areas of Text Mining/Natural Language Processing.
Description: Automates many of the tasks associated with quantitative discourse analysis of transcripts
containing discourse including frequency counts of sentence types, words, sentences, turns of
talk, syllables and other assorted analysis tasks. The package provides parsing tools for
preparing transcript data. Many functions enable the user to aggregate data by any number of
grouping variables, providing analysis and seamless integration with other R packages that
undertake higher level analysis and visualization of text. This affords the user a more
efficient and targeted analysis. 'qdap' is designed for transcript analysis, however, many
functions are applicable to other areas of Text Mining/Natural Language Processing.
License: GPL-2
URL: http://trinker.github.com/qdap/
BugReports: http://github.com/trinker/qdap/issues
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1
NeedsCompilation: no
Packaged: 2016-06-16 00:52:04 UTC; Tyler
Packaged: 2017-08-26 16:04:28 UTC; Tyler
Author: Bryan Goodrich [ctb],
Dason Kurkiewicz [ctb],
Tyler Rinker [aut, cre]
Repository: CRAN
Date/Publication: 2016-06-16 18:19:05
Date/Publication: 2017-08-27 16:21:02 UTC
924 changes: 462 additions & 462 deletions MD5


9 changes: 8 additions & 1 deletion NEWS
@@ -17,7 +17,7 @@ And constructed with the following guidelines:
* Bug fixes and misc. changes bumps the patch


CHANGES IN qdap VERSION 2.2.5
CHANGES IN qdap VERSION 2.2.5-
----------------------------------------------------------------


@@ -28,6 +28,13 @@ BUG FIXES
of the alphabet. The dictionary automatically uses `assume.first.correct=FALSE`
if this occurs. Reported by @CallumH of StackOverflow:
http://stackoverflow.com/q/33516466/1000343 See issue #217 for details.

* `check_spelling_interactive` replaced substrings rather than bounded words.
  This was caught by @chrisjacques. See issue #221.

* `replace_abbreviation` threw an error because `data.frame` converts character
  to factor by default and `nchar` no longer works on factors. This was caught
  by @karilint. See issue #225.
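
  A hedged sketch of the `replace_abbreviation` failure mode above (assumes
  pre-4.0 R defaults, where `data.frame()` coerced character columns to
  factor; `abv` is an illustrative stand-in for the package's abbreviation key):

      abv <- data.frame(abv = "Dr.", repl = "Doctor", stringsAsFactors = TRUE)
      class(abv$abv)                  ## "factor"
      ## nchar(abv$abv)               ## error: nchar() requires a character vector
      nchar(as.character(abv$abv))    ## 3 -- coercing first avoids the error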

NEW FEATURES

57 changes: 39 additions & 18 deletions R/as.tdm.R
@@ -686,12 +686,14 @@ as.Corpus.default <- function(text.var, grouping.var = NULL, demographic.vars,
## Split apart by grouping variables and collapse text
LST <- sapply(split(DF[, "text.var"], DF[, "grouping"]),
paste, collapse = " ")
LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
# LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
LST_DF <- qdapTools::list2df(LST, "text.var", "id")

# ## Use the tm package to convert to a Corpus
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
# readerControl=list(reader=qdap_tm_reader))
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])

## Use the tm package to convert to a Corpus
mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
readerControl=list(reader=qdap_tm_reader))

## Add metadata info
NLP::meta(mycorpus, "MetaID") <- names(LST)
NLP::meta(mycorpus, "labels") <- names(LST)
@@ -1000,12 +1002,18 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
LST <- sapply(split(DF[, "text.var"], DF[, "grouping"]),
paste, collapse = " ")

LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")

# LST_DF <- qdapTools::list2df(LST, "text.var", "id")
#
## Use the tm package to convert to a Corpus
mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
readerControl=list(reader=qdap_tm_reader))

# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
# readerControl=list(reader=qdap_tm_reader))
#
## Updated approach per tm changes 8/16/2017

LST_DF <- qdapTools::list2df(LST, "text.var", "id")
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])


## Add metadata info
NLP::meta(mycorpus, "MetaID") <- names(LST)
NLP::meta(mycorpus, "labels") <- names(LST)
@@ -1039,6 +1047,15 @@ tm_tdm_interface2 <- function(text.var, grouping.var, stopwords, char2space,

}

replace_ids <- function(corpus, ids){

stopifnot(length(corpus$content) == length(ids))
corpus$content <- Map(function(x, y) {
x$meta$id <- y
x
}, corpus$content, ids)
corpus
}
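
A standalone sketch of the id-replacement logic, applying the `replace_ids()`
defined above to a mock object shaped like tm's list-based corpus (the
`$content`/`$meta$id` layout is an assumption about tm's internals circa 0.7,
so treat this as illustrative, not canonical):

    mock_doc <- function(txt, id) list(content = txt, meta = list(id = id))
    corpus <- list(content = list(mock_doc("hello world", "1"),
                                  mock_doc("bye now", "2")))
    corpus <- replace_ids(corpus, c("greg", "sally"))
    sapply(corpus$content, function(d) d$meta$id)  ## "greg" "sally"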

tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
apostrophe.remove, ...){
@@ -1078,12 +1095,15 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
LST <- sapply(split(DF[, "text.var"], DF[, "grouping"]),
paste, collapse = " ")

LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
# LST_DF <- qdapTools::list2df(LST, "text.var", "grouping")
#
# ## Use the tm package to convert to a Corpus
# mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
# readerControl=list(reader=qdap_tm_reader))

LST_DF <- qdapTools::list2df(LST, "text.var", "id")
mycorpus <- replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])

## Use the tm package to convert to a Corpus
mycorpus <- tm::VCorpus(tm::DataframeSource(LST_DF),
readerControl=list(reader=qdap_tm_reader))

## Add metadata info
NLP::meta(mycorpus, "MetaID") <- names(LST)
NLP::meta(mycorpus, "labels") <- names(LST)
@@ -1123,11 +1143,12 @@ tm_dtm_interface2 <- function(text.var, grouping.var, stopwords, char2space,
#' @method as.Corpus TermDocumentMatrix
as.Corpus.TermDocumentMatrix <- function(text.var, ...){

LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text.var", "grouping")
LST_DF <- qdapTools::list2df(mat2word_list(text.var), "text.var", "id")

## Use the tm package to convert to a Corpus
tm::VCorpus(tm::DataframeSource(LST_DF),
readerControl=list(reader=qdap_tm_reader))
# tm::VCorpus(tm::DataframeSource(LST_DF),
# readerControl=list(reader=qdap_tm_reader))
replace_ids(tm::Corpus(tm::DataframeSource(LST_DF)), LST_DF[['id']])

}

4 changes: 2 additions & 2 deletions R/automated_readability_index.R
@@ -903,7 +903,7 @@ word_counts <- function(DF, x, y, z = NULL, g, alpha = .3) {
ggplot2::xlab(NMS[1]) +
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid = ggplot2::element_blank(),
panel.margin = unit(1, "lines")) +
panel.spacing = unit(1, "lines")) +
ggplot2::annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+
ggplot2::annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)

@@ -935,7 +935,7 @@ word_counts2 <- function(DF, x, y, z = NULL, g, alpha = .3) {
ggplot2::xlab(NMS[1]) +
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid = ggplot2::element_blank(),
panel.margin = unit(1, "lines")) +
panel.spacing = unit(1, "lines")) +
ggplot2::annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+
ggplot2::annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)

16 changes: 8 additions & 8 deletions R/check_spelling.R
@@ -44,7 +44,7 @@
#' @rdname check_spelling
#' @seealso \code{\link[stringdist]{stringdist}}
#' @references \url{http://stackoverflow.com/a/24454727/1000343} \cr
#' \url{http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Hornik+Murdoch.pdf}
#' \url{https://journal.r-project.org/archive/2011-2/RJournal_2011-2_Hornik+Murdoch.pdf}
#' @note A possible misspelled word is defined as not found in the
#' \code{dictionary}.
#' @examples
@@ -398,12 +398,12 @@ check_spelling_interactive.character <- function(text.var, range = 2,

output <- output[apply(output, 1, function(x) x[1] != x[2]), ]

out <- mgsub(output[[1]], output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
out <- mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)

class(out) <- c("check_spelling_interactive", class(out))
attributes(out)[["replacements"]] <- output
attributes(out)[["correct"]] <- function(text.var) {
mgsub(output[[1]], output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
}
message("\nSpelling Check Complete!\n")
out
@@ -468,12 +468,12 @@ check_spelling_interactive.factor <- function(text.var, range = 2,

output <- output[apply(output, 1, function(x) x[1] != x[2]), ]

out <- mgsub(output[[1]], output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
out <- mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)

class(out) <- c("check_spelling_interactive", class(out))
attributes(out)[["replacements"]] <- output
attributes(out)[["correct"]] <- function(text.var) {
mgsub(output[[1]], output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
}
message("\nSpelling Check Complete!\n")
out
@@ -526,13 +526,13 @@ check_spelling_interactive.check_spelling <- function(text.var, range = 2,

output <- output[apply(output, 1, function(x) x[1] != x[2]), ]

out <- mgsub(output[[1]], output[[2]], attributes(text.var)[["text.var"]],
out <- mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], attributes(text.var)[["text.var"]],
ignore.case = TRUE, fixed=FALSE)

class(out) <- c("check_spelling_interactive", class(out))
attributes(out)[["replacements"]] <- output
attributes(out)[["correct"]] <- function(text.var) {
mgsub(output[[1]], output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
mgsub(paste0("\\b", output[[1]], "\\b"), output[[2]], text.var, ignore.case = TRUE, fixed=FALSE)
}
message("\nSpelling Check Complete!\n")
out
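
The three hunks above make one change in six places: each pattern is wrapped
in `\\b` so that `mgsub()` replaces whole words only, not substrings. A
base-R sketch of the difference, with `gsub()` standing in for `mgsub()`:

    x <- "the theme of the theater"
    gsub("the", "THE", x)         ## "THE THEme of THE THEater" -- substring hits
    gsub("\\bthe\\b", "THE", x)   ## "THE theme of THE theater" -- whole words only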
@@ -570,7 +570,7 @@ check_spelling_interactive_helper <- function(out, suggests, click,
}

if (ans == "1") {
message("\n","Enter Repalcement:","\n")
message("\n","Enter Replacement:","\n")
repl <- readLines(n=1)

while (repl %in% c("0", "!")) {
2 changes: 2 additions & 0 deletions R/colcomb2class.R
@@ -21,6 +21,7 @@
#' and percents.
#' @export
#' @examples
#' \dontrun{
#' ## `termco` example
#' ml <- list(
#' cat1 = c(" the ", " a ", " an "),
@@ -47,6 +48,7 @@
#' ## data.frame example
#' dat4 <- data.frame(X=LETTERS[1:5], matrix(sample(0:5, 20, TRUE), ncol = 4))
#' colcomb2class(dat4, list(new = c("X1", "X4")))
#' }
colcomb2class <- function(dataframe, combined.columns, class = "list",
percent = TRUE, digits = 2, elim.old = TRUE, zero.replace = 0,
override = FALSE) {
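
For context, wrapping roxygen examples in `\dontrun{}` keeps them on the
rendered help page but stops `R CMD check` from executing them, a common fix
for slow or environment-dependent examples. A sketch of the resulting markup:

    #' @examples
    #' \dontrun{
    #' ## shown in the help file, skipped by R CMD check:
    #' colcomb2class(dat4, list(new = c("X1", "X4")))
    #' }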
5 changes: 2 additions & 3 deletions R/discourse_map.R
@@ -370,9 +370,8 @@ animated_discourse_map <- function(DF, edge.constant, sep = "_",
weight[, "prop_wc"] <- edge.constant*weight[, "prop_wc"]
cols <- igraph_weights[[i]][, c("from", "to", "color"), drop=FALSE]
wkey <- colpaste2df(weight, 1:2, sep = sep, keep.orig=FALSE)[, 2:1]
edges <- bracketX(utils::capture.output(E(grp))[-c(1:2)])
ekey <- paste2(do.call(rbind, lapply(strsplit(edges, "->"), Trim)),
sep = sep)
el <- ends(grp, E(grp), names=TRUE)
ekey <- paste(sep=sep, el[,1], el[,2])
ckey <- colpaste2df(cols, 1:2, sep = sep, keep.orig=FALSE)[, 2:1]

E(grp)$width <- NAer(ekey %l% wkey)
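
The new code swaps fragile parsing of captured print output for igraph's
`ends()`, which returns the endpoints of each edge as a two-column matrix
(available since igraph 1.0). A small sketch:

    library(igraph)
    g <- graph_from_literal(A --+ B, B --+ C)
    el <- ends(g, E(g), names = TRUE)   ## matrix: "A" "B" / "B" "C"
    paste(el[, 1], el[, 2], sep = "_")  ## edge keys: "A_B" "B_C"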
2 changes: 1 addition & 1 deletion R/dispersion_plot.R
@@ -290,7 +290,7 @@ dispersion_plot <- function(text.var, match.terms, grouping.var = NULL,
dat2 <- dat2[!is.na(dat2[["word"]]), ]

the_plot <- ggplot(data = dat2, aes(x = word.num, y = grouping)) +
geom_point(aes(position="dodge", color = summary),
geom_point(aes(color = summary),
shape = symbol, size = size) +
theme_bw() +
theme(panel.background = element_rect(fill = bg.color),
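
The `dispersion_plot` fix drops `position = "dodge"` from inside `aes()`:
`position` is a layer argument, not an aesthetic, and newer ggplot2 warns
about unknown entries in `aes()`. Where dodging is actually wanted, it
belongs on the layer, e.g.:

    library(ggplot2)
    ggplot(mtcars, aes(factor(cyl), fill = factor(am))) +
        geom_bar(position = "dodge")  ## position as a layer argument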
2 changes: 1 addition & 1 deletion R/gantt_plot.R
@@ -49,7 +49,7 @@
#' list(act, newb), size = 4))
#'
#' library(ggplot2); library(scales); library(RColorBrewer); library(grid)
#' z + theme(panel.margin = unit(1, "lines")) + scale_colour_grey()
#' z + theme(panel.spacing = unit(1, "lines")) + scale_colour_grey()
#' z + scale_colour_brewer(palette="Dark2")
#'
#' ## Fill Variable Example
2 changes: 1 addition & 1 deletion R/lexical_classification.R
@@ -1018,7 +1018,7 @@ plot.lexical_classification_preprocessed <- function(x, jitter=.1,
ggplot2::facet_wrap(~group, ncol=ncol) +
ggplot2::theme_minimal() +
ggplot2::theme(panel.grid = ggplot2::element_blank(),
panel.margin = grid::unit(1, "lines")) +
panel.spacing = grid::unit(1, "lines")) +
ggplot2::annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+
ggplot2::annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf) +
ggplot2::ylab("Content Rate") +
2 changes: 1 addition & 1 deletion R/new_project.R
@@ -47,7 +47,7 @@
#' }
#' \item{DOCUMENTATION}{ - A directory to store documents related to the project}
#' \item{PLOTS}{ - A directory to store plots}
#' \item{REPORTS}{ - A directory with report and presentation related tools. Please see the \cr \href{https://dl.dropbox.com/u/61803503/packages/REPORT_WORKFLOW_GUIDE.pdf}{REPORT_WORKFLOW_GUIDE.pdf} for more details}
#' \item{REPORTS}{ - A directory with report and presentation related tools.}
#' \item{SCRIPTS}{ - A directory to store scripts; already contains the following:
#' \itemize{
#' \item{01_clean_data.R}{ - initial cleaning of raw transcripts}
2 changes: 1 addition & 1 deletion R/qdap-package.R
@@ -453,5 +453,5 @@ NULL
#' @usage data(hamlet)
#' @format A data frame with 2007 rows and 7 variables
#' @references
#' \url{http://www.gutenberg.org}
#' http://www.gutenberg.org
NULL
