Skip to content

Commit

Permalink
version 0.99.12
Browse files Browse the repository at this point in the history
  • Loading branch information
kbenoit authored and cran-robot committed Oct 6, 2017
1 parent 008748a commit ef2eb67
Show file tree
Hide file tree
Showing 27 changed files with 500 additions and 534 deletions.
14 changes: 7 additions & 7 deletions DESCRIPTION
@@ -1,5 +1,5 @@
Package: quanteda
Version: 0.99.9
Version: 0.99.12
Title: Quantitative Analysis of Textual Data
Description: A fast, flexible, and comprehensive framework for
quantitative text analysis in R. Provides functionality for corpus management,
Expand All @@ -17,7 +17,7 @@ Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role =
person("Benjamin", "Lauderdale", email = "B.E.lauderdale@lse.ac.uk", role = "ctb"),
person("Will", "Lowe", email = "wlowe@princeton.edu", role = "ctb") )
License: GPL-3
Depends: R (>= 3.2.2), methods
Depends: R (>= 3.4.0), methods
Imports: utils, stats, Matrix (>= 1.2), data.table (>= 1.9.6),
SnowballC, wordcloud, Rcpp (>= 0.12.12), RcppParallel,
RSpectra, stringi, fastmatch, ggplot2 (>= 2.2.0), XML, yaml,
Expand All @@ -41,9 +41,9 @@ Collate: 'RcppExports.R' 'View.R' 'bootstrap_dfm.R'
'dfm_compress.R' 'dfm_group.R' 'dfm_lookup.R' 'dfm_sample.R'
'dfm_select.R' 'dfm_subset.R' 'dfm_trim.R' 'dfm_weight.R'
'dictionaries-deprecated.R' 'dictionaries-liwc_old.R'
'dictionaries.R' 'docvars.R' 'fcm-methods.R' 'fcm.R'
'joinTokens-deprecated.R' 'kwic.R' 'nfunctions.R' 'nscrabble.R'
'nsyllable.R' 'phrases.R' 'plots-deprecated.R'
'dictionaries.R' 'docnames.R' 'docvars.R' 'fcm-methods.R'
'fcm.R' 'joinTokens-deprecated.R' 'kwic.R' 'nfunctions.R'
'nscrabble.R' 'nsyllable.R' 'phrases.R' 'plots-deprecated.R'
'quanteda-documentation.R' 'quanteda_options.R'
'readtext-methods.R' 'regex2fixed.R' 'resample.R'
'selectFeatures-old.R' 'selectFeatures.R' 'settings.R'
Expand All @@ -65,7 +65,7 @@ RcppModules: ngramMaker
RoxygenNote: 6.0.1
SystemRequirements: C++11
NeedsCompilation: yes
Packaged: 2017-09-22 12:15:11 UTC; kbenoit
Packaged: 2017-10-06 11:30:23 UTC; kbenoit
Author: Kenneth Benoit [aut, cre, cph],
Kohei Watanabe [ctb],
Paul Nulty [ctb],
Expand All @@ -75,4 +75,4 @@ Author: Kenneth Benoit [aut, cre, cph],
Will Lowe [ctb]
Maintainer: Kenneth Benoit <kbenoit@lse.ac.uk>
Repository: CRAN
Date/Publication: 2017-09-22 20:24:23 UTC
Date/Publication: 2017-10-06 16:23:12 UTC
51 changes: 26 additions & 25 deletions MD5
@@ -1,24 +1,24 @@
12d368f10074b5e5d63f877f672a61a2 *DESCRIPTION
e35b01f610dbc49eb3f24cb914559906 *NAMESPACE
6a765cb189ea6f3b397e2c9d966b9801 *NEWS.md
e55b1881d7b2bb2811f0927438671e15 *R/RcppExports.R
a05d97f4130fc5ab363daea8ad3f9165 *DESCRIPTION
a8954be17848612d7904ea9dc31ecb23 *NAMESPACE
48c8de55f473e395ad713033664fa299 *NEWS.md
cc139df1b977f656ccb17dba35cdd2f4 *R/RcppExports.R
634ce9131cc73676f92df037745adb5a *R/View.R
a53db5a2f859b2183eecbf2b1e4ef0af *R/bootstrap_dfm.R
2c6cd4d6d94b55da6edd660e6679591f *R/character-methods.R
fec0fd4b82008b42516aaec5603d1217 *R/convert.R
140627a427306b9be91083b870e2f85f *R/corpus-deprecated.R
ac2fc69448f088fe7cc1eadb601d9fde *R/corpus-methods-base.R
59ce910711f7f1e880c72a06cf5b9f81 *R/corpus-methods-quanteda.R
e0f10856b41f87fafcf63a377310241e *R/corpus-methods-base.R
a6b51237ebe2520ee5fc24e46a26cb35 *R/corpus-methods-quanteda.R
618612bfa5e5fed18f512a7a91fdc41a *R/corpus-methods-tm.R
5400e13c81d9f0eb5a05242e4a5e25a8 *R/corpus.R
fbb87ccb8c7ad3aff41c0fb7dc0c345e *R/corpus.R
8aeaf022987855b149670512d7b82e56 *R/corpus_reshape.R
4e424b9ba696bec42dc6dd78e886ee1a *R/corpus_sample.R
2c0b1152b74c7a30f4d80f79ce03af5d *R/corpus_segment.R
d6345b254ee2da42f846ac2d2079d60d *R/corpus_subset.R
ec13a0006501f6342d251d8efee4d53d *R/corpus_trim.R
a0ed73ebd6343ad67ce1d29de391703f *R/corpuszip.R
baa4fb25b1499efb514b0adddc3d0322 *R/data-deprecated.R
05ab2271599d9c5a7ae5b685073e83c6 *R/data-documentation.R
f4a79cbf124013743955564accc2ffb9 *R/data-documentation.R
9b740cd34d3b08a36a7f38a68c304f57 *R/dfm-classes.R
22437a7fc8fe2c80e40cc6da9c20a5f8 *R/dfm-deprecated.R
275f76db518d3ad60bda5200aa13eddd *R/dfm-methods.R
Expand All @@ -32,10 +32,11 @@ dd0dbd74f1e30a7540161f187c26fe22 *R/dfm_lookup.R
2c466f5ee97eb22faaf7f143611befd3 *R/dfm_select.R
6b2aaef0b0e4b4da46c4ed57aa043d79 *R/dfm_subset.R
9f5a8b068bde5694f218f44f0215a951 *R/dfm_trim.R
61337dbe4125ae811bb77211d8f9573d *R/dfm_weight.R
f94d07887fa36ee590a731c6997889f5 *R/dfm_weight.R
4d7f8b85ffd6fa6fc78d22d24b9ea5e3 *R/dictionaries-deprecated.R
a4e88e8f7dc7a28622aa777b8f494c4b *R/dictionaries-liwc_old.R
0995a3f234d76e7da2144f2cf6a8817e *R/dictionaries.R
0c0cd61a48377f872ae28601782864ff *R/docnames.R
dbcf851832284ed8426e63beedee687a *R/docvars.R
fe705c6c1764be8560ab3680be86c5c2 *R/fcm-methods.R
7d82ba976d451e832a7d082853fc371a *R/fcm.R
Expand All @@ -49,7 +50,7 @@ b6dc566e4fe3172fd60d62ccc07639b8 *R/nscrabble.R
87ace6b17cf4a40e5730b1207c2ecfbc *R/quanteda-documentation.R
289071d7d7eeb6702280aabddbeee338 *R/quanteda_options.R
721f2dfee7c6f4f4c569cfe44d5f8b81 *R/readtext-methods.R
ecdc5e55d1f51f81696199cc98a2054c *R/regex2fixed.R
2fcbeec071f0f8591b8ce837a031c412 *R/regex2fixed.R
57926b8f9d6bfa20da87e4e5958eab4f *R/resample.R
454329c69ba314915cc5327a0b0e196c *R/selectFeatures-old.R
1d570423f3273428bf3c94fbb654344c *R/selectFeatures.R
Expand All @@ -69,7 +70,7 @@ d60653e1d9c6238dd71990059a86552d *R/textmodel_wordfish.R
98179b588c8bbd238f01e3dd78141b4b *R/textplot_wordcloud.R
88e955da767ee726808ba26006adc6ce *R/textplot_xray.R
c943e8d3c0da5dd274c638d0475d0bf7 *R/textstat-deprecated.R
210a7042eb8a90118f68f9763df00041 *R/textstat_collocations.R
84f0d3503268c3c54038342c0e5b0f19 *R/textstat_collocations.R
91129204292b499e90d1ee9d400f5913 *R/textstat_dist.R
831157b319ddea184d83a30008822e98 *R/textstat_frequency.R
75c89cfdc0f78278a2ae47a803042893 *R/textstat_keyness.R
Expand All @@ -88,10 +89,10 @@ f1920f8995cf59ce0dda6e9f3c6b841b *R/tokens_segment.R
ad6e14b6c1c136b188136dc1046b1afd *R/tokens_select.R
ff1dda48149456d755ad54b0988feff2 *R/tolower-deprecated.R
7b4fb1c5c9aebcfeee95a54f54c93135 *R/tolower-functions.R
1b4eb15a4d38c3ef54829996717e0cf8 *R/utils.R
0520fec5f82a2933992b817e33dd1231 *R/utils.R
8f0cdff0abb44344c98e3fe7a4c1412f *R/wordstem.R
6854e17354f2581384fc1cfbad601e54 *R/zzz.R
555cde1ea75a2da33e0346e2f8903f96 *README.md
3c3465019c39422c288acdd0943d40b4 *README.md
d3108ab258b491f7a923fef028efafa2 *build/vignette.rds
489c06f1acb941659bf046b65e88c094 *data/data_char_sampletext.RData
7c1ab5382c589d7bb1cccf05ca414347 *data/data_char_stopwords.RData
Expand All @@ -108,7 +109,7 @@ eacf5e08e1131326f401ea96e9743bab *inst/CITATION
d32239bcb673463ab874e80d47fae504 *inst/LICENSE.txt
7fa62503a7808579a782c955b10d7121 *inst/doc/quickstart.R
10eee17642924dfd6a1af6fd5f88fd45 *inst/doc/quickstart.Rmd
7fcb6a2b76ee2a613e33d08e728961f9 *inst/doc/quickstart.html
44d6b3932d6e5ae190fd15bbeb4ad7cb *inst/doc/quickstart.html
f7aae1179ace7e60997f454bf1dcbc6c *man/View.Rd
76c4077af6b5d80251115e70ae49526c *man/applyDictionary.Rd
b7e863a2f88686cff9edbba3d1b75f88 *man/as.corpus.Rd
Expand All @@ -133,7 +134,7 @@ a9bfe47aed2dc80d8234aca041694bbc *man/collocations.Rd
25b5acef84a7dcf58e39ef2905e98341 *man/convert-wrappers.Rd
a7fc62e0a94d3308476a14593f956b99 *man/convert.Rd
ae3963f17c6f4d39d3c4e9657cf492ea *man/corpus-class.Rd
b709c6abdbba832da9ead5a1ac24f288 *man/corpus.Rd
6313e3d828406df4f27ced95fefa5d03 *man/corpus.Rd
8bf0629ed152fde9395ab8a89be2c5f2 *man/corpus_reshape.Rd
570cc8780957cff0d18ad89c303f6acf *man/corpus_sample.Rd
d39b898fec5c279c9fad5a310dabd40e *man/corpus_segment.Rd
Expand All @@ -148,7 +149,7 @@ eab13fcf6fdd65bd6a145f24789339da *man/corpus_trimsentences.Rd
4c4336109fffaa7b62b6177cc295b79f *man/data_corpus_inaugural.Rd
5e981b8a6adaad56e41fc29fdf544bc3 *man/data_corpus_irishbudget2010.Rd
4e257a33ea3e009699e484d4a94ebccf *man/data_dfm_lbgexample.Rd
c82d69e0276c1249c63727d63037a507 *man/data_dictionary_LSD2015.Rd
667b4c7df18b791e4075cb2737794da8 *man/data_dictionary_LSD2015.Rd
15e8e495267f8b743e1dcba3797a9805 *man/deprecated-textstat.Rd
28ff861e9ba2b49522f2f8841a8fce08 *man/dfm-class.Rd
0561dd4414bd0193d01e237a42ff7f32 *man/dfm-internal.Rd
Expand All @@ -167,7 +168,7 @@ b9ac47f2d6e1c7bb29c9298d293d3249 *man/dfm_lookup.Rd
90414059c20a256858d02e0fc84195e5 *man/dictionary-class.Rd
68d48e050a2011d04d16cb78f404087f *man/dictionary.Rd
72d88ff160e5540a440a528e600050ea *man/docfreq.Rd
d7b1f6bef6e8d35afb61748cbd9f143b *man/docnames.Rd
59903834aa45538f3ed90919f3d816cd *man/docnames.Rd
142a6ab942b0f11a05b9f33b838559c6 *man/docvars.Rd
f9a5436e398e76e2a0b7a0942b7fbcc6 *man/escape_regex.Rd
d0007a5aea6cfa845796dd7647e9be98 *man/fcm-class.Rd
Expand Down Expand Up @@ -248,7 +249,7 @@ a9a6ba8acbc534fb234be53c98ac6167 *man/textstat_collocations.Rd
8335189eebca69d15953a86dddbc27e2 *man/textstat_readability.Rd
8f4a83a6a48ef12a2517e7fad6c5fea4 *man/textstat_simil.Rd
cb4dd6e87adfdba261cc46b8714f432d *man/tf.Rd
fec0c7ae57398a0188cdbfe482768dac *man/tfidf.Rd
df99585d49fa5e35eb80197cdf8a99f2 *man/tfidf.Rd
ef0826f11fccc775900f9a454c45e6f6 *man/toLower.Rd
fad2172c93ddc1c05e66c70e4b8bcc0c *man/tokenize.Rd
29a3271549a589cc507c24f08f9c79a9 *man/tokens.Rd
Expand All @@ -269,22 +270,22 @@ b4a805c4eca840efeb664efe7b1a4055 *man/weight.Rd
468cb462db1dbeea9d718aae57a6dbb9 *man/wordstem.Rd
6e70c551e7baa2d1c0d07666bea3d756 *src/Makevars
43eba54da933deffa1f1c4e295a92d9c *src/Makevars.win
25fbee2fdd34e25afa51ba3f46f97188 *src/RcppExports.cpp
f6837cccbda39f2c0b006033bc1dbda5 *src/RcppExports.cpp
7acb74eb0748b2f3d9dc2e2a0a6b4525 *src/ca_mt.cpp
a1c0a49300f98d8c1fdd5b2345f41e78 *src/dev.h
42e905133ca1e157de60b8885ee96acd *src/dist_mt.cpp
fedc7a73e4b89f0b6e65db6942a9b129 *src/fcm_mt.cpp
317c471362c09af7b0a570487a2bfdb4 *src/quanteda.h
6c587b54578522704c2dc6466287272f *src/recompile.h
5329252f32fe27b3980fdb39e72bb01f *src/sequences_mt_.cpp
4c085f940398a22134cf2fe1ea267e5e *src/sequences_mt_.cpp
db575b29a94859d131940aed05b74c4f *src/tokens_compound_mt.cpp
6cdd618d3900b1bb06804421344be14a *src/tokens_detect_mt.cpp
e6782d690891b7ea3e935fbd8321db15 *src/tokens_kwic_mt.cpp
82d5a7f67fa9862a4575ebbdccaca8de *src/tokens_lookup_mt.cpp
4d0108efb75f912932ec46a59ead544c *src/tokens_ngrams_mt.cpp
07fbfd9c46a6d32c5fab8038198e59d2 *src/tokens_recompile_mt.cpp
ed1516f5e1097f6ab525c8fbe556dec6 *src/tokens_replace_mt.cpp
842976d9e7a5a3f8feff40c80bb4744c *src/tokens_segment_mt.cpp
809d30a587efe73bbdef8d2a632d7e7f *src/tokens_segment_mt.cpp
adb8da98186abd7e7c4af0af14dbd5fc *src/tokens_select_mt.cpp
3c84f43e987970725c248640ca1f8e5a *src/utility.cpp
1e1589cf57123c2bf8404718f47e24fa *src/wordfish_.cpp
Expand Down Expand Up @@ -323,7 +324,7 @@ a6e16a8841cc3c491d9d613c6ba7df19 *tests/testthat/sample_text.txt
09c4618d92c67bbe070c976eaecfd8dc *tests/testthat/test-bootstrap.R
6a101a8bfe49ab4db6a5ba8094165126 *tests/testthat/test-convert.R
51d11e77b608ac06559923ebdaddbe1e *tests/testthat/test-corpus-compress.R
6854ed9bc389af225ae17c98c343fce4 *tests/testthat/test-corpus.R
6ca669afe61f174dcd54122ac47cff15 *tests/testthat/test-corpus.R
1f8a16263cfa2a300e9546ee1f799bab *tests/testthat/test-corpus_reshape.R
b600abaf3417d0b8e0c04a7eedac469e *tests/testthat/test-corpus_sample.R
99d4f1546a751289c89eacfa812a7c87 *tests/testthat/test-corpus_segment.R
Expand All @@ -335,9 +336,9 @@ defd3bc747937bc2da2a90126393d596 *tests/testthat/test-dfm.R
30e7c68bc6ad5da9353289c86e6ba520 *tests/testthat/test-dfm_select.R
81a8c8c8422c7e98661cc61dccd24530 *tests/testthat/test-dfm_subset.R
c748cce7255f86be3fb4fd3fec2cabd7 *tests/testthat/test-dfm_trim.R
b5fce3f4545a329ea1eb2ebe857ef5c4 *tests/testthat/test-dfm_weight.R
77521bc059c67ca7f6411b8af6d90bd7 *tests/testthat/test-dfm_weight.R
78437104dff5f5b69ac7ea26095223ec *tests/testthat/test-dictionaries.R
90396bf40a564eb462b4ff0cb54e834e *tests/testthat/test-docnames.R
9380878c9b125c7672779b466962c7c5 *tests/testthat/test-docnames.R
9f01088cae3fc761b71b8f4e9e71d389 *tests/testthat/test-docvars.R
f455fb58cbaf40e2e808be82869f8fb6 *tests/testthat/test-fcm.R
861ccaeb0fb5c5e6cb78ccb1af1f7942 *tests/testthat/test-fcm_methods.R
Expand All @@ -356,7 +357,7 @@ d0b6b4cb303a48d40e20e184c809edb1 *tests/testthat/test-textmodel_NB.R
e14acecf436ee89146c745b8519da7a4 *tests/testthat/test-textmodel_wordfish.R
1385379fa427c28e42a5e518a1eca7c9 *tests/testthat/test-textmodel_wordscores.R
e176433de517c1be4020cc1d8379b367 *tests/testthat/test-texts.R
953385fd657732d9553e903c761c8656 *tests/testthat/test-textstat_collocations.R
794a66cd8b7556c6b50d2701924a11f7 *tests/testthat/test-textstat_collocations.R
f34a36c74dcd039f6cfbeb3552960090 *tests/testthat/test-textstat_dist.R
9f673ad9f3fffe76872b48f1d8f989a6 *tests/testthat/test-textstat_frequency.R
557bd006e868ce9829ca9558a093866c *tests/testthat/test-textstat_keyness.R
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -13,6 +13,8 @@ S3method("[[<-",corpus)
S3method("[[<-",tokens)
S3method("docnames<-",corpus)
S3method("docnames<-",corpuszip)
S3method("docnames<-",dfm)
S3method("docnames<-",tokens)
S3method("docvars<-",corpus)
S3method("docvars<-",dfm)
S3method("docvars<-",tokenizedTexts)
Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
@@ -1,5 +1,16 @@
# quanteda 0.99

## Changes since v0.99.9

### New Features

* Added methods for changing the docnames of tokens and dfm objects (#987).

### Bug fixes and stability enhancements

* The computation of tfidf has been more thoroughly described in the documentation for this function (#997).


## Changes since v0.99

### New Features
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Expand Up @@ -41,8 +41,8 @@ qatd_cpp_fcm <- function(texts_, n_types, count, window, weights, ordered, tri,
.Call(`_quanteda_qatd_cpp_fcm`, texts_, n_types, count, window, weights, ordered, tri, nvec)
}

qatd_cpp_sequences <- function(texts_, types_, count_min, sizes_, method, smoothing) {
.Call(`_quanteda_qatd_cpp_sequences`, texts_, types_, count_min, sizes_, method, smoothing)
qatd_cpp_sequences <- function(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing) {
.Call(`_quanteda_qatd_cpp_sequences`, texts_, types_, words_ignore_, count_min, sizes_, method, smoothing)
}

qatd_cpp_tokens_compound <- function(texts_, comps_, types_, delim_, join) {
Expand Down
9 changes: 4 additions & 5 deletions R/corpus-methods-base.R
Expand Up @@ -174,12 +174,11 @@ tail.corpus <- function(x, n = 6L, ...) {
metacorpus(c1, field) <- paste(metacorpus(c1, field), metacorpus(c2, field))
}

row.names <- c(rownames(c1$documents), rownames(c2$documents))
c1$documents <- data.frame(
data.table::rbindlist(list(c1$documents, c2$documents), use.names = TRUE, fill = TRUE)
)
#rowname <- c(rownames(c1$documents), rownames(c2$documents))
c1$documents <- rbind(c1$documents, c2$documents)

# Put rownames back in because the hadleyverse discards them
rownames(c1$documents) <- make.unique(row.names, sep='')
#rownames(c1$documents) <- make.unique(rowname, sep='')

# settings
### currently just use the c1 settings
Expand Down
56 changes: 0 additions & 56 deletions R/corpus-methods-quanteda.R
Expand Up @@ -180,59 +180,3 @@ texts.character <- function(x, groups = NULL, spacer = " ") {
as.character.corpus <- function(x, ...) {
# Delegates entirely to texts(x); anything supplied through `...` is
# accepted for signature compatibility but is not forwarded.
texts(x)
}

#' Get or set document names
#'
#' Get or set the document names of a \link{corpus}, \link{tokens}, or
#' \link{dfm} object.
#' @param x the object whose document names are queried or replaced
#' @export
#' @return \code{docnames} returns a character vector of the document names
#' @seealso \code{\link{featnames}}
#' @examples
#' # query the document names of a corpus
#' docnames(data_corpus_irishbudget2010)
#'
#' # query the document names of a tokens object
#' docnames(tokens(data_char_ukimmig2010))
#'
#' # query the document names of a dfm
#' docnames(dfm(data_corpus_inaugural[1:5]))
#'
#' @keywords corpus dfm
docnames <- function(x) {
# S3 generic: the method is selected from the class of x
UseMethod("docnames")
}

#' @noRd
#' @export
docnames.corpus <- function(x) {
    # Read the row names straight from x$documents instead of going through
    # the documents() accessor, so the large object is not passed around.
    doc_ids <- rownames(x$documents)
    if (!is.null(doc_ids)) {
        return(doc_ids)
    }
    # No row names stored: fall back to text1, text2, ... (one per document)
    paste0('text', seq_len(ndoc(x)))
}

#' @param value a character vector of replacement document names, one for
#'   each document in \code{x}
#' @return \code{docnames <-} assigns new values to the document names of an
#'   object and returns the modified object.
#' @export
#' @examples
#' # reassign the document names of the inaugural speech corpus
#' docnames(data_corpus_inaugural) <- paste("Speech", 1:ndoc(data_corpus_inaugural), sep="")
#'
#' @rdname docnames
"docnames<-" <- function(x, value) {
# S3 replacement generic: the method is selected from the class of x
UseMethod("docnames<-")
}

#' @noRd
#' @export
"docnames<-.corpus" <- function(x, value) {
    # Guard against direct calls with a non-corpus object; on the valid path
    # the document names are stored as the row names of x$documents.
    if (is.corpus(x)) {
        rownames(x$documents) <- value
        x
    } else {
        stop("docnames<- only valid for corpus objects.")
    }
}

0 comments on commit ef2eb67

Please sign in to comment.