Skip to content

Commit

Permalink
version 0.90
Browse files Browse the repository at this point in the history
  • Loading branch information
kbenoit authored and cran-robot committed Jun 3, 2023
1 parent b8bd174 commit c10710c
Show file tree
Hide file tree
Showing 11 changed files with 41 additions and 106 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: readtext
Version: 0.82
Version: 0.90
Type: Package
Title: Import and Handling for Plain and Formatted Text Files
Authors@R: c( person("Kenneth", "Benoit", email = "kbenoit@lse.ac.uk", role =
Expand All @@ -25,7 +25,7 @@ LazyData: TRUE
VignetteBuilder: knitr
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-04-06 07:33:28 UTC; kbenoit
Packaged: 2023-06-03 16:53:33 UTC; kbenoit
Author: Kenneth Benoit [aut, cre, cph],
Adam Obeng [aut],
Kohei Watanabe [ctb],
Expand All @@ -34,4 +34,4 @@ Author: Kenneth Benoit [aut, cre, cph],
Stefan Müller [ctb]
Maintainer: Kenneth Benoit <kbenoit@lse.ac.uk>
Repository: CRAN
Date/Publication: 2023-04-06 08:00:02 UTC
Date/Publication: 2023-06-03 17:30:02 UTC
18 changes: 8 additions & 10 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
2585d7d1a618fea232eff4e133aa400e *DESCRIPTION
0e9837068f198497821087fcc0141306 *NAMESPACE
44a5bc385a8ffe17ab6049da88c46050 *NEWS.md
2e8f66947863e9668efa098c60694599 *DESCRIPTION
2a9aad74ee91f090319822abd90e454a *NAMESPACE
d66c08943e0c7b95ddad22ee0db7e169 *NEWS.md
82e67223e0d183311e939e72101bc758 *R/data.R
19d9450f25f93038ab2bb861c04417e0 *R/encoding.R
e85eb081256d8315c17f7ed8e8e0029f *R/get-functions.R
1b342ca0615d4481c3e6cd8b71c20c06 *R/nexis.R
a9edb84c4333bfcecd740285c68e63c2 *R/quanteda-methods.R
d80aaa1d0a6aab58239abadd7e79dd48 *R/quanteda-methods.R
e1dcc814843dfa28ba45f6c7b6933543 *R/readtext-methods.R
1b29240359589c6854ffdbd357caf7d5 *R/readtext-package.R
2290605f9616eec87c8a593de43790f3 *R/readtext.R
b348d381c0640f8c63d83443cd0ef02d *R/readtext_options.R
977f7ad0fe198d849a9c505473f930ad *R/utils.R
ae20b98d0ec92f3c945e3a28e7e7f05d *R/zzz.R
fba5d8a3bab958c845222c33f190922f *README.md
a45cd788f25fdbbaa738793a35cf96a3 *build/vignette.rds
fc84d20d0ff5d9942f74eb653a8ec253 *README.md
681511d74263fb590b147a1ceea50c98 *build/vignette.rds
c9f29bdb8cd5223a346d3c2a2c71512a *data/data_char_encodedtexts.rda
d32239bcb673463ab874e80d47fae504 *inst/LICENSE.txt
867187beed7edb0ae6f5b38ba71eac78 *inst/doc/readtext_vignette.R
efc0ace01769db1aff92c36894bdf703 *inst/doc/readtext_vignette.Rmd
e17a738acc3c5acd9e35f58282e902eb *inst/doc/readtext_vignette.html
13982452978ea9ec0020dd0d355215ae *inst/doc/readtext_vignette.html
19b51d415cf3974ebe55ee693cff2aef *inst/extdata/csv/inaugCorpus.csv
c7c1de428544b4f6951551ae5db03794 *inst/extdata/data_files_encodedtexts.zip
9ce9a26c657050487faf8578db7bbb89 *inst/extdata/json/inaugural_sample.json
Expand Down Expand Up @@ -88,8 +88,6 @@ d124e6dc06b759c88e402a590cb7f87e *man/basename_unique.Rd
8775ff22964f23cb904cb34d70abf914 *man/cache_remote.Rd
f25d60023eb7dad2007d685a00b713fd *man/data_char_encodedtexts.Rd
0f25f41994f6df47bd92a5f09ea7deb7 *man/data_files_encodedtexts.Rd
7cdd586406823671d2a99cd714152d83 *man/docnames.Rd
10b8f7eca40d99db14dbca44514d1f2b *man/docvars.Rd
42b88e6e43babcadc585b8fbee99fee7 *man/encoding.Rd
7e69a682bb64367afa7ec6ae48ca940b *man/get_nexis_html.Rd
92b45c9a0a6b79bd00297ccfc73e215c *man/get_temp.Rd
Expand Down Expand Up @@ -199,7 +197,7 @@ e272d61e00293b8f5d09ef9bea2c809c *tests/testthat/test-encoding.R
6ff1bfb34b911c6b880a155f48245c49 *tests/testthat/test-nexis.R
8d59d23d17bd58ac41920108f4458df9 *tests/testthat/test-readtext-methods.R
a6430f63a9edfe0052460cbe22eb2b03 *tests/testthat/test-readtext-uppercaseextensions.R
3d7aaad97c4ad381b45094a9056b4761 *tests/testthat/test-readtext.R
8dd4cce916ed707013cda33358dbd5c3 *tests/testthat/test-readtext.R
2481ea2eb36e78d3c24506556c403ccf *tests/testthat/test-readtext_options.R
f0c09c80d56dec49b78b6021b4a6a9f2 *tests/testthat/test-utils.R
9d20ecec3e3bb49265b3954350ccdd88 *vignettes/mystyle.css
Expand Down
4 changes: 0 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
# Generated by roxygen2: do not edit by hand

S3method(as.character,readtext)
S3method(docnames,readtext)
S3method(docvars,readtext)
S3method(encoding,character)
S3method(encoding,readtext)
S3method(format,trunc_mat_tibble)
S3method(print,readtext)
S3method(print,trunc_mat_tibble)
S3method(texts,readtext)
export(docnames)
export(docvars)
export(encoding)
export(readtext)
export(readtext_options)
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
readtext v0.81
readtext v0.90
==============
* Removes the deprecated quanteda functions: `docvars()`, `docnames()`, `texts()`.

readtext v0.82
==============
* Moves some quanteda functions to this package: `docvars()`, `docnames()`, `texts()`
* Updates print method to use **pillar** instead of tibble
Expand Down
30 changes: 0 additions & 30 deletions R/quanteda-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,3 @@ texts.readtext <- function(x, ...) {
.Deprecated("Use as.character() instead")
as.character(x)
}

#' Extract document variables from a readtext object
#'
#' Returns document variables from a readtext object.
#' @param x a [readtext] object whose document variables will be extracted
#' @returns a data.frame of document variables
#' @export
docvars <- function(x) {
UseMethod("docvars")
}

#' @export
docvars.readtext <- function(x) {
as.data.frame(x[, -c(match(c("doc_id", "text"), colnames(x)))])
}

#' Extract document names from a readtext object
#'
#' Returns document names from a readtext object.
#' @param x a readtext object whose document names will be extracted
#' @returns a character vector of document names
#' @export
docnames <- function(x) {
UseMethod("docnames")
}

#' @export
docnames.readtext <- function(x) {
x[["doc_id"]]
}
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

[![CRAN
Version](https://www.r-pkg.org/badges/version/readtext)](https://CRAN.R-project.org/package=readtext)
[![](https://img.shields.io/badge/devel%20version-0.82-royalblue.svg)](https://github.com/quanteda/readtext)
[![](https://img.shields.io/badge/devel%20version-0.90-royalblue.svg)](https://github.com/quanteda/readtext)
[![Downloads](https://cranlogs.r-pkg.org/badges/readtext)](https://CRAN.R-project.org/package=readtext)
[![Total
Downloads](https://cranlogs.r-pkg.org/badges/grand-total/readtext?color=orange)](https://CRAN.R-project.org/package=readtext)
Expand Down Expand Up @@ -132,7 +132,7 @@ all docvars and other meta-data.

``` r
library("quanteda")
## Package version: 3.2.4
## Package version: 3.3.1
## Unicode version: 14.0
## ICU version: 71.1
## Parallel computing: 10 of 10 threads used.
Expand Down
Binary file modified build/vignette.rds
Binary file not shown.
10 changes: 5 additions & 5 deletions inst/doc/readtext_vignette.html
Original file line number Diff line number Diff line change
Expand Up @@ -718,16 +718,16 @@ <h1>3. Inter-operability with quanteda</h1>
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(corpus_csv, <span class="dv">5</span>)</span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a><span class="do">## Loading required package: quanteda</span></span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a><span class="do">## Package version: 3.2.4</span></span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a><span class="do">## Package version: 3.3.1</span></span>
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a><span class="do">## Unicode version: 14.0</span></span>
<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a><span class="do">## ICU version: 71.1</span></span>
<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a><span class="do">## Parallel computing: 10 of 10 threads used.</span></span>
<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a><span class="do">## See https://quanteda.io for tutorials and examples.</span></span>
<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a><span class="do">## </span></span>
<span id="cb13-17"><a href="#cb13-17" aria-hidden="true" tabindex="-1"></a><span class="do">## Attaching package: &#39;quanteda&#39;</span></span>
<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a><span class="do">## The following objects are masked from &#39;package:readtext&#39;:</span></span>
<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a><span class="do">## The following object is masked from &#39;package:readtext&#39;:</span></span>
<span id="cb13-19"><a href="#cb13-19" aria-hidden="true" tabindex="-1"></a><span class="do">## </span></span>
<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a><span class="do">## docnames, docvars, texts</span></span>
<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a><span class="do">## texts</span></span>
<span id="cb13-21"><a href="#cb13-21" aria-hidden="true" tabindex="-1"></a><span class="do">## Corpus consisting of 5 documents, showing 5 documents:</span></span>
<span id="cb13-22"><a href="#cb13-22" aria-hidden="true" tabindex="-1"></a><span class="do">## </span></span>
<span id="cb13-23"><a href="#cb13-23" aria-hidden="true" tabindex="-1"></a><span class="do">## Text Types Tokens Sentences Year President FirstName</span></span>
Expand Down Expand Up @@ -891,8 +891,8 @@ <h2>4.2 Read files with different encodings</h2>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="do">## Corpus consisting of 36 documents, showing 5 documents:</span></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="do">## </span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="do">## Text Types Tokens Sentences document</span></span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="do">## IndianTreaty_English_UTF-16LE.txt 617 2577 152 IndianTreaty</span></span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="do">## IndianTreaty_English_UTF-8-BOM.txt 645 3092 150 IndianTreaty</span></span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="do">## IndianTreaty_English_UTF-16LE.txt 618 2577 152 IndianTreaty</span></span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="do">## IndianTreaty_English_UTF-8-BOM.txt 647 3085 150 IndianTreaty</span></span>
<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="do">## UDHR_Arabic_ISO-8859-6.txt 753 1555 86 UDHR</span></span>
<span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a><span class="do">## UDHR_Arabic_UTF-8.txt 753 1555 86 UDHR</span></span>
<span id="cb20-12"><a href="#cb20-12" aria-hidden="true" tabindex="-1"></a><span class="do">## UDHR_Arabic_WINDOWS-1256.txt 753 1555 86 UDHR</span></span>
Expand Down
17 changes: 0 additions & 17 deletions man/docnames.Rd

This file was deleted.

17 changes: 0 additions & 17 deletions man/docvars.Rd

This file was deleted.

35 changes: 18 additions & 17 deletions tests/testthat/test-readtext.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# TODO: Check and remove extranous codes # TODO: recurse file listing for e.g. remote ZIP file
# TODO: readtext with csv doesn"t seem to require text_field

context("test readtext.R")
library("quanteda")

test_that("test readtext with single filename", {
fox <- c(fox.txt = "The quick brown fox jumps over the lazy dog.")
Expand Down Expand Up @@ -169,19 +169,19 @@ test_that("test csv files", {

test_that("test tab files", {
testreadtext <- readtext("../data/tab/test.tab", text_field = "text")
expect_that(
docvars(testreadtext),
equals(data.frame(list(colour = c("green", "red"), number = c(42, 99)),
stringsAsFactors = FALSE))
expect_equal(
docvars(corpus(testreadtext)),
data.frame(list(colour = c("green", "red"), number = c(42, 99)),
stringsAsFactors = FALSE)
)
expect_that(
as.character(testreadtext),
equals(c(test.tab.1 = "Lorem ipsum.", test.tab.2 = "Dolor sit"))
expect_equal(
testreadtext$text,
unname(c(test.tab.1 = "Lorem ipsum.", test.tab.2 = "Dolor sit"))
)

expect_error(
readtext("../data/tab/test.tab", text_field = "nonexistant"),
"There is no field called nonexistant"
readtext("../data/tab/test.tab", text_field = "nonexistent"),
"There is no field called nonexistent"
)

})
Expand Down Expand Up @@ -825,13 +825,14 @@ test_that("tests for ODS files", {
readtext("../data/ods/test.ods", text_field = "text"))),
c("The quick", "brown fox", "jumps over", "the lazy dog.")
)
expect_equal(
docvars(readtext("../data/ods/test.ods", text_field = "text")),
data.frame(list(
colour = c("orange", "blue", "pink", "pink"),
number = c(0, NA, NA, NA),
taste = c(NA, NA, "sweet", "umami")
), stringsAsFactors = FALSE)
expect_identical(
readtext("../data/ods/test.ods", text_field = "text"),
structure(list(doc_id = c("test.ods.1", "test.ods.2", "test.ods.3",
"test.ods.4"), text = c("The quick", "brown fox", "jumps over",
"the lazy dog."), colour = c("orange", "blue", "pink", "pink"
), number = c(0L, NA, NA, NA), taste = c(NA, NA, "sweet", "umami"
)), row.names = c(NA, -4L), class = c("readtext", "data.frame"
))
)

})
Expand Down

0 comments on commit c10710c

Please sign in to comment.