diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..7e9f103 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,23 @@ +Package: genCountR +Title: Interacting with Roberts and Utych's (2019) Gendered Language + Dictionary +Version: 1.0.0 +Authors@R: + person("Damon", "Roberts", , "damon.charles.roberts@gmail.com", role = c("aut", "cre"), + comment = c(ORCID = "0000-0002-4360-3675")) +Description: Allows users to generate a gendered language score according to the gendered language dictionary in Roberts and Utych (2019) . +License: MIT + file LICENSE +Encoding: UTF-8 +RoxygenNote: 7.2.3 +Depends: R (>= 2.10) +LazyData: true +Suggests: devtools, knitr, rmarkdown, testthat +VignetteBuilder: knitr +URL: https://gencounter.app.damoncroberts.com, + https://damoncharlesroberts.github.io/genCountR/ +NeedsCompilation: no +Packaged: 2023-12-05 00:37:50 UTC; dcr +Author: Damon Roberts [aut, cre] () +Maintainer: Damon Roberts +Repository: CRAN +Date/Publication: 2023-12-05 17:50:04 UTC diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..224b0e6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2023 +COPYRIGHT HOLDER: genCountR authors diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..f60b1fc --- /dev/null +++ b/MD5 @@ -0,0 +1,31 @@ +99aa1790afcd2fd3ec2db69637636e3c *DESCRIPTION +57eb66a14d63272aae4d53d18caf55f9 *LICENSE +f24b374b29900adc0b9e7c185d616119 *NAMESPACE +b765f6481b5e07835af5da7c50a6219a *R/dict.R +df6cc46bc7fae1a55b713f3d5065b35a *R/genCountR-package.R +1d57ac357049fbef5296852f25b3e272 *R/gen_count.R +1e0e9d71d0eb04f2fff2596b05518c9b *R/gen_score.R +7861d5546b952689049afa0b1a860aa6 *R/text_clean.R +81d94cf11fd9fde737f3b5123fc28f91 *R/word_count.R +3f9bcf526e32af4bc36db5ff5622d0f9 *README.md +439bf689fa27cf9affd0335332142165 *build/partial.rdb +142303c55feaab697db426ba88b0ec1b *build/vignette.rds +edf70d7dc769d86a8f9236738ce4bbbb *data/dict.rda +28022ec175d3d4803457da88ed0a078d *inst/doc/gen_count.R 
+42386983344f1fd6443025bac9a2b278 *inst/doc/gen_count.Rmd +76d1c5e3b730c2b94dbfe174eb6aaafb *inst/doc/gen_count.html +9364e385a39a7cd688b50d7a145ced41 *inst/doc/gen_score.R +98487cc4e52fd35ad5e7e6d1fc66bede *inst/doc/gen_score.Rmd +7985d9186596a36a40f5bb6753e6b6c4 *inst/doc/gen_score.html +0db43a890a369b941b461bd4bdfa7b8d *man/dict.Rd +fcff4adeefbbdd9d5b1305b3ac72581f *man/genCountR-package.Rd +bdcfb9d1c7c6727975f148a724efe1d8 *man/gen_count.Rd +21cd66b3097a82a5c8ea2eae7cfc4793 *man/gen_score.Rd +5c49d7fccd1d918bf03e9014a9f4d88f *man/text_clean.Rd +bb7ccdaae30ad4f6227128c2d026b8bc *man/word_count.Rd +2910db24b4be2d49f537dc589a6e855a *tests/testthat/test-gen-count.R +0a8d88c14d44dca66890f2198ad75721 *tests/testthat/test-gen-score.R +33d4edd9765607946fece4908ea8414a *tests/testthat/test-text-clean.R +8697ed5ff0ec37fb3471a3c4a45dfc27 *tests/testthat/test-word-count.R +42386983344f1fd6443025bac9a2b278 *vignettes/gen_count.Rmd +98487cc4e52fd35ad5e7e6d1fc66bede *vignettes/gen_score.Rmd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..40841ed --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,4 @@ +# Generated by roxygen2: do not edit by hand + +export(gen_count) +export(gen_score) diff --git a/R/dict.R b/R/dict.R new file mode 100644 index 0000000..ef289c9 --- /dev/null +++ b/R/dict.R @@ -0,0 +1,15 @@ +#' @title +#' Data from Gendered Language Dictionary Developed by Roberts and Utych (2019) +#' +#' @description +#' Each word in the dataset contains a rating by human coders. See details of dataset in the original paper. 
+#' +#' @format A tibble with 701 rows and 15 columns: +#' \describe{ +#' \item{Word}{The word to match} +#' \item{POS}{Part Of Speech} +#' \item{mean.a}{Mean score provided by all participants} +#' \item{std.dev.a}{Standard deviation of score provided by all participants} +#' } +#' @source \url{https://journals.sagepub.com/doi/10.1177/1065912919874883} +"dict" \ No newline at end of file diff --git a/R/genCountR-package.R b/R/genCountR-package.R new file mode 100644 index 0000000..a65cf64 --- /dev/null +++ b/R/genCountR-package.R @@ -0,0 +1,6 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL diff --git a/R/gen_count.R b/R/gen_count.R new file mode 100644 index 0000000..465b2f2 --- /dev/null +++ b/R/gen_count.R @@ -0,0 +1,48 @@ +#' @title gen_count +#' +#' @description +#' Counts the number of masculine and feminine words in the document +#' +#' @details +#' Takes the number of words that are loosely categorized as Masculine, Feminine, or Neutral based on Roberts and Utych's (2019) definition. +#' Feminine words had a score below 2.5, Neutral words had a score higher than 2.5 and lower than 5.5, Masculine words had a score higher than 5.5. +#' +#' @param +#' text (string): A string object. +#' +#' @return +#' data.frame with each word from the dictionary matched with the text and its number of occurances. +#' +#' @examples +#' text <- 'This person was a heroine due to their fighting during the war.' 
+#' result_df <- genCountR::gen_count(text)
+#'
+#' @export
+gen_count <- function(
+  text
+) {
+  # Split the text into lower-case words without punctuation
+  word_list <- text_clean(text)
+
+  # Count how often each dictionary word occurs in the text
+  df <- word_count(word_list)
+
+  # No dictionary word matched: return an empty result that still carries
+  # all four columns, so callers always receive the same shape
+  if (nrow(df) == 0L) {
+    return(data.frame(
+      word = character(0)
+      , count = integer(0)
+      , score = numeric(0)
+      , classified = character(0)
+    ))
+  }
+
+  # Attach the dictionary score of every matched word. merge() keeps only
+  # rows present in both inputs by default (all = FALSE), i.e. an inner join.
+  df <- merge(df, genCountR::dict, by.x = "word", by.y = "Word")
+
+  # Keep the matched word, its number of occurrences and its mean score
+  result_df <- data.frame(
+    word = df[["word"]]
+    , count = df[["count"]]
+    , score = df[["mean.a"]]
+  )
+
+  # Label each word: a score below 2.5 is feminine, 5.5 or above is
+  # masculine, and everything in between is neutral
+  result_df["classified"] <- ifelse(
+    result_df$score < 2.5, "Feminine"
+    , ifelse(result_df$score >= 5.5, "Masculine", "Neutral")
+  )
+
+  # Return the dataframe
+  return(result_df)
+}
\ No newline at end of file
diff --git a/R/gen_score.R b/R/gen_score.R
new file mode 100644
index 0000000..638254f
--- /dev/null
+++ b/R/gen_score.R
@@ -0,0 +1,64 @@
+#' @title gen_score
+#'
+#' @description
+#' Calculates the score of the supplied text string based on the Gendered Language Dictionary created by Roberts and Utych (2019).
+#'
+#' @details
+#' Takes the matched words and their occurrence in the supplied text, finds the score for those matched words in the dictionary, sums those scores up and then divides it by the total number of words in the supplied text.
+#'
+#' @param
+#' text (string): A string object
+#'
+#' @return
+#' list object with avg_score of the supplied text string, total_score of supplied text string, and data.frame of matches
+#'
+#' @examples
+#' text <- 'Hero. hero Heroine. heroine, Prison. Prisom.'
+#' result <- genCountR::gen_score(text)
+#' result$avg_score
+#' result$total_score
+#' result$df
+#'
+#' @export
+gen_score <- function(
+  text
+) {
+  # Split the text into lower-case words without punctuation
+  word_list <- text_clean(text)
+
+  # Count how often each dictionary word occurs in the text
+  counts <- word_count(word_list)
+
+  if (nrow(counts) == 0L) {
+    # No dictionary word matched: use an empty table with the same columns
+    # as the matched case instead of failing on a missing `count` column
+    matches <- data.frame(
+      word = character(0)
+      , count = integer(0)
+      , mean.a = numeric(0)
+      , total_score = numeric(0)
+    )
+  } else {
+    # Attach the dictionary score of every matched word. merge() keeps only
+    # rows present in both inputs by default (all = FALSE), i.e. an inner join.
+    merged <- base::merge(counts, genCountR::dict, by.x = "word", by.y = "Word")
+
+    # The score column keeps the dictionary's name `mean.a`
+    matches <- data.frame(
+      word = merged[["word"]]
+      , count = merged[["count"]]
+      , mean.a = merged[["mean.a"]]
+    )
+
+    # Row-wise contribution: occurrences of the word times its score
+    matches[["total_score"]] <- matches[["count"]] * matches[["mean.a"]]
+  }
+
+  # Total score of the document (0 when nothing matched)
+  total_score <- sum(matches[["total_score"]])
+
+  # Average over all words of the text, matched or not; this is NaN (0 / 0)
+  # when the text contains no words at all
+  result <- list(
+    "avg_score" = total_score / length(word_list)
+    , "total_score" = total_score
+    , "df" = matches
+  )
+
+  # Return result
+  return(result)
+}
\ No newline at end of file
diff --git a/R/text_clean.R b/R/text_clean.R
new file mode 100644
index 0000000..9c6bd27
--- /dev/null
+++ b/R/text_clean.R
@@ -0,0 +1,29 @@
+#' @title text_clean
+#'
+#' @description
+#' Cleans the supplied text string and converts it into a vector of individual words.
+#'
+#' @details
+#' Takes the string, converts all the characters to lower case, removes punctuation, and splits the string into individual words.
+#'
+#' @param
+#' text (string): A string object.
+#'
+#' @return
+#' character vector of each word in all lower case and without punctuation.
+#'
+text_clean <- function(
+  text
+){
+  # Convert all characters to lower case
+  lowered <- base::tolower(text)
+
+  # Replace punctuation with spaces so that "war." and "war" are the same word
+  no_punct <- base::gsub("[[:punct:]]", " ", lowered)
+
+  # Split on runs of whitespace; unlist() so that every element of `text`
+  # contributes its words, not only the first one
+  words <- base::unlist(base::strsplit(no_punct, "\\s+"), use.names = FALSE)
+
+  # strsplit() yields "" when a string starts with whitespace or punctuation;
+  # such empty tokens are not words and must not be counted
+  words <- words[base::nzchar(words)]
+
+  # Always hand back a character vector, also for empty input
+  return(base::as.character(words))
+}
\ No newline at end of file
diff --git a/R/word_count.R b/R/word_count.R
new file mode 100644
index 0000000..a8c03cd
--- /dev/null
+++ b/R/word_count.R
@@ -0,0 +1,35 @@
+#' @title word_count
+#'
+#' @description
+#' Count number of times a particular word from the dictionary shows up in a document.
+#'
+#' @details
+#' Keeps the distinct words of the document that also appear in the `Word` column of the dictionary and counts how often each of them occurs in the document.
+#'
+#' @param
+#' word_item (vector or array): A vector or array of words from the text wanting to be matched to dictionary.
+#'
+#' @return
+#' data.frame object with one row per matched word: the word (`word`) and its number of occurrences (`count`).
+#'
+#'
+word_count <- function(
+  word_item
+) {
+  # Keep each distinct word of the document that is also a dictionary entry
+  unique_words <- unique(word_item)
+  matched <- unique_words[unique_words %in% genCountR::dict$Word]
+
+  # Count number of times the word shows up. vapply() returns an integer
+  # vector even when nothing matched, so the `count` column always exists;
+  # na.rm keeps missing tokens from turning a count into NA.
+  count <- vapply(
+    matched
+    , function(w) sum(word_item == w, na.rm = TRUE)
+    , integer(1)
+  )
+
+  # One row per matched word
+  row_df <- base::data.frame(
+    word = matched
+    , count = count
+  )
+
+  # Return the counts
+  return(row_df)
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..547d9e0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,36 @@
+

genCountR R Package

+

+ + GitHub last commit + + GitHub issues + + GitHub pull requests + +

+ +I developed this R package for researchers and other users to be able to utilize the Gendered Language Dictionary developed by [Roberts and Utych (2019)](https://journals.sagepub.com/doi/full/10.1177/1065912919874883). + +The package allows users to take a loaded document in R and will then count the number of words contained in the document, and it will then create a score based on the average score of all of the words in the document that matches with those in the Gendered Language Dictionary. + +This package is a quick side project that I worked on during my dissertation. So use at your own risk. + +If there are issues with the package or if you'd like to request new features, please do so on the issues tab of this repository. + +### Vignettes + +See [https://gencounter.app.damoncroberts.com](https://gencounter.app.damoncroberts.com) + +### Webapp + +The alternate webapp for this R package has been depreciated. + +### Project Contributors: + + diff --git a/build/partial.rdb b/build/partial.rdb new file mode 100644 index 0000000..c7c2cee Binary files /dev/null and b/build/partial.rdb differ diff --git a/build/vignette.rds b/build/vignette.rds new file mode 100644 index 0000000..98f046f Binary files /dev/null and b/build/vignette.rds differ diff --git a/data/dict.rda b/data/dict.rda new file mode 100644 index 0000000..3e95fa0 Binary files /dev/null and b/data/dict.rda differ diff --git a/inst/doc/gen_count.R b/inst/doc/gen_count.R new file mode 100644 index 0000000..7881d54 --- /dev/null +++ b/inst/doc/gen_count.R @@ -0,0 +1,16 @@ +## ----include = FALSE---------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----------------------------------------------------------------------------- +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. They were a prisoner of war and I, as President, got them out." 
+ +# Use the gen_count() function on the str +gen_count(str) + diff --git a/inst/doc/gen_count.Rmd b/inst/doc/gen_count.Rmd new file mode 100644 index 0000000..af07ec0 --- /dev/null +++ b/inst/doc/gen_count.Rmd @@ -0,0 +1,32 @@ +--- +title: "gen_count" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{gen_count} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +The `gen_count` function is a really helpful function for those who may not necessarily want to generate a score based on the Gendered Language Dictionary in Roberts and Utych ([2019](https://journals.sagepub.com/doi/10.1177/1065912919874883)). Rather, the function will enable you to simply count the occurances of a word in your supplied text with that of the dictionary along with the corresponding score of that word in the Dicitionary and whether it would be classified as Masculine, Neutral, or Feminine as according to Robert and Utych's ([2019](https://journals.sagepub.com/doi/10.1177/1065912919874883)) definition in the original paper. + +First, you should supply some text as a string to the function. Then, once you execute the function, it should return a `data.frame` with 4 columns: the word that was matched, the count of occurances the word appeared in your supplied text, the score of that word according to the dictionary, and the classification of the word. + +Here is an example of how that function works. + +```{r} +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. They were a prisoner of war and I, as President, got them out." 
+ +# Use the gen_count() function on the str +gen_count(str) +``` diff --git a/inst/doc/gen_count.html b/inst/doc/gen_count.html new file mode 100644 index 0000000..4c593e3 --- /dev/null +++ b/inst/doc/gen_count.html @@ -0,0 +1,387 @@ + + + + + + + + + + + + + + +gen_count + + + + + + + + + + + + + + + + + + + + + + + + + + +

gen_count

+ + + +

The gen_count function is a really helpful function for +those who may not necessarily want to generate a score based on the +Gendered Language Dictionary in Roberts and Utych (2019). +Rather, the function will enable you to simply count the occurrences of +each word in your supplied text that also appears in the dictionary, along with the +corresponding score of that word in the dictionary and whether it would +be classified as Masculine, Neutral, or Feminine according to Roberts +and Utych’s (2019) +definition in the original paper.

+

First, you should supply some text as a string to the function. Then, +once you execute the function, it should return a +data.frame with 4 columns: the word that was matched, the +number of times the word occurred in your supplied text, the score +of that word according to the dictionary, and the classification of the +word.

+

Here is an example of how that function works.

+
# Load the package
+library(genCountR)
+
+# Pass a string to be analyzed
+str <- "This person was a hero. They were a prisoner of war and I, as President, got them out."
+
+# Use the gen_count() function on the str
+gen_count(str)
+#>       word count    score classified
+#> 1     hero     1 5.615385  Masculine
+#> 2 prisoner     1 5.272727    Neutral
+ + + + + + + + + + + diff --git a/inst/doc/gen_score.R b/inst/doc/gen_score.R new file mode 100644 index 0000000..19afa98 --- /dev/null +++ b/inst/doc/gen_score.R @@ -0,0 +1,16 @@ +## ----include = FALSE---------------------------------------------------------- +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +## ----------------------------------------------------------------------------- +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. They were a prisoner of war and I, as President, got them out." + +# Use the gen_count() function on the str +gen_score(str) + diff --git a/inst/doc/gen_score.Rmd b/inst/doc/gen_score.Rmd new file mode 100644 index 0000000..f532483 --- /dev/null +++ b/inst/doc/gen_score.Rmd @@ -0,0 +1,30 @@ +--- +title: "gen_score" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{gen_score} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +To find an overall score of the “genderedness” of some text, you can use the genCountR::gen_score() function. This function takes the count of words that match with those in the dictionary, sum up the scores of all of those matched occurrences and then calculates the average “document” score by dividing by the number of words passed in the string (“document”). + +It also returns more information than the average score. It will return a list object including the average score (avg_score), but also the total score total_score for the document before normalized based on the length of text, but also the data.frame (df) that reports which words were matched, how often they occurred, the score, and the total score provided by each word. + +```{r} +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. 
They were a prisoner of war and I, as President, got them out." + +# Use the gen_count() function on the str +gen_score(str) +``` \ No newline at end of file diff --git a/inst/doc/gen_score.html b/inst/doc/gen_score.html new file mode 100644 index 0000000..dd99e87 --- /dev/null +++ b/inst/doc/gen_score.html @@ -0,0 +1,390 @@ + + + + + + + + + + + + + + +gen_score + + + + + + + + + + + + + + + + + + + + + + + + + + +

gen_score

+ + + +

To find an overall score of the “genderedness” of some text, you can +use the genCountR::gen_score() function. This function takes the count +of words that match with those in the dictionary, sum up the scores of +all of those matched occurrences and then calculates the average +“document” score by dividing by the number of words passed in the string +(“document”).

+

It also returns more information than the average score. It will +return a list object including the average score (avg_score), but also +the total score total_score for the document before normalized based on +the length of text, but also the data.frame (df) that reports which +words were matched, how often they occurred, the score, and the total +score provided by each word.

+
# Load the package
+library(genCountR)
+
+# Pass a string to be analyzed
+str <- "This person was a hero. They were a prisoner of war and I, as President, got them out."
+
+# Use the gen_score() function on the str
+gen_score(str)
+#> $avg_score
+#> [1] 0.6048951
+#> 
+#> $total_score
+#> [1] 10.88811
+#> 
+#> $df
+#>       word count   mean.a total_score
+#> 1     hero     1 5.615385    5.615385
+#> 2 prisoner     1 5.272727    5.272727
+ + + + + + + + + + + diff --git a/man/dict.Rd b/man/dict.Rd new file mode 100644 index 0000000..1d6f5eb --- /dev/null +++ b/man/dict.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dict.R +\docType{data} +\name{dict} +\alias{dict} +\title{Data from Gendered Language Dictionary Developed by Roberts and Utych (2019)} +\format{ +A tibble with 701 rows and 15 columns: +\describe{ +\item{Word}{The word to match} +\item{POS}{Part Of Speech} +\item{mean.a}{Mean score provided by all participants} +\item{std.dev.a}{Standard deviation of score provided by all participants} +} +} +\source{ +\url{https://journals.sagepub.com/doi/10.1177/1065912919874883} +} +\usage{ +dict +} +\description{ +Each word in the dataset contains a rating by human coders. See details of dataset in the original paper. +} +\keyword{datasets} diff --git a/man/genCountR-package.Rd b/man/genCountR-package.Rd new file mode 100644 index 0000000..8db9ea2 --- /dev/null +++ b/man/genCountR-package.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/genCountR-package.R +\docType{package} +\name{genCountR-package} +\alias{genCountR} +\alias{genCountR-package} +\title{genCountR: Interacting with Roberts and Utych's (2019) Gendered Language Dictionary} +\description{ +Allows users to generate a gendered language score according to the gendered language dictionary in Roberts and Utych (2019) \doi{10.1177/1065912919874883}. 
+} +\seealso{ +Useful links: +\itemize{ + \item \url{https://gencounter.app.damoncroberts.com} + \item \url{https://damoncharlesroberts.github.io/genCountR/} +} + +} +\author{ +\strong{Maintainer}: Damon Roberts \email{damon.charles.roberts@gmail.com} (\href{https://orcid.org/0000-0002-4360-3675}{ORCID}) + +} +\keyword{internal} diff --git a/man/gen_count.Rd b/man/gen_count.Rd new file mode 100644 index 0000000..d4ee64c --- /dev/null +++ b/man/gen_count.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gen_count.R +\name{gen_count} +\alias{gen_count} +\title{gen_count} +\usage{ +gen_count(text) +} +\arguments{ +\item{text}{(string): A string object.} +} +\value{ +data.frame with each word from the dictionary matched with the text and its number of occurances. +} +\description{ +Counts the number of masculine and feminine words in the document +} +\details{ +Takes the number of words that are loosely categorized as Masculine, Feminine, or Neutral based on Roberts and Utych's (2019) definition. +Feminine words had a score below 2.5, Neutral words had a score higher than 2.5 and lower than 5.5, Masculine words had a score higher than 5.5. +} +\examples{ +text <- 'This person was a heroine due to their fighting during the war.' +result_df <- genCountR::gen_count(text) + +} diff --git a/man/gen_score.Rd b/man/gen_score.Rd new file mode 100644 index 0000000..f4b3002 --- /dev/null +++ b/man/gen_score.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gen_score.R +\name{gen_score} +\alias{gen_score} +\title{gen_score} +\usage{ +gen_score(text) +} +\arguments{ +\item{text}{(string): A string object} +} +\value{ +list object with avg_score of the supplied text string, total_score of supplied text string, and data.frame of matches +} +\description{ +Calculates the score of the supplied text string based on the Gendered Language Dictionary created by Roberts and Utych (2019). 
+} +\details{ +Takes the matched words and their occurrence in the supplied text, finds the score for those matched words in the dictionary, sums those scores up and then divides it by the total number of words in the dictionary. +} +\examples{ +text <- 'Hero. hero Heroine. heroine, Prison. Prisom.' +result <- genCountR::gen_score(text) +result$avg_score +result$total_score +result$df + +} diff --git a/man/text_clean.Rd b/man/text_clean.Rd new file mode 100644 index 0000000..a38d2e3 --- /dev/null +++ b/man/text_clean.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/text_clean.R +\name{text_clean} +\alias{text_clean} +\title{text_clean} +\usage{ +text_clean(text) +} +\arguments{ +\item{text}{(string): A string object.} +} +\value{ +list of each word in all lower case and without punctuation. +} +\description{ +Cleans the supplied text string and converts it into a list of individual words. +} +\details{ +Takes the string, converts all the characters to lower case, removes punctuation, and splits the string into individual words. +} diff --git a/man/word_count.Rd b/man/word_count.Rd new file mode 100644 index 0000000..c6c2b6f --- /dev/null +++ b/man/word_count.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/word_count.R +\name{word_count} +\alias{word_count} +\title{word_count} +\usage{ +word_count(word_item) +} +\arguments{ +\item{word_item}{(vector or array): A vector or array of words from the text wanting to be matched to dictionary.} +} +\value{ +data.frame object of with count of masculine, feminine, and masculine words. +} +\description{ +Count number of times a particular word from the dictionary shows up in a document. +} +\details{ +Takes the number of words that are loosely categorized as Masculine, Feminine, or Neutral based on Roberts and Utych's (2019) definition. 
+Feminine words had a score below 2.5, Neutral words had a score higher than 2.5 and lower than 5.5, Masculine words had a score higher than 5.5.
+}
diff --git a/tests/testthat/test-gen-count.R b/tests/testthat/test-gen-count.R
new file mode 100644
index 0000000..dcbfe56
--- /dev/null
+++ b/tests/testthat/test-gen-count.R
@@ -0,0 +1,30 @@
+# Title: test of gen_count
+
+# Notes
+  #* Description
+    #** Testing Script for the gen_count() function
+  #* Updated
+    #** 2023-12-01
+    #** dcr
+
+# Setup
+  #* testthat and the package under test are attached by the test runner
+  #* (e.g. devtools::test()), so the test file does not load them itself
+
+# Text for testing
+text <- 'This person was a heroine due to their fighting during the war.'
+
+# Testing
+  #* Check that it returns dataframe
+test_that("Check that it returns dataframe", {
+  expect_s3_class(gen_count(text), "data.frame")
+})
+  #* Check that it returns 3 rows
+test_that("Check that it returns three rows", {
+  # expect_equal() reports the actual row count when the check fails
+  expect_equal(nrow(gen_count(text)), 3)
+})
+  #* Check that it returns 4 columns
+test_that("Check that it returns four columns", {
+  expect_length(gen_count(text), 4)
+})
\ No newline at end of file
diff --git a/tests/testthat/test-gen-score.R b/tests/testthat/test-gen-score.R
new file mode 100644
index 0000000..05165bc
--- /dev/null
+++ b/tests/testthat/test-gen-score.R
@@ -0,0 +1,38 @@
+# Title: test of gen_score
+
+# Notes
+  #* Description
+    #** Testing Script for the gen_score() function
+  #* Updated
+    #** 2023-12-02
+    #** dcr
+
+# Setup
+  #* testthat and the package under test are attached by the test runner
+  #* (e.g. devtools::test()), so the test file does not load them itself
+
+# Text for testing
+text <- 'Hero. hero Heroine. heroine, Prison. Prisom.'
+
+# Testing
+  #* Check that it returns a list object
+test_that("check that it returns a list object", {
+  expect_type(gen_score(text), "list")
+})
+  #* Check that it returns 3 elements in the list
+test_that("check that it returns three elements", {
+  expect_length(gen_score(text), 3)
+})
+  #* Check that the first element (avg_score) is a numeric
+test_that("check that the first element is numeric", {
+  expect_type(gen_score(text)[[1]], "double")
+})
+  #* Check that the second element (total_score) is a numeric
+test_that("check that the second element is numeric", {
+  expect_type(gen_score(text)[[2]], "double")
+})
+  #* Check that the third element (df) is a data.frame
+test_that("check that the third element is data.frame", {
+  expect_s3_class(gen_score(text)[[3]], "data.frame")
+})
diff --git a/tests/testthat/test-text-clean.R b/tests/testthat/test-text-clean.R
new file mode 100644
index 0000000..07d5ae5
--- /dev/null
+++ b/tests/testthat/test-text-clean.R
@@ -0,0 +1,26 @@
+# Title: test of text_clean
+
+# Notes
+  #* Description
+    #** Testing Script for the text_clean() function
+  #* Updated
+    #** 2023-12-02
+    #** dcr
+
+# Setup
+  #* testthat and the package under test are attached by the test runner
+  #* (e.g. devtools::test()), so the test file does not load them itself
+
+# Text for testing
+text <- 'Hero. hero Heroine. heroine, Prison. Prisom.'
+
+# Tests
+  #* Check that it returns a character vector
+test_that("check that it returns a character vector", {
+  expect_type(text_clean(text), "character")
+})
+  #* Check that it is of length 6
+test_that("Check that it is of length 6", {
+  expect_length(text_clean(text), 6)
+})
diff --git a/tests/testthat/test-word-count.R b/tests/testthat/test-word-count.R
new file mode 100644
index 0000000..f2fbe2f
--- /dev/null
+++ b/tests/testthat/test-word-count.R
@@ -0,0 +1,27 @@
+# Title: test of word_count
+
+# Notes
+  #* Description
+    #** Testing Script for the word_count() function
+  #* Updated
+    #** 2023-12-01
+    #** dcr
+
+# Setup
+  #* testthat and the package under test are attached by the test runner
+  #* (e.g. devtools::test()), so the test file does not load them itself
+
+# Text for testing
+word_list <- c("hero", "heroine", "prison")
+
+# Tests
+  #* Correct type returned
+test_that("Check to make sure that it returns a data.frame", {
+  expect_s3_class(word_count(word_list), "data.frame")
+})
+  #* Check contents of it
+test_that("Check that there are three rows", {
+  # expect_equal() reports the actual row count when the check fails
+  expect_equal(nrow(word_count(word_list)), 3)
+})
diff --git a/vignettes/gen_count.Rmd b/vignettes/gen_count.Rmd
new file mode 100644
index 0000000..af07ec0
--- /dev/null
+++ b/vignettes/gen_count.Rmd
@@ -0,0 +1,32 @@
+---
+title: "gen_count"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{gen_count}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+The `gen_count` function is a really helpful function for those who may not necessarily want to generate a score based on the Gendered Language Dictionary in Roberts and Utych ([2019](https://journals.sagepub.com/doi/10.1177/1065912919874883)).
Rather, the function will enable you to simply count the occurrences of each word in your supplied text that also appears in the dictionary, along with the corresponding score of that word in the dictionary and whether it would be classified as Masculine, Neutral, or Feminine according to Roberts and Utych's ([2019](https://journals.sagepub.com/doi/10.1177/1065912919874883)) definition in the original paper. + +First, you should supply some text as a string to the function. Then, once you execute the function, it should return a `data.frame` with 4 columns: the word that was matched, the number of times the word occurred in your supplied text, the score of that word according to the dictionary, and the classification of the word. + +Here is an example of how that function works. + +```{r} +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. They were a prisoner of war and I, as President, got them out." + +# Use the gen_count() function on the str +gen_count(str) +``` diff --git a/vignettes/gen_score.Rmd b/vignettes/gen_score.Rmd new file mode 100644 index 0000000..f532483 --- /dev/null +++ b/vignettes/gen_score.Rmd @@ -0,0 +1,30 @@ +--- +title: "gen_score" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{gen_score} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +To find an overall score of the “genderedness” of some text, you can use the genCountR::gen_score() function. This function takes the count of words that match with those in the dictionary, sums up the scores of all of those matched occurrences and then calculates the average “document” score by dividing by the number of words passed in the string (“document”). + +It also returns more information than the average score. 
It will return a list object that includes the average score (`avg_score`), the total score (`total_score`) for the document before it is normalized by the length of the text, and the data.frame (`df`) that reports which words were matched, how often they occurred, the score, and the total score contributed by each word. + +```{r} +# Load the package +library(genCountR) + +# Pass a string to be analyzed +str <- "This person was a hero. They were a prisoner of war and I, as President, got them out." + +# Use the gen_score() function on the str +gen_score(str) +``` \ No newline at end of file