version 0.4.0

cran · Jun 24, 2023 · 43c8ac9 · 43c8ac9
1 parent ba5515c
commit 43c8ac9
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 14 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: disclosuR
 Type: Package
 Title: Text Conversion from Nexis Uni PDFs to R Data Frames
-Version: 0.0.1.0
+Version: 0.4.0
 Date: 2023-06-05
 Authors@R: 
   person("Jonas", "Röttger", role = c("aut", "cre"), 
@@ -13,8 +13,8 @@ Imports: dplyr, lubridate, pdftools, qdap, SentimentAnalysis, stringi,
 Encoding: UTF-8
 RoxygenNote: 7.2.3
 NeedsCompilation: no
-Packaged: 2023-06-13 14:07:12 UTC; U711123
+Packaged: 2023-06-24 15:42:48 UTC; U711123
 Author: Jonas Röttger [aut, cre]
 Maintainer: Jonas Röttger <jonas.roettger@gmx.net>
 Repository: CRAN
-Date/Publication: 2023-06-13 16:40:02 UTC
+Date/Publication: 2023-06-24 16:00:02 UTC
diff --git a/MD5 b/MD5
@@ -1,6 +1,6 @@
-164578cd964cf380ab6d0270959be74f *DESCRIPTION
+771e3232e37f1abda5a7e28d1694fe70 *DESCRIPTION
 31e161b8095f5a667f6904a78e3deea2 *NAMESPACE
-40c46d732b68cd9ceed66bf75795c1d3 *R/2023-04-24_DisclosuR.R
+781862e90313add3d2bdca84bbf24acc *R/2023-04-24_DisclosuR.R
 259d5342067a5ba2341fab15c16f1459 *inst/WORDLIST.txt
 f470cc088223f949820f220ac0729ca3 *inst/examples/earnings_calls/earnings_example_01.pdf
 197018204b535d1316a02044aff6bf63 *inst/examples/earnings_calls/earnings_example_02.pdf

diff --git a/R/2023-04-24_DisclosuR.R b/R/2023-04-24_DisclosuR.R
@@ -58,7 +58,7 @@ conference_call_segmenter <- function(file,
 
     # get end of date and date
     date_end <- "Copyright"
-    date <- stringr::str_match(str_replace_all(str_squish(text), "[\r\n]" , ""), paste("Wire", "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
+    date <- stringr::str_match(stringr::str_replace_all(str_squish(text), "[\r\n]" , ""), paste("Wire", "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
 
     # convert the string to a date variable
     date <- as.Date(date, "%B %d, %Y %A")
@@ -625,9 +625,9 @@ newswire_segmenter <- function(file,
 
 
     # reformat text
-    text <- str_replace_all(text, "[\r\n]" , " ")
-    text <- str_replace_all(text, "[\r\n]" , " ")
-    text <- str_squish(text)
+    text <- stringr::str_replace_all(text, "[\r\n]" , " ")
+    text <- stringr::str_replace_all(text, "[\r\n]" , " ")
+    text <- stringr::str_squish(text)
 
     # get newswire
     newswires <- c("Canada NewsWire", "PR Newswire", "ENP Newswire", "States News Service", "Marketwire",
@@ -664,10 +664,10 @@ newswire_segmenter <- function(file,
 
     # get end of date and date
     date_end <- "Copyright"
-    date <- str_match(text, paste(newswire, "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
+    date <- stringr::str_match(text, paste(newswire, "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
 
     # convert data to real date
-    date <- str_split(date, pattern = week_days)[[1]][1]
+    date <- stringr::str_split(date, pattern = week_days)[[1]][1]
     date <- as.character(date)
     date <- as.Date(date, format = "%B %d, %Y")
 
@@ -832,7 +832,7 @@ newswire_segmenter <- function(file,
           category <- keywords$Category[j]
           keywords_list <- unlist(strsplit(keywords$Keywords[j], "\\|"))
           # count the number of matches in the text column of press_data_temp
-          count <- sum(str_count(press_data_temp$preprocessed_title[i], stringr::regex(keywords_list, ignore_case = TRUE)))
+          count <- sum(stringr::str_count(press_data_temp$preprocessed_title[i], stringr::regex(keywords_list, ignore_case = TRUE)))
           # store the count for this category
           counts[j] <- count
           # update the category column in press_data_temp
@@ -843,7 +843,7 @@ newswire_segmenter <- function(file,
 
       # add the most frequent column name to a new column
       # Create new column to store column names with highest values
-      press_data_temp$category_Graffin <- apply(press_data_temp[, 34:ncol(press_data_temp)], 1, function(row) {
+      press_data_temp$category_Graffin <- apply(press_data_temp[, which(names(press_data_temp) == "preprocessed_title"):ncol(press_data_temp)], 1, function(row) {
           # Check if all values in the row are zero
           if(all(row == 0)){
             return("Others")
@@ -893,7 +893,7 @@ newswire_segmenter <- function(file,
 
     # Use grepl() to check if any of the terms are found in category_Graffin
     press_data_temp <- press_data_temp %>%
-      mutate(valence_category = ifelse(
+      dplyr::mutate(valence_category = ifelse(
         grepl(terms_positive, .data$category_Graffin), "positive",
         ifelse(grepl(terms_negative, .data$category_Graffin), "negative",
                ifelse(grepl(terms_neutral, .data$category_Graffin), "neutral",