Skip to content

Commit

Permalink
version 0.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasroettger authored and cran-robot committed Jun 24, 2023
1 parent ba5515c commit 43c8ac9
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: disclosuR
Type: Package
Title: Text Conversion from Nexis Uni PDFs to R Data Frames
Version: 0.0.1.0
Version: 0.4.0
Date: 2023-06-05
Authors@R:
person("Jonas", "Röttger", role = c("aut", "cre"),
Expand All @@ -13,8 +13,8 @@ Imports: dplyr, lubridate, pdftools, qdap, SentimentAnalysis, stringi,
Encoding: UTF-8
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-06-13 14:07:12 UTC; U711123
Packaged: 2023-06-24 15:42:48 UTC; U711123
Author: Jonas Röttger [aut, cre]
Maintainer: Jonas Röttger <jonas.roettger@gmx.net>
Repository: CRAN
Date/Publication: 2023-06-13 16:40:02 UTC
Date/Publication: 2023-06-24 16:00:02 UTC
4 changes: 2 additions & 2 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
164578cd964cf380ab6d0270959be74f *DESCRIPTION
771e3232e37f1abda5a7e28d1694fe70 *DESCRIPTION
31e161b8095f5a667f6904a78e3deea2 *NAMESPACE
40c46d732b68cd9ceed66bf75795c1d3 *R/2023-04-24_DisclosuR.R
781862e90313add3d2bdca84bbf24acc *R/2023-04-24_DisclosuR.R
259d5342067a5ba2341fab15c16f1459 *inst/WORDLIST.txt
f470cc088223f949820f220ac0729ca3 *inst/examples/earnings_calls/earnings_example_01.pdf
197018204b535d1316a02044aff6bf63 *inst/examples/earnings_calls/earnings_example_02.pdf
Expand Down
18 changes: 9 additions & 9 deletions R/2023-04-24_DisclosuR.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ conference_call_segmenter <- function(file,

# set the marker that ends the date string, then extract the date
date_end <- "Copyright"
date <- stringr::str_match(str_replace_all(str_squish(text), "[\r\n]" , ""), paste("Wire", "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
date <- stringr::str_match(stringr::str_replace_all(str_squish(text), "[\r\n]" , ""), paste("Wire", "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]

# convert the string to a date variable
date <- as.Date(date, "%B %d, %Y %A")
Expand Down Expand Up @@ -625,9 +625,9 @@ newswire_segmenter <- function(file,


# reformat text
text <- str_replace_all(text, "[\r\n]" , " ")
text <- str_replace_all(text, "[\r\n]" , " ")
text <- str_squish(text)
text <- stringr::str_replace_all(text, "[\r\n]" , " ")
text <- stringr::str_replace_all(text, "[\r\n]" , " ")
text <- stringr::str_squish(text)

# get newswire
newswires <- c("Canada NewsWire", "PR Newswire", "ENP Newswire", "States News Service", "Marketwire",
Expand Down Expand Up @@ -664,10 +664,10 @@ newswire_segmenter <- function(file,

# set the marker that ends the date string, then extract the date
date_end <- "Copyright"
date <- str_match(text, paste(newswire, "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]
date <- stringr::str_match(text, paste(newswire, "\\s*(.*?)\\s*", date_end, sep = ""))[[2]]

# convert the date string to a Date object (drop the trailing weekday first)
date <- str_split(date, pattern = week_days)[[1]][1]
date <- stringr::str_split(date, pattern = week_days)[[1]][1]
date <- as.character(date)
date <- as.Date(date, format = "%B %d, %Y")

Expand Down Expand Up @@ -832,7 +832,7 @@ newswire_segmenter <- function(file,
category <- keywords$Category[j]
keywords_list <- unlist(strsplit(keywords$Keywords[j], "\\|"))
# count the keyword matches in the preprocessed_title of row i of press_data_temp
count <- sum(str_count(press_data_temp$preprocessed_title[i], stringr::regex(keywords_list, ignore_case = TRUE)))
count <- sum(stringr::str_count(press_data_temp$preprocessed_title[i], stringr::regex(keywords_list, ignore_case = TRUE)))
# store the count for this category
counts[j] <- count
# update the category column in press_data_temp
Expand All @@ -843,7 +843,7 @@ newswire_segmenter <- function(file,

# add the most frequent column name to a new column
# Create new column to store column names with highest values
press_data_temp$category_Graffin <- apply(press_data_temp[, 34:ncol(press_data_temp)], 1, function(row) {
press_data_temp$category_Graffin <- apply(press_data_temp[, which(names(press_data_temp) == "preprocessed_title"):ncol(press_data_temp)], 1, function(row) {
# Check if all values in the row are zero
if(all(row == 0)){
return("Others")
Expand Down Expand Up @@ -893,7 +893,7 @@ newswire_segmenter <- function(file,

# Use grepl() to check if any of the terms are found in category_Graffin
press_data_temp <- press_data_temp %>%
mutate(valence_category = ifelse(
dplyr::mutate(valence_category = ifelse(
grepl(terms_positive, .data$category_Graffin), "positive",
ifelse(grepl(terms_negative, .data$category_Graffin), "negative",
ifelse(grepl(terms_neutral, .data$category_Graffin), "neutral",
Expand Down

0 comments on commit 43c8ac9

Please sign in to comment.