/
get_lu_table.R
96 lines (90 loc) · 4.79 KB
/
get_lu_table.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#' Retrieve the (top-n most frequent) Lexical Units of a metaphor
#'
#' @description The function is designed to easily retrieve a tibble data frame of the top-n most frequent Lexical Units (LU) of the prominent metaphors discussed in Rajeg (2019, Chapter 5 and 6).
#' @param metaphor regular expressions for the relevant metaphor.
#' @param top_n_only logical; the default is \code{TRUE} with the number specified in \code{top_n_limit} argument. If \code{FALSE}, it prints all the LU for the metaphor.
#' @param top_n_limit integer; how many most frequent LU should be printed? The default is \code{10}.
#' @param df the data frame for the thesis (\code{phd_data_metaphor}). This argument is not put first because the main attention when using the function is on varying the metaphor (hence, coming first) rather than on varying the database.
#' @param submapping_perc logical; wheter to include the percentages of the LU by submappings. If \code{TRUE}, the function only print out the percentages of the LU by submappings.
#' To include the submapping types, specify the \code{incl_submappings} argument to \code{TRUE} as well (the default is \code{FALSE}).
#' @param incl_submappings logical; whether to include the submappings inferred from the LU (\code{TRUE}) or not (\code{FALSE} -- the default).
#' @return A tbl_df.
#' @examples
#' get_lu_table(metaphor = "desired goal$", df = phd_data_metaphor)
#' @importFrom stringr regex
#' @importFrom dplyr top_n
#' @importFrom dplyr %>%
#' @importFrom dplyr rename
#' @importFrom rlang .data
#' @importFrom rlang :=
#' @export
#' @references Rajeg, G. P. W. (2019). \emph{Metaphorical profiles and near-synonyms: A corpus-based study of Indonesian words for HAPPINESS}. PhD Thesis. Monash University. Melbourne, Australia. \url{https://doi.org/10.26180/5cac231a97fb1}.
#'
get_lu_table <- function(metaphor = "regular expressions",
top_n_only = TRUE,
top_n_limit = 10,
df = NULL,
submapping_perc = FALSE,
incl_submappings = FALSE) {
assertthat::assert_that(!is.null(df), msg = "The `df` argument is NULL; please specify it with `phd_data_metaphor`!")
perc_overall <- dplyr::quo(perc_overall)
perc_by_submappings <- dplyr::quo(perc_by_submappings)
if (submapping_perc == FALSE) {
out <- df %>%
dplyr::filter(stringr::str_detect(.data$metaphors, stringr::regex(metaphor, ignore_case = TRUE))) %>%
dplyr::count(.data$lu, .data$lu_gloss, sort = TRUE) %>%
dplyr::select(Lexical_units = .data$lu, Gloss = .data$lu_gloss, n = n) %>%
dplyr::mutate(Lexical_units = stringr::str_c("*", .data$Lexical_units, "*", sep = ""),
Perc_overall = round(n/sum(n)*100, 2))
if (top_n_only==TRUE) {
out <- out %>%
dplyr::top_n(top_n_limit, n) %>%
dplyr::rename(N = n)
return(out)
} else {
out <- out %>%
dplyr::rename(N = n)
return(out)
}
} else { # if one wishes to include the percentage of the submappings of a metaphor
out <- df %>%
dplyr::filter(stringr::str_detect(.data$metaphors, stringr::regex(metaphor, ignore_case = TRUE))) %>%
dplyr::count(.data$submappings, .data$lu, .data$lu_gloss, sort = TRUE) %>%
dplyr::mutate(!!dplyr::quo_name(perc_overall) := round(n/sum(n)*100, 2)) %>%
dplyr::group_by(.data$submappings) %>%
dplyr::mutate(!!dplyr::quo_name(perc_by_submappings) := round(n/sum(n)*100, 2)) %>%
dplyr::ungroup() %>%
dplyr::arrange(dplyr::desc(n))
if (incl_submappings == FALSE) {
out <- out %>%
dplyr::select(Lexical_units = .data$lu,
Gloss = .data$lu_gloss,
n = .data$n,
.data$perc_overall,
.data$perc_by_submappings) %>%
dplyr::mutate(Lexical_units = stringr::str_c("*", .data$Lexical_units, "*", sep = "")) %>%
dplyr::rename(Perc_overall = perc_overall,
Perc_by_submappings = perc_by_submappings)
} else {
out <- out %>%
dplyr::select(Submappings = .data$submappings,
Lexical_units = .data$lu,
Gloss = .data$lu_gloss,
dplyr::everything()) %>%
dplyr::mutate(Lexical_units = stringr::str_c("*", .data$Lexical_units, "*", sep = ""),
Submappings = stringr::str_c('<span style="font-variant:small-caps;">', .data$Submappings, '</span>', sep = '')) %>%
dplyr::rename(Perc_overall = perc_overall,
Perc_by_submappings = perc_by_submappings)
}
if (top_n_only==TRUE) {
out <- out %>%
dplyr::top_n(top_n_limit, n) %>%
dplyr::rename(N = n)
return(out)
} else {
out <- out %>%
dplyr::rename(N = n)
return(out)
}
}
}