/
utils.R
336 lines (261 loc) · 7.34 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# GENERAL UTILITY FUNCTIONS
# NONE OF THESE FUNCTIONS ARE EXPORTED
# Not in operator
#
# For convenience, rather than always `!(x, %in% y)`
#
# @param x A scalar or vector
# @param y A scalar or vector
#
# @return TRUE if x is not in y, FALSE otherwise
'%nin%' <- function(x,y){
!('%in%'(x,y))
}
# rbind two lists with different names into a data frame
#
# Performs an `rbind()` operation on two named lists or vectors that do not need to share the same names, but
# will match the names and fill any missing cols with `NA`s.
#
# @param x1 A named list or named vector
# @param x2 Another named list or named vector
#
# @examples
# #
#
# @return Data frame
rbind_fill <- function(x1, x2){
if(is.null(names(x1)) || is.null(names(x2))){
stop("names of x1 or x2 is NULL")
}
# make to dfs
x1 <- as.data.frame(as.list(x1))
x2 <- as.data.frame(as.list(x2))
# fill with NAs
x1[setdiff(names(x2), names(x1))] <- NA
x2[setdiff(names(x1), names(x2))] <- NA
rbind(x1, x2)
}
# Remove empty components from list
#
# Short cut for removing any empty components of a list
#
# @param l A list
#
# @examples
# #
#
# @return List with empty bits removed
tidy_list <- function(l){
l[lengths(l) > 0]
}
# Check availability of function
#
# Checks if a function is available, and returns an error if not.
#
# @param f_name A string to use to check whether a function exists with that name.
#
# @return Nothing or error
check_fname <- function(f_name){
if(!(exists(f_name, mode = "function"))){
stop("function '", f_name, "' not found. must be an accessible function.")
}
}
# Set default arg
#
# A shortcut
#
# @param x The argument
# @param x_default The default to set
#
# @return the parameter
set_default <- function(x, x_default){
if(is.null(x)){
x_default
} else {
x
}
}
# Data frame or matrix to long form
#
# This is a substitute function for tidyr's 'pivot_longer' to avoid dependencies, and behaves in more or
# less the same way.
#
# If `cols` is not specified, assumes a square correlation matrix to convert to long form. If `cols` is
# specified, this behaves like pivot_longer's "cols" argument.
#
# @param X A data frame or square correlation matrix
# @param cols Columns to pivot into longer format.
#
# @importFrom utils stack
#
# @return A long format data frame
lengthen <- function(X, cols = NULL){
# make df
X <- as.data.frame(X)
if(!is.null(cols)){
stopifnot(all(cols %in% names(X)))
X_ <- X[cols]
X <- X[names(X) %nin% cols]
X$V_to_pivot <- rownames(X)
} else {
X_ <- X
}
# stack and add names
X1 <- cbind(utils::stack(X_), rownames(X_))
names(X1) <- c("Value", "V2", "V1")
X1$V2 <- as.character(X1$V2)
X1 <- rev(X1)
if(!is.null(cols)){
X1 <- merge(X, X1, by.x = "V_to_pivot", by.y = "V1", all = TRUE)
X1 <- X1[names(X1) != "V_to_pivot"]
names(X1)[names(X1) == "V2"] <- "name"
}
X1
}
# Make long df wide
#
# This is a quick function for making a long-format data frame wide. It is limited in scope, assumes
# that the input is a data frame with three columns: one of which is numeric, and the other two are
# character vectors. The numeric column will be widened, and the other two columns will be used
# for row and column names.
#
# @param X a long format data frame
#
# @importFrom utils unstack
#
# @return A wide format data frame
widen <- function(X){
stopifnot(ncol(X) == 3)
# make df
X <- as.data.frame(X)
# find numeric col
num_cols <- sapply(X, is.numeric)
if(sum(num_cols) > 1){
stop("More than one numeric column found")
}
if(sum(num_cols) == 0){
stop("No numeric columns found.")
}
# rearrange to get numeric col first
X <- X[c(which(num_cols), which(!num_cols))]
# order
X <- X[order(X[[3]], X[[2]]),]
# unstack and add row names
Xw <- utils::unstack(X[1:2])
row.names(Xw) <- unique(X[[3]])
Xw
}
#' Convert iCodes to iNames
#'
#' @param coin A coin
#' @param iCodes A vector of iCodes
#'
#' @return Vector of iNames
#' @export
icodes_to_inames <- function(coin, iCodes){
stopifnot(is.coin(coin))
iMeta <- coin$Meta$Ind
stopifnot(all(iCodes %in% iMeta$iCode))
iMeta$iName[match(iCodes, iMeta$iCode)]
}
#' Convert uCodes to uNames
#'
#' @param coin A coin
#' @param uCodes A vector of uCodes
#'
#' @return Vector of uNames
#' @export
ucodes_to_unames <- function(coin, uCodes){
stopifnot(is.coin(coin))
uMeta <- coin$Meta$Unit
stopifnot(all(uCodes %in% uMeta$uCode))
uMeta$uName[match(uCodes, uMeta$uCode)]
}
# Splits data frame into numeric and non-numeric columns
#
# @param x A data frame with numeric and non-numeric columns.
#
# @return A list with `.$not_numeric` containing a data frame with non-numeric columns, and `.$numeric` being
# a data frame containing only numeric columns.
#
# @examples
# #
split_by_numeric <- function(x){
stopifnot(is.data.frame(x))
# numeric cols
numeric_cols <- sapply(x, is.numeric)
if(sum(numeric_cols) == 0){
stop("No numeric cols found in the data frame.")
}
list(not_numeric = x[!numeric_cols],
numeric = x[numeric_cols])
}
# this function adjusts an iData dataset by directions, this is for use e.g.
# in correlation plotting.
# Just works with in-coin directions at the moment.
# iData can have non-numeric columns like uCode, uName etc, but any numeric
# cols will be required to have a corresponding direction entry in iMeta.
directionalise <- function(iData, coin){
imeta <- coin$Meta$Ind[coin$Meta$Ind$Type == "Indicator", ]
df_out <- lapply(names(iData), function(iCode){
x <- iData[[iCode]]
if(is.numeric(x)){
if(iCode %nin% imeta$iCode){
stop("Name of numeric column in iData does not have an entry in iMeta found in coin. Column: ", iCode)
}
iData[iCode]*imeta$Direction[imeta$iCode == iCode]
} else {
x
}
})
df_out <- as.data.frame(df_out)
names(df_out) <- names(iData)
#stopifnot(identical(names(df_out), names(iData)))
df_out
}
# X is a df
# cols specifies the names of TWO columns in X
# from which to remove duplicate pairs
remove_duplicate_corrs <- function(X, cols){
X1 = X[,cols]
duplicated_rows <- duplicated(t(apply(X1, 1, sort)))
X[!duplicated_rows, ]
}
# convert integer columns to numeric (intended for iData)
df_int_2_numeric <- function(X){
# convert integer cols to numeric (iData)
rnames <- row.names(X)
col_names <- names(X)
X <- lapply(col_names, function(col_name){
x <- X[[col_name]]
if(is.integer(x)){
message("iData column '", col_name, "' converted from integer to numeric.")
as.numeric(x)
} else x
}) |> as.data.frame()
row.names(X) <- rnames
names(X) <- col_names
X
}
# FOR TESTS ---------------------------------------------------------------
# function that imputes using mean, but then adds an NA - used in imputation testing
NA_imputer <- function(x){
NA_location <- which(is.na(x))
x_imp <- i_mean(x)
if(length(NA_location) > 0){
insert_NA_at <- NA_location[1]
x_imp[insert_NA_at] <- NA
}
x_imp
}
# A silly aggregation function used only for unit tests - takes weights and chucks
# them away, then makes up some numbers for the aggregation
# Takes a data frame as input.
silly_aggregate <- function(x, w, start_at = 1){
message("Weights received and thrown away: ", toString(w))
1:nrow(x) + (start_at - 1)
}
# same but with no weights... here just takes first column
silly_aggregate_no_wts <- function(x){
as.numeric(x[,1])
}