-
Notifications
You must be signed in to change notification settings - Fork 0
/
CTVsuggest.R
70 lines (55 loc) · 3.21 KB
/
CTVsuggest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#' Output CRAN Task View suggestions
#'
#' `CTVsuggest()` works in one of three modes, chosen by which arguments are
#' supplied. The modes are checked in this order:
#'
#' 1. If `package` is supplied, the predicted Task View probabilities for that
#'    single package are returned. All other arguments are ignored.
#' 2. Otherwise, if `ranktaskview` is supplied, the packages belonging to that
#'    Task View are ranked by their predicted probability for it.
#' 3. Otherwise, the top `n` package recommendations for `taskview` are
#'    returned, skipping any package named in `ignore`.
#'
#' The predicted probabilities are computed from the model object constructed with the [`CTVsuggestTrain::Train_model()`](https://dylandijk.github.io/CTVsuggestTrain/reference/Train_model.html) function.
#' They are downloaded from the CTVsuggestTrain GitHub repository on every
#' call, so an internet connection is required.
#'
#' @param taskview A character vector with one element, must be one of the [Task Views available](https://github.com/cran-task-views/ctv#available-task-views)
#' @param n A single non-negative number: the maximum number of suggestions to
#'   show. Fewer rows are returned if fewer suggestions are available.
#' @param ignore A character vector of package names that you want to ignore from output suggestions.
#' @param package A string, that is a package name that is on CRAN. Leave as
#'   `NA` (the default) to use one of the other modes.
#' @param ranktaskview A character vector with one element, must be one of the [Task Views available](https://github.com/cran-task-views/ctv#available-task-views).
#'   Leave as `NA` (the default) to get suggestions for `taskview` instead.
#'
#' @return
#' * Suggestion mode: a [data.frame] with the suggested packages and their
#'   classification probability, sorted by decreasing probability.
#' * `package` mode: the probabilities for that package, rounded to 4 decimal
#'   places and sorted in increasing order.
#' * `ranktaskview` mode: a one-column table of probabilities for the packages
#'   of that Task View, sorted by decreasing probability. Packages of the Task
#'   View that have no predicted probabilities are left out.
#'
#' @export
#'
#'
#' @examples
#' # Output top 5 suggestions for the Econometrics Task View,
#' # whilst hiding the GVARX package from suggestions.
#' CTVsuggest(taskview = "Econometrics", n = 5, ignore = "GVARX")
#'
#' # Output predicted probabilities for the task view assignment of the doc2vec package
#' CTVsuggest(package = "doc2vec")
CTVsuggest <- function(taskview = "Econometrics", n = 5, ignore = NULL,
                       package = NA, ranktaskview = NA) {
  # `package` and `ranktaskview` are used as scalar mode switches below.
  stopifnot(length(package) == 1L, length(ranktaskview) == 1L)

  output_url <- "https://github.com/DylanDijk/CTVsuggestTrain/blob/main/OUTPUT/"

  # Download one .rda file and return the object called `object_name` from it.
  # The file is loaded into a scratch environment so nothing leaks into this
  # function's frame, and the connection is always closed, even on error.
  fetch_rda <- function(file_name, object_name) {
    con <- url(paste0(output_url, file_name, "?raw=true"))
    on.exit(close(con), add = TRUE)
    scratch <- new.env()
    load(con, envir = scratch)
    get(object_name, envir = scratch, inherits = FALSE)
  }

  # Fail with a readable message instead of "undefined columns selected".
  check_taskview <- function(name, probs) {
    if (!name %in% setdiff(colnames(probs), "Packages")) {
      stop("No predicted probabilities available for Task View '", name, "'.",
           call. = FALSE)
    }
  }

  if (!is.na(package)) {
    # Outputting probability vector for a package
    probs <- fetch_rda("predicted_probs_all.rda", "predicted_probs_all")
    if (!package %in% rownames(probs)) {
      stop("No predicted probabilities available for package '", package, "'.",
           call. = FALSE)
    }
    # setdiff() rather than -which(): a negative index built from an empty
    # which() would silently select zero columns.
    prob_columns <- setdiff(colnames(probs), "Packages")
    package_prob <- as.matrix(probs[package, prob_columns, drop = FALSE])
    package_prob <- round(package_prob, 4)
    return(package_prob[, order(package_prob)])
  }

  if (!is.na(ranktaskview)) {
    # ranking packages within a Task View
    probs <- fetch_rda("predicted_probs_all.rda", "predicted_probs_all")
    check_taskview(ranktaskview, probs)
    # need to load in packages from that Task View
    tvdb <- CTVsuggest:::download_taskview_data()
    task_view_packages <- unique(
      Reduce(c, RWsearch::tvdb_pkgs(char = ranktaskview, tvdb = tvdb))
    )
    # Keep only packages that have a row of probabilities; indexing by an
    # unknown row name would otherwise fail or yield an all-NA row.
    task_view_packages <-
      task_view_packages[task_view_packages %in% rownames(probs)]
    pckgs_ranked <- probs[task_view_packages, ranktaskview, drop = FALSE]
    ranking <- order(pckgs_ranked[, ranktaskview], decreasing = TRUE)
    return(pckgs_ranked[ranking, , drop = FALSE])
  }

  # Outputting packages with highest probabilities for a Task View
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  probs <- fetch_rda(
    "predicted_probs_for_suggestions.rda",
    "predicted_probs_for_suggestions"
  )
  check_taskview(taskview, probs)
  ranking <- order(probs[, taskview], decreasing = TRUE)
  suggestions <- probs[ranking, c(taskview, "Packages"), drop = FALSE]
  # `%in% NULL` is all FALSE, so the default `ignore = NULL` removes nothing.
  suggestions <- suggestions[!(rownames(suggestions) %in% ignore), ,
                             drop = FALSE]
  # seq_len(min(...)) never indexes past the table (no NA rows) and returns
  # zero rows for n = 0, unlike 1:n.
  suggestions[seq_len(min(n, nrow(suggestions))), , drop = FALSE]
}
# load(url("https://github.com/DylanDijk/CTVsuggestTrain/blob/main/OUTPUT/model_accuracy.rda?raw=true"))