/
ds.completeCases.R
190 lines (178 loc) · 9.19 KB
/
ds.completeCases.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#'
#' @title Identifies complete cases in server-side R objects
#' @description Selects complete cases of a data frame,
#' matrix or vector that contain missing values.
#' @details In the case of a data frame or matrix, \code{ds.completeCases} deletes
#' all rows containing one or more missing values. For a vector, however,
#' \code{ds.completeCases} deletes only the observations recorded as NA.
#'
#' Server function called: \code{completeCasesDS}
#'
#' @param x1 a character denoting the name of the input object which can be a data frame,
#' matrix or vector.
#' @param newobj a character string that provides the name for the complete-cases object
#' that is stored on the data servers. If the user does not specify a name, then the function
#' generates a default name: the name of the input object with the
#' suffix "_complete.cases".
#' @param datasources a list of \code{\link{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified, the default set of connections will be
#' used: see \code{\link{datashield.connections_default}}.
#' @return \code{ds.completeCases} generates a modified data frame, matrix or vector from which
#' all rows containing at least one NA have been deleted. The output object is stored on the
#' server-side. Only two validity messages are returned to the client-side indicating the name
#' of the \code{newobj} that has been created in each data source and if it is in a valid form.
#' @examples
#' \dontrun{
#' ## Version 6, for version 5 see the Wiki
#' # Connecting to the Opal servers
#'
#' require('DSI')
#' require('DSOpal')
#' require('dsBaseClient')
#'
#' builder <- DSI::newDSLoginBuilder()
#' builder$append(server = "study1",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM1", driver = "OpalDriver")
#' builder$append(server = "study2",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM2", driver = "OpalDriver")
#' builder$append(server = "study3",
#' url = "http://192.168.56.100:8080/",
#' user = "administrator", password = "datashield_test&",
#' table = "CNSIM.CNSIM3", driver = "OpalDriver")
#' logindata <- builder$build()
#'
#' # Log onto the remote Opal training servers
#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
#'
#' # Select complete cases from different R objects
#'
#' ds.completeCases(x1 = "D", #data frames in the Opal servers
#' #(see above the connection to the Opal servers)
#' newobj = "D.completeCases", # name for the output object
#' # that is stored in the Opal servers
#' datasources = connections) # All Opal servers are used
#' # (see above the connection to the Opal servers)
#'
#' ds.completeCases(x1 = "D$LAB_TSC", #vector (variable) of the data frames in the Opal servers
#' #(see above the connection to the Opal servers)
#' newobj = "LAB_TSC.completeCases", #name for the output variable
#' #that is stored in the Opal servers
#' datasources = connections[2]) #only the second Opal server is used ("study2")
#'
#' # Clear the DataSHIELD R sessions and logout
#' datashield.logout(connections)
#' }
#'
#' @author DataSHIELD Development Team
#' @export
#'
ds.completeCases <- function(x1=NULL, newobj=NULL, datasources=NULL){

  # If no connections were supplied, look for 'connection' objects in the environment.
  if(is.null(datasources)){
    datasources <- datashield.connections_find()
  }

  # Ensure datasources is a list of DSConnection-class objects.
  if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
  }

  # A missing x1 is reported to the caller as a returned message string
  # (not as a raised error); existing callers may rely on this.
  if(is.null(x1)){
    return("Error: x1 must be a character string naming a serverside data.frame, matrix or vector")
  }

  # Check that the input object is defined in all the studies.
  isDefined(datasources, x1)

  # Rename the target object for transfer (not strictly necessary as the string
  # will pass the parser anyway) to stay consistent with other functions.
  x1.transmit <- x1

  # If no name was given for the output object, derive a default from the input name.
  if(is.null(newobj)){
    newobj <- paste0(x1,"_complete.cases")
  }

  # CALL THE MAIN SERVER SIDE FUNCTION and assign its result to 'newobj' on each server.
  calltext <- call("completeCasesDS", x1.transmit)
  DSI::datashield.assign(datasources, newobj, calltext)

  #############################################################################
  # DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED

  # SET APPROPRIATE PARAMETERS FOR THIS PARTICULAR FUNCTION
  test.obj.name <- newobj

  # CALL SERVERSIDE FUNCTION
  calltext <- call("testObjExistsDS", test.obj.name)
  object.info <- DSI::datashield.aggregate(datasources, calltext)

  # CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS
  # AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS
  num.datasources <- length(object.info)

  obj.name.exists.in.all.sources <- TRUE
  obj.non.null.in.all.sources <- TRUE

  # seq_len() (rather than 1:num.datasources) so that an empty result list
  # is skipped instead of being indexed at positions 1 and 0.
  for(j in seq_len(num.datasources)){
    if(!object.info[[j]]$test.obj.exists){
      obj.name.exists.in.all.sources <- FALSE
    }
    if(is.null(object.info[[j]]$test.obj.class) || ("ABSENT" %in% object.info[[j]]$test.obj.class)){
      obj.non.null.in.all.sources <- FALSE
    }
  }

  if(obj.name.exists.in.all.sources && obj.non.null.in.all.sources){
    return.message <-
      paste0("A data object <", test.obj.name, "> has been created in all specified data sources")
  }else{
    return.message.1 <-
      paste0("Error: A valid data object <", test.obj.name, "> does NOT exist in ALL specified data sources")
    return.message.2 <-
      paste0("It is either ABSENT and/or has no valid content/class,see return.info above")
    return.message.3 <-
      paste0("Please use ds.ls() to identify where missing")

    return.message <- list(return.message.1,return.message.2,return.message.3)
  }

  # Collect any studyside messages attached to the new object on each server.
  calltext <- call("messageDS", test.obj.name)
  studyside.message <- DSI::datashield.aggregate(datasources, calltext)

  # Any message other than the "ALL OK" sentinel marks the object as invalid.
  no.errors <- TRUE
  for(nd in seq_len(num.datasources)){
    if(studyside.message[[nd]]!="ALL OK: there are no studysideMessage(s) on this datasource"){
      no.errors <- FALSE
    }
  }

  if(no.errors){
    validity.check <- paste0("<",test.obj.name, "> appears valid in all sources")
    return(list(is.object.created=return.message,validity.check=validity.check))
  }

  # At least one source reported a problem: include the raw studyside messages.
  validity.check <- paste0("<",test.obj.name,"> invalid in at least one source. See studyside.messages:")
  list(is.object.created=return.message,validity.check=validity.check,
       studyside.messages=studyside.message)

  # END OF CHECK OBJECT CREATED CORRECTLY MODULE
  #############################################################################
}
#ds.completeCases