Skip to content

Commit

Permalink
version 0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Qiang Liu authored and cran-robot committed Mar 22, 2024
1 parent 8668e82 commit 69b497e
Show file tree
Hide file tree
Showing 16 changed files with 653 additions and 15 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
Package: ggscidca
Type: Package
Title: Plotting Decision Curve Analysis with Coloured Bars
Version: 0.1.9
Version: 0.2.0
Authors@R: person("Qiang", "Liu", email = "dege857@163.com",
role = c("aut", "cre"))
Maintainer: Qiang Liu <dege857@163.com>
Description: Decision curve analysis is a method for evaluating and comparing prediction models that incorporates clinical consequences, requires only the data set on which the models are tested, and can be applied to models that have either continuous or dichotomous results. The 'ggscidca' package adds coloured bars of discriminant relevance to the traditional decision curve. Improved practicality and aesthetics. This method was described by Balachandran VP (2015) <doi:10.1016/S1470-2045(14)71116-7>.
License: GPL-3
Encoding: UTF-8
LazyData: true
Imports: ggplot2, randomForest, reshape2, survival
Imports: cmprsk, ggplot2, randomForest, reshape2, survival
Depends: R (>= 2.10)
RoxygenNote: 7.2.1
NeedsCompilation: no
Packaged: 2024-03-14 01:19:21 UTC; Administrator
Packaged: 2024-03-21 01:59:11 UTC; Administrator
Author: Qiang Liu [aut, cre]
Repository: CRAN
Date/Publication: 2024-03-14 03:10:02 UTC
Date/Publication: 2024-03-21 03:40:07 UTC
21 changes: 15 additions & 6 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
e8c8e643e5f942db5d6a1b4d43cbdeed *DESCRIPTION
0ae91dabaeebab62c6cacfbebc4e35c5 *NAMESPACE
4bc52e9bdf13de3862ea75c70cf2b072 *DESCRIPTION
d88d23b1e4e69a559700b75361bcca23 *NAMESPACE
d77e1d853cb533696b2f0bf6f27a671f *R/Breastcancer.R
be594cc1baf788f2b97e62805d45587e *R/LIRI.R
96259fc01e5cd2a160e23cc7f78447b0 *R/cmprskstdca.R
216e0a9c482690414e928153b1cfce54 *R/dca.R
64d253fd767cb121c25c62beeb8d0501 *R/getplot.R
69d3ce40350fe7fdefcaa60b6398979e *R/modeldata.R
4894bfe873f8792f797e3da57c81237b *R/df_surv.R
ff36222a8a48f35cc2caa0995a13aef0 *R/getplot.R
c2bdde46902a031e12f189f547cc80ea *R/modeldata.R
f4e22d1188cae05460354b1f9efe3dae *R/newcrr.R
9cbc6ae9632447b4dd5587c1c1d69725 *R/randomForest.R
757ce5f883b1a71c4d566b0cd5360291 *R/scidca.R
730c2e1837333916df5cf12280406d4c *R/scidca.R
e2f97717caa81608958814694882445a *R/scidca.coxph.R
8f26c83de76cb7abd1fcd47c39779895 *R/scidca.glm.R
385f7f2656c07857e706d13911b3330b *R/scidcacrr.R
3a958c01db3ecaf4179a316c60ee0034 *R/stdca.R
c4af709827f34a6df89eed06c075abf6 *data/Breastcancer.rda
6e8f5f20eeaab8924e4ef80d4feca852 *data/LIRI.rda
d89019686b5f23d8738274b99a263abc *data/df_surv.rda
0b2b5d5b3ab9826c04de07e7c6ea316d *man/Breastcancer.Rd
8f905fd0b5c3968c6b9a74371d2c3f0f *man/LIRI.Rd
cdc57f4900129d168d8b9c091505c3d4 *man/cmprskstdca.Rd
e48b987fad6882a9d1869de7b0c5c435 *man/dca.Rd
25bf1e452a16854e46c4584ba06cc90a *man/scidca.Rd
ce805d5c53d0e298a9a190d290010fae *man/df_surv.Rd
eb982f5ea321b43364537d07877813bb *man/newcrr.Rd
99bfe234e8f04e16becb416a0bb913d4 *man/scidca.Rd
ec90f280a5427b019433f1cc43d187a2 *man/scidca.coxph.Rd
aeab0bcae7320f3504e009eb5c2115d8 *man/scidca.crr.Rd
008e5ed3e094aca79b762ba4f96e210e *man/scidca.glm.Rd
94db8e9ed54a5054bb8e66c055b0e864 *man/scidca.randomForest.Rd
727365562c13586856edffa3c24309ed *man/stdca.Rd
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
# Generated by roxygen2: do not edit by hand

S3method(scidca,coxph)
S3method(scidca,crr)
S3method(scidca,glm)
S3method(scidca,randomForest)
export(cmprskstdca)
export(dca)
export(newcrr)
export(scidca)
export(stdca)
import("ggplot2")
import("reshape2")
import("survival")
importFrom("cmprsk","cuminc")
importFrom("randomForest","randomForest")
importFrom("stats","median")
importFrom("stats","predict")
272 changes: 272 additions & 0 deletions R/cmprskstdca.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
#'@title cmprskstdca
#'@name cmprskstdca
#'@description Generate data for plotting survival analysis decision curves.
#'@details This function was created and written by Dr Andrew Vickers to generate decision curve data.
#'@param data a data frame containing the variables in the model.
#'@param outcome the outcome, response variable. Must be a variable contained within the data frame specified in data=.
#'@param predictors the predictor variable(s). Must be a variable(s) contained within the data frame specified in data=.
#'@param probability specifies whether or not each of the independent variables are probabilities. The default is TRUE.
#'@param xstart starting value for x-axis (threshold probability) between 0 and 1. The default is 0.01.
#'@param xstop stopping value for x-axis (threshold probability) between 0 and 1. The default is 0.99.
#'@param xby increment for threshold probability. The default is 0.01.
#'@param ymin minimum bound for graph.
#'@param harm specifies the harm(s) associated with the independent variable(s). The default is none.
#'@param graph specifies whether or not to display graph of net benefits. The default is TRUE.
#'@param intervention plot net reduction in interventions
#'@param interventionper number of net reduction in interventions per interger. The default is 100
#'@param loess.span specifies the degree of smoothing. The default is 0.10.
#'@param timepoint specifies the time point at which the decision curve analysis is performed.
#'@param cmprsk if evaluating outcome in presence of a competing risk. The default is FALSE
#'@param smooth specifies whether or not to smooth net benefit curve. The default is FALSE.
#'@param ttoutcome Enter the time variable in your data.
#'@export
#'
#'@return Returns a data for plotting a decision curve.
#'
#'
#'

utils::globalVariables(c('complete.cases',
'coxph',
'survfit',
'cuminc',
'timepoints',
'loess',
'lines',
'legend'
))




cmprskstdca <- function(data, outcome, ttoutcome, timepoint, predictors, xstart=0.01, xstop=0.99, xby=0.01,
ymin=-0.05, probability=NULL, harm=NULL,graph=TRUE, intervention=FALSE,
interventionper=100, smooth=FALSE,loess.span=0.10,cmprsk=FALSE) {

#ONLY KEEPING COMPLETE CASES
data=data[complete.cases(data[c(outcome,ttoutcome,predictors)]),c(outcome,ttoutcome,predictors)]

# outcome MUST BE CODED AS 0 AND 1
if ((length(data[!(data[outcome]==0 | data[outcome]==1),outcome])>0) & cmprsk==FALSE) {
stop("outcome must be coded as 0 and 1")
}

# xstart IS BETWEEN 0 AND 1
if (xstart<0 | xstart>1) {
stop("xstart must lie between 0 and 1")
}

# xstop IS BETWEEN 0 AND 1
if (xstop<0 | xstop>1) {
stop("xstop must lie between 0 and 1")
}

# xby IS BETWEEN 0 AND 1
if (xby<=0 | xby>=1) {
stop("xby must lie between 0 and 1")
}

# xstart IS BEFORE xstop
if (xstart>=xstop) {
stop("xstop must be larger than xstart")
}

#STORING THE NUMBER OF PREDICTORS SPECIFIED
pred.n=length(predictors)

#IF probability SPECIFIED ENSURING THAT EACH PREDICTOR IS INDICATED AS A T OR F
if (length(probability)>0 & pred.n!=length(probability)) {
stop("Number of probabilities specified must be the same as the number of predictors being checked.")
}


#IF harm SPECIFIED ENSURING THAT EACH PREDICTOR HAS A SPECIFIED HARM
if (length(harm)>0 & pred.n!=length(harm)) {
stop("Number of harms specified must be the same as the number of predictors being checked.")
}

#INITIALIZING DEFAULT VALUES FOR PROBABILITES AND HARMS IF NOT SPECIFIED
if (length(harm)==0) {
harm=rep(0,pred.n)
}
if (length(probability)==0) {
probability=rep(TRUE,pred.n)
}

# THE PREDICTOR NAMES CANNOT BE EQUAL TO all OR none.
if (length(predictors[predictors=="all" | predictors=="none"])) {
stop("Prediction names cannot be equal to all or none.")
}

#CHECKING THAT EACH probability ELEMENT IS EQUAL TO T OR F,
#AND CHECKING THAT PROBABILITIES ARE BETWEEN 0 and 1
#IF NOT A PROB THEN CONVERTING WITH A COX REGRESSION
for(m in 1:pred.n) {
if (probability[m]!=TRUE & probability[m]!=FALSE) {
stop("Each element of probability vector must be TRUE or FALSE")
}
if (probability[m]==TRUE & (max(data[predictors[m]])>1 | min(data[predictors[m]])<0)) {
stop(paste(predictors[m],"must be between 0 and 1 OR sepcified as a non-probability in the probability option",sep=" "))
}
if(probability[m]==FALSE) {
model=NULL
pred=NULL
model=coxph(Surv(data.matrix(data[ttoutcome]),data.matrix(data[outcome]==1)) ~ data.matrix(data[predictors[m]]))
pred=data.frame(1-c(summary(survfit(model, newdata=data), time=timepoint)$surv))
names(pred)=predictors[m]
data=cbind(data[names(data)!=predictors[m]],pred)
#print(paste(predictors[m],"converted to a probability with Cox regression. Due to linearity and proportional hazards assumption, miscalibration may occur.",sep=" "))
}
}

######### CALCULATING NET BENEFIT #########
N=dim(data)[1]

# getting the probability of the event for all subjects
# this is used for the net benefit associated with treating all patients
if(cmprsk==FALSE) {
km.cuminc=survfit(Surv(data.matrix(data[ttoutcome]),data.matrix(data[outcome]))~1)
pd=1 - summary(km.cuminc, times=timepoint)$surv
} else {
cr.cuminc=cuminc(data[[ttoutcome]],data[[outcome]])
pd=timepoints(cr.cuminc, times=timepoint)$est[1]
}

#creating dataset that is one line per threshold for the treat all and treat none strategies;
# CREATING DATAFRAME THAT IS ONE LINE PER THRESHOLD PER all AND none STRATEGY
nb=data.frame(seq(from=xstart, to=xstop, by=xby))
names(nb)="threshold"
interv=nb
error=NULL

nb["all"]=pd - (1-pd)*nb$threshold/(1-nb$threshold)
nb["none"]=0

# CYCLING THROUGH EACH PREDICTOR AND CALCULATING NET BENEFIT
for(m in 1:pred.n){
nb[predictors[m]]=NA

for(t in 1:length(nb$threshold)){
#calculating number of true and false postives;
px=sum(data[predictors[m]]>nb$threshold[t])/N

if (px==0){
error=rbind(error,paste(predictors[m],": No observations with risk greater than ",nb$threshold[t]*100,"%",sep=""))
break
} else {
#calculate risk using Kaplan Meier
if(cmprsk==FALSE) {
km.cuminc=survfit(Surv(data.matrix(data[data[predictors[m]]>nb$threshold[t],ttoutcome]),data.matrix(data[data[predictors[m]]>nb$threshold[t],outcome]))~1)
pdgivenx=(1 - summary(km.cuminc, times=timepoint,extend = TRUE)$surv)
if(length(pdgivenx)==0){
error=rbind(error,paste(predictors[m],": No observations with risk greater than ",nb$threshold[t]*100,"% that have followup through the timepoint selected",sep=""))
break
}
#calculate risk using competing risk
} else {
cr.cuminc=cuminc(data[[ttoutcome]][data[[predictors[m]]]>nb$threshold[t]],data[[outcome]][data[[predictors[m]]]>nb$threshold[t]])
pdgivenx=timepoints(cr.cuminc, times=timepoint)$est[1]
if(is.na(pdgivenx)){
error=rbind(error,paste(predictors[m],": No observations with risk greater than ",nb$threshold[t]*100,"% that have followup through the timepoint selected",sep=""))
break
}
}
#calculating NB based on calculated risk
nb[t,predictors[m]]=pdgivenx*px - (1-pdgivenx)*px*nb$threshold[t]/(1-nb$threshold[t]) - harm[m]

}
}
interv[predictors[m]]=(nb[predictors[m]] - nb["all"])*interventionper/(interv$threshold/(1-interv$threshold))
}
if(length(error)>0){
#print(paste(error,", and therefore net benefit not calculable in this range.",sep=""))
}

# CYCLING THROUGH EACH PREDICTOR AND SMOOTH NET BENEFIT AND INTERVENTIONS AVOIDED
for(m in 1:pred.n) {
if (smooth==TRUE){
lws=loess(data.matrix(nb[!is.na(nb[[predictors[m]]]),predictors[m]]) ~ data.matrix(nb[!is.na(nb[[predictors[m]]]),"threshold"]),span=loess.span)
nb[!is.na(nb[[predictors[m]]]),paste(predictors[m],"_sm",sep="")]=lws$fitted

lws=loess(data.matrix(interv[!is.na(nb[[predictors[m]]]),predictors[m]]) ~ data.matrix(interv[!is.na(nb[[predictors[m]]]),"threshold"]),span=loess.span)
interv[!is.na(nb[[predictors[m]]]),paste(predictors[m],"_sm",sep="")]=lws$fitted
}
}


# PLOTTING GRAPH IF REQUESTED
if (graph==TRUE) {
# PLOTTING INTERVENTIONS AVOIDED IF REQUESTED
if(intervention==TRUE) {
# initialize the legend label, color, and width using the standard specs of the none and all lines
legendlabel <- NULL
legendcolor <- NULL
legendwidth <- NULL
legendpattern <- NULL

#getting maximum number of avoided interventions
ymax=max(interv[predictors],na.rm = TRUE)

#INITIALIZING EMPTY PLOT WITH LABELS
plot(x=nb$threshold, y=nb$all, type="n" ,xlim=c(xstart, xstop), ylim=c(ymin, ymax), xlab="Threshold probability", ylab=paste("Net reduction in interventions per",interventionper,"patients"))

#PLOTTING INTERVENTIONS AVOIDED FOR EACH PREDICTOR
for(m in 1:pred.n) {
if (smooth==TRUE){
lines(interv$threshold,data.matrix(interv[paste(predictors[m],"_sm",sep="")]),col=m,lty=2)
} else {
lines(interv$threshold,data.matrix(interv[predictors[m]]),col=m,lty=2)
}

# adding each model to the legend
legendlabel <- c(legendlabel, predictors[m])
legendcolor <- c(legendcolor, m)
legendwidth <- c(legendwidth, 1)
legendpattern <- c(legendpattern, 2)
}

} else {
# PLOTTING NET BENEFIT IF REQUESTED
# initialize the legend label, color, and width using the standard specs of the none and all lines
legendlabel <- c("None", "All")
legendcolor <- c(17, 8)
legendwidth <- c(2, 2)
legendpattern <- c(1, 1)

#getting maximum net benefit
ymax=max(nb[names(nb)!="threshold"],na.rm = TRUE)

# inializing new benfit plot with treat all option
plot(x=nb$threshold, y=nb$all, type="l", col=8, lwd=2 ,xlim=c(xstart, xstop), ylim=c(ymin, ymax), xlab="Threshold probability", ylab="Net benefit")
# adding treat none option
lines(x=nb$threshold, y=nb$none,lwd=2)
#PLOTTING net benefit FOR EACH PREDICTOR
for(m in 1:pred.n) {
if (smooth==TRUE){
lines(nb$threshold,data.matrix(nb[paste(predictors[m],"_sm",sep="")]),col=m,lty=2)
} else {
lines(nb$threshold,data.matrix(nb[predictors[m]]),col=m,lty=2)
}
# adding each model to the legend
legendlabel <- c(legendlabel, predictors[m])
legendcolor <- c(legendcolor, m)
legendwidth <- c(legendwidth, 1)
legendpattern <- c(legendpattern, 2)
}
}
# then add the legend
legend("topright", legendlabel, cex=0.8, col=legendcolor, lwd=legendwidth, lty=legendpattern)

}

#RETURNING RESULTS
results=list()
results$N=N
results$predictors=data.frame(cbind(predictors,harm,probability))
names(results$predictors)=c("predictor","harm.applied","probability")
results$interventions.avoided.per=interventionper
results$net.benefit=nb
results$interventions.avoided=interv
return(results)
}

15 changes: 15 additions & 0 deletions R/df_surv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#' @title A data for competitive risk modelling.
#'
#' @description A data for competitive risk modelling.
#'
#' @name df_surv
#'
#' @docType data
#'
#' @usage data(df_surv)
#'
#' @keywords df_surv
#' @examples
#' data(df_surv)
#'
'df_surv'
2 changes: 1 addition & 1 deletion R/getplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ getplot<-function(data,pyh=NULL,relcol="#c01e35",irrelcol="#0151a2",relabel="Nom
p3<-p3+geom_label(data=point_df,
aes(x=x,y=y,label=label),nudge_x = nudge_x, nudge_y = nudge_y,size=po.text.size,
fill = po.text.fill,col=po.text.col)
p<-p3
}
p<-p3
}
p
}
Expand Down

0 comments on commit 69b497e

Please sign in to comment.