# Sensitivity analysis
For each research gap identified, we simulate what the mapping of RCTs across diseases within regions would have been if the misclassification of RCTs towards groups of diseases had been corrected, given the sensitivity and specificity of the classifier for identifying each group of diseases.

To estimate the performance of the classifier for each group of diseases, we use a test set of 2,763 trials manually classified according to the 27-class grouping of diseases used in this work. The test set is described in Atal et al. BMC Bioinformatics 2016.

The method used is based on the one presented in Fox et al. Int J Epidemiol 2005.

To do so, for each disease for which we found a local research gap we will:

1. Estimate the sensitivity and specificity of the classifier to identify the disease
2. Estimate the sensitivity and specificity of the classifier to identify other studies relevant to the burden of diseases
3. Derive triangular distributions simulating the corresponding sensitivities and specificities
4. Repeat the following simulation N = 60,000 times:
    * Randomly choose a sens and spec for identifying the disease and identifying another disease (no correlation between both)
    * Derive Positive and Negative Predictive Values (PPV and NPV) for each.
    * Simulate the correction of the classification based on PPVs and NPVs
    * Derive the proportion of RCTs concerning the disease among all RCTs concerning the burden of disease in the region
5. Derive the 95% upper-bound simulation interval of the proportion of RCTs concerning the disease among all RCTs concerning the burden of diseases

## 1. Estimating sensitivities and specificities based on test set

In [2]:
# Load the manually classified test set and display its dimensions.
test_set <- read.table(
  file = "/media/igna/Elements/HotelDieu/Cochrane/MetaMapBurden/Paper_classifier/NCT_data_classified_to28cats.txt"
)
dim(test_set)

In [3]:
# Remove injuries (category "28") from the categories attached to each trial,
# so that they are not counted among trials concerning the burden of diseases.
# Both columns are rewritten as "&"-separated strings ("" when no category
# remains for a trial).
# vapply() is used instead of sapply() so that the result is always a
# character vector, even when the test set has no rows.
# NOTE(review): GBDnp is split on "&&" whereas GBD28 is split on "&";
# presumably the two columns use different separators in the input file -
# confirm against the data.
test_set$GBDnp <- vapply(
  strsplit(as.character(test_set$GBDnp), "&&"),
  function(codes) paste(codes[codes != "28"], collapse = "&"),
  character(1)
)
test_set$GBD28 <- vapply(
  strsplit(as.character(test_set$GBD28), "&"),
  function(codes) paste(codes[codes != "28"], collapse = "&"),
  character(1)
)

In [9]:
# Turn each "&"-separated category string into a numeric vector of category
# ids, one list element per trial: tst comes from the GBDnp column and alg
# from the GBD28 column.
tst <- lapply(strsplit(test_set$GBDnp, "&"), as.numeric)
alg <- lapply(strsplit(test_set$GBD28, "&"), as.numeric)

In [31]:
source('Evaluation_metrics.R')

In [37]:
# Ids of the 27 categories, and the table that provides their names
# (its cause_name column is used later to label each category).
dis <- seq_len(27)
Mgbd <- read.table(
  file = "/home/igna/Desktop/Programs GBD/Classifier_Trial_GBD/Databases/Taxonomy_DL/GBD_data/GBD_ICD.txt"
)

In [57]:
# For each category in 1:27, estimate the sensitivity and specificity of the
# classifier (a) for finding that category and (b) for finding any other
# category, and stack the results in PERF_F (one row per category).
# conf_matrix() and metr.ci() are not defined in this notebook; they
# presumably come from Evaluation_metrics.R - confirm.

# Fix the RNG seed so the cell is reproducible.
# NOTE(review): presumably metr.ci() relies on random resampling - confirm.
set.seed(7212)

# Category ids as character labels "1".."27" (replaces the integer `dis`).
dis <- as.character(1:27)

# Accumulator: one row is appended per category.
PERF_F  <- data.frame()
for(i in dis){
    # Recode the classifier output of each trial: 1 if category i is present,
    # 2 if any other category is present (possibly both, NULL if neither).
    # i is a character label and x is numeric; %in% matches them after coercion.
    ALG <- lapply(alg,function(x){rs <- c()
                                  if(i%in%x) rs <- c(1)
                                  if(sum(setdiff(dis,i)%in%x)!=0) rs <- c(rs,2)
                                  return(rs)
                                      })

    # Same recoding applied to the manual classification of each trial.
    DT <- lapply(tst,function(x){rs <- c()
                                if(i%in%x) rs <- c(1)
                                if(sum(setdiff(dis,i)%in%x)!=0) rs <- c(rs,2)
                                return(rs)
                                    })

    # Confusion counts for labels 1 and 2.
    # NOTE(review): assumes CM has one row per label (1, then 2) and four count
    # columns - confirm the column order in conf_matrix().
    CM <- conf_matrix(ALG,DT,c(1,2))

    # Sum of count columns 1 and 4; its first element becomes column "Nb".
    # NOTE(review): presumably the number of test-set trials manually assigned
    # to category i - confirm.
    PERF <- CM[,1]+CM[,4]

    # For each row of CM, arrange the counts (in order 1,2,4,3) as a 2x2 matrix
    # filled by row and pass it to metr.ci(), whose result exposes sens/spec
    # with lower.* and upper.* bounds (as used below).
    LR <- apply(CM,1,function(x){m <- matrix(x[c(1,2,4,3)],ncol=2,byrow=TRUE)
                                 metr.ci(m)})

    # For each label, 8 values: sens, lower, upper, "xx.x [lo-up]" string,
    # then the same four for spec. Because numbers and strings are combined
    # with c(), every value is stored as character.
    lr <- unlist(lapply(LR,function(x){
        c(x$sens,x$lower.sens,x$upper.sens,
          paste(c(format(round(100*x$sens,1),nsmall=1),
                  " [",format(round(100*x$lower.sens,1),nsmall=1),
                  "-",format(round(100*x$upper.sens,1),nsmall=1),"]"),
                collapse=""),
          x$spec,x$lower.spec,x$upper.spec,
          paste(c(format(round(100*x$spec,1),nsmall=1),
                  " [",format(round(100*x$lower.spec,1),nsmall=1),"-",
                  format(round(100*x$upper.spec,1),nsmall=1),"]"),
                collapse=""))
    }))
                                    
    # 2 labels x 8 values laid out on a single row of 16 columns.
    lr <- matrix(lr,ncol=16,byrow=TRUE)

    PERF <- cbind(PERF[1],lr)
    PERF <- data.frame(PERF)
    # Name of category i, taken from the i-th entry of Mgbd$cause_name.
    # NOTE(review): assumes row k of Mgbd describes category k - confirm.
    PERF$GBD <- as.character(Mgbd$cause_name)[as.numeric(i)]

    # "_dis" columns refer to label 1 (category i), "_oth" columns to label 2
    # (any other category). The spelling "Sensitiviy" is kept as is: these
    # names end up in the table written to disk further down.
    names(PERF) <- c("Nb",
                     "Sens_dis","Sens_dis_low","Sens_dis_up",
                     "Sensitiviy_dis",
                     "Spec_dis","Spec_dis_low","Spec_dis_up",
                     "Specificity_dis",
                     "Sens_oth","Sens_oth_low","Sens_oth_up",
                     "Sensitiviy_oth",
                     "Spec_oth","Spec_oth_low","Spec_oth_up",
                     "Specificity_oth",
                     "GBD")

    # Append the row of category i.
    PERF_F <- rbind(PERF_F,PERF)
}


In [58]:
# Move the category name (column 18, "GBD") in front of the 17 metric
# columns, then prepend the category id as first column "dis".
PERF_F <- cbind(dis, PERF_F[, c(18, 1:17)])

In [59]:
# Preview the first six rows of the performance table.
head(PERF_F, n = 6L)

Unnamed: 0,dis,GBD,Nb,Sens_dis,Sens_dis_low,Sens_dis_up,Sensitiviy_dis,Spec_dis,Spec_dis_low,Spec_dis_up,Specificity_dis,Sens_oth,Sens_oth_low,Sens_oth_up,Sensitiviy_oth,Spec_oth,Spec_oth_low,Spec_oth_up,Specificity_oth
1,1,Tuberculosis,16,0.875,0.719423456130283,0.885370783468889,87.5 [71.9-88.5],0.999271933017838,0.997949399364193,0.999200033295566,99.9 [99.8-99.9],0.934554973821989,0.928653123483889,0.939002607858828,93.5 [92.9-93.9],0.56687898089172,0.543597039339523,0.589078822950315,56.7 [54.4-58.9]
11,2,HIV/AIDS,97,0.88659793814433,0.839413345514168,0.904328374182265,88.7 [83.9-90.4],0.997374343585896,0.995603153829637,0.997714255051797,99.7 [99.6-99.8],0.935018050541516,0.929019436341732,0.939511058184784,93.5 [92.9-94.0],0.608775137111517,0.587219422352139,0.628813699509765,60.9 [58.7-62.9]
12,3,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",49,0.816326530612245,0.738785244854365,0.847875318495933,81.6 [73.9-84.8],0.992262343404569,0.989848727692075,0.993284409573193,99.2 [99.0-99.3],0.936198493575543,0.930303430284688,0.940611242797729,93.6 [93.0-94.1],0.590909090909091,0.568446596769155,0.612001655149792,59.1 [56.8-61.2]
13,4,Malaria,14,1.0,0.784689197262364,1.0,100.0 [78.5-100.0],0.999636231356857,0.998430447299834,0.999447578402672,100.0 [99.8-99.9],0.934554973821989,0.928653123483889,0.939002607858828,93.5 [92.9-93.9],0.56687898089172,0.543597039339523,0.589078822950315,56.7 [54.4-58.9]
14,5,Neglected tropical diseases excluding malaria,7,0.857142857142857,0.606244956336051,0.854947590044887,85.7 [60.6-85.5],1.0,0.998608087139058,1.0,100.0 [99.9-100.0],0.935189212701174,0.929319619877219,0.939606893641749,93.5 [92.9-94.0],0.5625,0.539050200336141,0.584923421296714,56.2 [53.9-58.5]
15,6,Maternal disorders,43,0.395348837209302,0.332359478586957,0.475503038367278,39.5 [33.2-47.6],0.998161764705882,0.99656359143017,0.998354813686298,99.8 [99.7-99.8],0.940812720848057,0.93509623212515,0.945035838940323,94.1 [93.5-94.5],0.579158316633267,0.556535218339922,0.600571954535754,57.9 [55.7-60.1]


In [60]:
# Save the per-category sensitivities and specificities; the file is read
# back at the start of section 2.
write.table(
  x = PERF_F,
  file = '/media/igna/Elements/HotelDieu/Cochrane/Mapping_Cancer/Incertitude_mapping/Sp_Sens_evaluation/Sens_and_Spec_per_27disease_data.txt'
)

### Analysis of performance evaluation

In [61]:
# Count the categories whose point estimate of Sens_dis falls outside its
# own [low, up] interval.
with(PERF_F, {
  est <- as.numeric(as.character(Sens_dis))
  lo <- as.numeric(as.character(Sens_dis_low))
  up <- as.numeric(as.character(Sens_dis_up))
  table(est < lo | est > up)
})


FALSE  TRUE 
   24     2 

In [62]:
# Show the categories whose point estimate of Sens_dis falls outside its
# own [low, up] interval.
PERF_F[with(PERF_F, {
  est <- as.numeric(as.character(Sens_dis))
  lo <- as.numeric(as.character(Sens_dis_low))
  up <- as.numeric(as.character(Sens_dis_up))
  est < lo | est > up
}), ]

Unnamed: 0,dis,GBD,Nb,Sens_dis,Sens_dis_low,Sens_dis_up,Sensitiviy_dis,Spec_dis,Spec_dis_low,Spec_dis_up,Specificity_dis,Sens_oth,Sens_oth_low,Sens_oth_up,Sensitiviy_oth,Spec_oth,Spec_oth_low,Spec_oth_up,Specificity_oth
14.0,5.0,Neglected tropical diseases excluding malaria,7.0,0.857142857142857,0.606244956336051,0.854947590044887,85.7 [60.6-85.5],1.0,0.998608087139058,1.0,100.0 [99.9-100.0],0.935189212701174,0.929319619877219,0.939606893641749,93.5 [92.9-94.0],0.5625,0.539050200336141,0.584923421296714,56.2 [53.9-58.5]
122.0,23.0,Congenital anomalies,23.0,0.956521739130435,0.83960461606528,0.942767167088678,95.7 [84.0-94.3],0.987591240875912,0.984766731712753,0.989050465855371,98.8 [98.5-98.9],0.92904073587385,0.922938322186782,0.933701735847779,92.9 [92.3-93.4],0.572916666666667,0.549847822304949,0.59482766741149,57.3 [55.0-59.5]
,,,,,,,,,,,,,,,,,,,


In [63]:
# Count the categories whose point estimate of Spec_dis falls outside its
# own [low, up] interval.
with(PERF_F, {
  est <- as.numeric(as.character(Spec_dis))
  lo <- as.numeric(as.character(Spec_dis_low))
  up <- as.numeric(as.character(Spec_dis_up))
  table(est < lo | est > up)
})


FALSE  TRUE 
   24     3 

In [64]:
# Show the categories whose point estimate of Spec_dis falls outside its
# own [low, up] interval.
PERF_F[with(PERF_F, {
  est <- as.numeric(as.character(Spec_dis))
  lo <- as.numeric(as.character(Spec_dis_low))
  up <- as.numeric(as.character(Spec_dis_up))
  est < lo | est > up
}), ]

Unnamed: 0,dis,GBD,Nb,Sens_dis,Sens_dis_low,Sens_dis_up,Sensitiviy_dis,Spec_dis,Spec_dis_low,Spec_dis_up,Specificity_dis,Sens_oth,Sens_oth_low,Sens_oth_up,Sensitiviy_oth,Spec_oth,Spec_oth_low,Spec_oth_up,Specificity_oth
1,1,Tuberculosis,16,0.875,0.719423456130283,0.885370783468889,87.5 [71.9-88.5],0.999271933017838,0.997949399364193,0.999200033295566,99.9 [99.8-99.9],0.934554973821989,0.928653123483889,0.939002607858828,93.5 [92.9-93.9],0.56687898089172,0.543597039339523,0.589078822950315,56.7 [54.4-58.9]
13,4,Malaria,14,1.0,0.784689197262364,1.0,100.0 [78.5-100.0],0.999636231356857,0.998430447299834,0.999447578402672,100.0 [99.8-99.9],0.934554973821989,0.928653123483889,0.939002607858828,93.5 [92.9-93.9],0.56687898089172,0.543597039339523,0.589078822950315,56.7 [54.4-58.9]
110,11,Leprosy,2,1.0,0.342380227506653,1.0,100.0 [34.2-100.0],0.999637812386816,0.998437259288162,0.999449976234128,100.0 [99.8-99.9],0.934895833333333,0.929023170953626,0.939320706109767,93.5 [92.9-93.9],0.557734204793028,0.534168237081432,0.580341815961444,55.8 [53.4-58.0]


In [65]:
# Same check for the "other category" metrics: count the categories whose
# point estimate falls outside its own [low, up] interval, first for
# Sens_oth, then for Spec_oth.
with(PERF_F, {
  est <- as.numeric(as.character(Sens_oth))
  lo <- as.numeric(as.character(Sens_oth_low))
  up <- as.numeric(as.character(Sens_oth_up))
  table(est < lo | est > up)
})
with(PERF_F, {
  est <- as.numeric(as.character(Spec_oth))
  lo <- as.numeric(as.character(Spec_oth_low))
  up <- as.numeric(as.character(Spec_oth_up))
  table(est < lo | est > up)
})


FALSE 
   27 


FALSE 
   27 

## 2. Deriving triangular distribution for given confidence intervals

In [3]:
source('Triangular_distribution.R')

In [4]:
# Reload the per-category sensitivities and specificities saved in section 1.
PERF_F <- read.table(
  file = '/media/igna/Elements/HotelDieu/Cochrane/Mapping_Cancer/Incertitude_mapping/Sp_Sens_evaluation/Sens_and_Spec_per_27disease_data.txt'
)

In [5]:
# Pick the row(s) of PERF_F whose category name matches the pattern "Diarr".
dis <- 'Diarr'
i <- grep(pattern = dis, x = PERF_F$GBD)

In [6]:
# Call triang_distrib() with the point estimate and the lower/upper bounds of
# the sensitivity for the selected category.
# NOTE(review): triang_distrib() is not defined in this notebook; presumably
# it comes from Triangular_distribution.R - confirm.
triang_distrib(
  mn = PERF_F[i, "Sens_dis"],
  low = PERF_F[i, "Sens_dis_low"],
  up = PERF_F[i, "Sens_dis_up"]
)