# Identification of gaps between effort research and health needs

We load two tables: the number of RCTs per region and disease, and the number of DALYs per region and disease.

In [72]:
# NOTE(review): machine-specific absolute path; the relative paths passed to
# read.table()/write.table() in this notebook are resolved against it.
setwd('/media/igna/Elements/HotelDieu/Cochrane')

# Burden table (GBD) and RCT table. Later cells use row names as diseases and
# column names as regions, and read totals from a "Tot" row/column.
GBD <- read.table("Mapping_Cancer/Tables/GBD_data_per_region_and_27_diseases_2005.txt")
RCT <- read.table("Mapping_Cancer/Tables/RCTs_data_per_region_and_27_diseases_2005_2015.txt")
# Sort the RCT rows by row name.
RCT <- RCT[order(rownames(RCT)),]


### Gaps across regions

In [73]:
# Across regions: number and share of RCTs versus burden (in million DALYs)
# and share of burden, one row per region.

# Regional RCT counts are read from the "Tot" row, leaving out the last column.
rct_region_row <- RCT[rownames(RCT) == "Tot", -ncol(RCT)]
region_names <- names(rct_region_row)
rct_by_region <- as.numeric(unlist(rct_region_row))
rct_overall <- RCT$Tot[rownames(RCT) == "Tot"]

# Regional burden: column sums of GBD without its last two columns.
gbd_by_region <- as.numeric(apply(GBD[-(ncol(GBD) - 0:1)], 2, sum))

D <- data.frame(
    RCTs = rct_by_region,
    Prop_RCTs = 100 * rct_by_region / rct_overall,
    GBD = gbd_by_region / 1e6,
    Prop_GBD = 100 * gbd_by_region / sum(GBD$Tot),
    row.names = region_names
)

In [74]:
# RCTs per million DALYs: D$GBD was already divided by 1e6 when D was built.
D[["RCTs_per_MDALYs"]] <- D[["RCTs"]] / D[["GBD"]]

In [75]:
# Penalise scientific notation heavily and print 4 significant digits.
options(scipen = 100, digits = 4)

In [76]:
# Keep a copy of the regional table and show it from the most to the least
# researched region (RCTs per million DALYs).
Dregs <- D
# Use the full column name: `$RCTs_per` only worked through partial matching,
# which silently returns NULL as soon as another column shares the prefix.
Dregs[order(Dregs$RCTs_per_MDALYs, decreasing = TRUE), ]

Unnamed: 0,RCTs,Prop_RCTs,GBD,Prop_GBD,RCTs_per_MDALYs
High.income,67659.0,73.79,231.55,10.43,292.2
North.Africa.and.Middle.East,8683.0,9.469,102.273,4.607,84.9
Central.Europe..Eastern.Europe..and.Central.Asia,6998.0,7.632,151.518,6.825,46.186
Latin.America.and.Caribbean,4766.0,5.198,115.507,5.203,41.261
Southeast.Asia..East.Asia.and.Oceania,9639.0,10.51,460.95,20.76,20.91
South.Asia,4848.0,5.287,610.712,27.509,7.938
Sub.Saharian.Africa,2585.0,2.819,547.555,24.664,4.721


### Gaps across diseases
Across diseases number and proportion of RCTs and GBD
And RCTs per million DALYs

In [77]:
# Across diseases: number and share of RCTs versus burden (DALYs) and share
# of burden, one row per disease.
#
# RCT rows are looked up by GBD's row names instead of being paired by
# position: RCT was re-sorted after loading while GBD was not, and the result
# is labelled with GBD's row names, so a positional pairing would silently
# mislabel rows if the two orders ever differed.
diseases <- rownames(GBD)
stopifnot(all(diseases %in% rownames(RCT)))
rct_overall <- RCT$Tot[rownames(RCT) == "Tot"]
rct_by_disease <- as.numeric(RCT[diseases, "Tot"])

D <- data.frame(
    RCTs = rct_by_disease,
    Prop_RCTs = 100 * rct_by_disease / rct_overall,
    GBD = GBD$Tot,
    Prop_GBD = 100 * GBD$Tot / sum(GBD$Tot),
    row.names = diseases
)

In [78]:
# RCTs per million DALYs (here D$GBD is in DALYs, hence the division by 1e6).
D$RCTs_per_MDALYs <- with(D, RCTs / (GBD / 1e6))

In [79]:
# Rank diseases by RCTs per million DALYs, then show the ten most and the ten
# least researched among diseases with at least 1% of the total burden.
# The full column name replaces `$RCTs_per`, which relied on partial matching.
D <- D[order(D$RCTs_per_MDALYs, decreasing = TRUE), ]
head(D[D$Prop_GBD >= 1, ], n = 10)
tail(D[D$Prop_GBD >= 1, ], n = 10)

Unnamed: 0,RCTs,Prop_RCTs,GBD,Prop_GBD,RCTs_per_MDALYs
Sense organ diseases,6444.0,7.028,32290593.297,1.454,199.563
Digestive diseases (except cirrhosis),6109.0,6.662,32576108.319,1.467,187.53
"Diabetes, urinary diseases and male infertility",12045.0,13.136,75821480.094,3.415,158.86
Skin and subcutaneous diseases,4463.0,4.867,35061984.292,1.579,127.289
Neurological disorders,7038.0,7.675,65826670.819,2.965,106.917
Neoplasms,14149.0,15.43,176868544.264,7.967,79.997
Mental and behavioral disorders,10080.0,10.993,171914381.395,7.744,58.634
Cirrhosis of the liver,1566.0,1.708,30462721.116,1.372,51.407
Musculoskeletal disorders,7761.0,8.464,151554779.387,6.827,51.209
Chronic respiratory diseases,5168.0,5.636,112485355.223,5.067,45.944


Unnamed: 0,RCTs,Prop_RCTs,GBD,Prop_GBD,RCTs_per_MDALYs
Chronic respiratory diseases,5168.0,5.636,112485355.223,5.067,45.944
Congenital anomalies,1756.0,1.915,43254504.439,1.948,40.597
Cardiovascular and circulatory diseases,10547.0,11.5,287404109.09,12.95,36.7
HIV/AIDS,1647.0,1.796,96159861.934,4.331,17.128
Nutritional deficiencies,1509.0,1.646,89065270.102,4.012,16.943
Neglected tropical diseases excluding malaria,428.0,0.4668,27043024.7392,1.2181,15.8266
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",3696.0,4.031,329285536.747,14.832,11.224
Tuberculosis,365.0,0.3981,56404877.8818,2.5407,6.4711
Malaria,482.0,0.5256,101344543.5782,4.5649,4.7561
Neonatal disorders,914.0,0.9968,220288076.0198,9.9226,4.1491


### Gaps within regions

For each region we compare the local share of RCTs across diseases to the local share of burden across diseases. We say that a gap exists between research and needs if the share of RCTs concerning a disease is at most half the share of burden attributable to that disease.

In [80]:
# Share of RCTs concerning each disease among all RCTs relevant to the
# burden of diseases in the region: every row of RCT is divided by the
# "Tot" row, column by column.
is_total <- rownames(RCT) == "Tot"
Pr_RCT <- do.call(rbind, apply(RCT, 1, function(counts) counts / RCT[is_total, ]))
head(Pr_RCT)

# The "Tot" row has served its purpose; drop it from both tables.
Pr_RCT <- Pr_RCT[rownames(Pr_RCT) != "Tot", ]
RCT <- RCT[!is_total, ]

Unnamed: 0,Central.Europe..Eastern.Europe..and.Central.Asia,High.income,Latin.America.and.Caribbean,North.Africa.and.Middle.East,South.Asia,Southeast.Asia..East.Asia.and.Oceania,Sub.Saharian.Africa,Tot
Cardiovascular and circulatory diseases,0.11275,0.11673,0.10533,0.09835,0.0823,0.12823,0.05029,0.11502
Chronic respiratory diseases,0.0916,0.06079,0.06777,0.03812,0.05136,0.04772,0.05532,0.05636
Cirrhosis of the liver,0.014004,0.015002,0.01217,0.016469,0.026403,0.028322,0.002708,0.017078
Congenital anomalies,0.00986,0.02083,0.01154,0.01693,0.01093,0.01484,0.00619,0.01915
"Diabetes, urinary diseases and male infertility",0.1436,0.1338,0.1605,0.1425,0.1469,0.1296,0.1014,0.1314
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",0.04115,0.03745,0.05204,0.02914,0.07364,0.04534,0.05803,0.04031


In [81]:
# Share of the burden attributable to each disease among the whole burden of
# diseases in the region.
# Drop the last column of GBD (NOTE: not idempotent; re-running this cell
# drops one more column each time).
GBD <- GBD[,-ncol(GBD)]
# Per-column burden totals, computed once instead of once per disease inside
# the row-wise function below.
region_burden <- apply(GBD, 2, sum)
region_burden

# apply() over rows returns one column per disease, so the result has the
# columns of GBD as rows and the diseases as columns.
Pr_GBD <- apply(GBD, 1, function(x) x / region_burden)
head(Pr_GBD)

Unnamed: 0,Cardiovascular and circulatory diseases,Chronic respiratory diseases,Cirrhosis of the liver,Congenital anomalies,"Diabetes, urinary diseases and male infertility","Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",Digestive diseases (except cirrhosis),Gynecological diseases,Hemoglobinopathies and hemolytic anemias,Hepatitis,⋯,Neonatal disorders,Neoplasms,Neurological disorders,Nutritional deficiencies,Oral disorders,Sense organ diseases,Sexually transmitted diseases excluding HIV,Skin and subcutaneous diseases,Sudden infant death syndrome,Tuberculosis
Central.Europe..Eastern.Europe..and.Central.Asia,0.350858980692946,0.0372537444683037,0.0269957412490737,0.013723943877795,0.0333282440200436,0.0480578018442715,0.0167763047149494,0.0041358046687896,0.0028036296001904,0.0008465025964932,⋯,0.0256333441617377,0.128750361445841,0.0311800836504041,0.0142521573821491,0.0090164281083686,0.0182278250966079,0.000947778516524,0.0125148772294602,0.0007397221641372,0.0182764909724469
High.income,0.188061072831755,0.0589155917484321,0.0172559448100444,0.0099195919020417,0.0532859344036023,0.02957311172767,0.0169225690896265,0.0052581276300064,0.0069643487705165,0.0008798989983167,⋯,0.0164609420828892,0.18699221685447,0.0538482966439021,0.0051652705738659,0.0101315441008439,0.0178453947955228,0.0007589873300558,0.0229428755416952,0.0013682255800045,0.0018824002657901
Latin.America.and.Caribbean,0.129973519670985,0.0476238406119794,0.0205737666197525,0.0313184114667515,0.059445583099081,0.0920730711447905,0.0169632908020917,0.0058028575027912,0.0076675033032388,0.0012439160818362,⋯,0.0871724864680537,0.0839545011527448,0.0390784949467411,0.0376458916746146,0.0108078108504257,0.0193823831971698,0.0043154734078505,0.0243888585513585,0.0006263679225148,0.0090585223708531
North.Africa.and.Middle.East,0.18786931406707,0.0467557075489037,0.0143893225945067,0.0497790011743029,0.0456736551743941,0.105457299678585,0.0106007398904736,0.0064275140423251,0.0104312945657228,0.00331099364269,⋯,0.10056944347638,0.0607236423399148,0.0292791254927653,0.0412117771274257,0.0079897291171716,0.0176250887362016,0.0030899632879519,0.020203179425382,0.0006510248265791,0.0085692943248738
South.Asia,0.0923434700901243,0.0637141611320367,0.0126878877054454,0.0210283123049935,0.0300464227115865,0.214266213115124,0.0174174973525445,0.0043569977744494,0.0063872078581331,0.0129629215748385,⋯,0.165191660333951,0.0374291115595851,0.0337182716943239,0.0551808801713907,0.0052873693290662,0.0146149967375453,0.0057229113241059,0.0138540441445633,0.0010067268039162,0.0387455189596979
Southeast.Asia..East.Asia.and.Oceania,0.177268647266155,0.0689751282016789,0.0159846270125831,0.0217798601359548,0.0457999042827754,0.081026373738974,0.014110808231362,0.0059968598645706,0.0061877207532509,0.0042056941197234,⋯,0.0606467800288265,0.14261744399815,0.0253385834382822,0.0203038622815165,0.0089075021621309,0.0196296909914641,0.0028491723516276,0.0200006320957244,0.0005102481093084,0.0284207805430976


In [82]:
# Transpose Pr_GBD so its rows and columns can be compared with those of
# Pr_RCT (see the row/column name comparison further down).
Pr_GBD <- t(Pr_GBD)

In [83]:
# Preview the first rows of the transposed burden shares.
head(Pr_GBD)

Unnamed: 0,Central.Europe..Eastern.Europe..and.Central.Asia,High.income,Latin.America.and.Caribbean,North.Africa.and.Middle.East,South.Asia,Southeast.Asia..East.Asia.and.Oceania,Sub.Saharian.Africa,Tot
Cardiovascular and circulatory diseases,0.35086,0.18806,0.12997,0.18787,0.09234,0.17727,0.03354,0.12946
Chronic respiratory diseases,0.03725,0.05892,0.04762,0.04676,0.06371,0.06898,0.0223,0.05067
Cirrhosis of the liver,0.026996,0.017256,0.020574,0.014389,0.012688,0.015985,0.006231,0.013722
Congenital anomalies,0.01372,0.00992,0.03132,0.04978,0.02103,0.02178,0.01331,0.01948
"Diabetes, urinary diseases and male infertility",0.03333,0.05329,0.05945,0.04567,0.03005,0.0458,0.01358,0.03415
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",0.04806,0.02957,0.09207,0.10546,0.21427,0.08103,0.22926,0.14832


In [84]:
# Ratio between research and burden.
# Pr_RCT and Pr_GBD are divided cell by cell in the next step, so their row
# and column labels must line up exactly. The tables give a visual summary;
# stopifnot() makes a mismatch (or a length difference hidden by recycling in
# `==`) stop the analysis instead of producing wrong ratios.
table(rownames(Pr_RCT) == rownames(Pr_GBD))
table(colnames(Pr_RCT) == colnames(Pr_GBD))
stopifnot(
    identical(rownames(Pr_RCT), rownames(Pr_GBD)),
    identical(colnames(Pr_RCT), colnames(Pr_GBD))
)



TRUE 
  27 


TRUE 
   8 

In [85]:
# Element-wise ratio of the share of RCTs to the share of burden; values
# below 1 mean a smaller share of research than of burden.
R <- Pr_RCT/Pr_GBD
head(R)

Unnamed: 0,Central.Europe..Eastern.Europe..and.Central.Asia,High.income,Latin.America.and.Caribbean,North.Africa.and.Middle.East,South.Asia,Southeast.Asia..East.Asia.and.Oceania,Sub.Saharian.Africa,Tot
Cardiovascular and circulatory diseases,0.3213,0.6207,0.8104,0.5235,0.8913,0.7234,1.4995,0.8885
Chronic respiratory diseases,2.4587,1.0318,1.4231,0.8153,0.8061,0.6919,2.4805,1.1124
Cirrhosis of the liver,0.5187,0.8694,0.5915,1.1445,2.0809,1.7719,0.4346,1.2446
Congenital anomalies,0.7184,2.0994,0.3685,0.3401,0.5199,0.6812,0.465,0.9829
"Diabetes, urinary diseases and male infertility",4.309,2.511,2.7,3.119,4.888,2.829,7.465,3.846
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",0.8564,1.2664,0.5652,0.2763,0.3437,0.5595,0.2531,0.2718


We consider gaps only for diseases that account for at least 1% of the local burden.

In [86]:
# A cell is a gap when the research share is at most half the burden share
# and the disease accounts for at least 1% of the burden.
is_gap <- R <= 1/2 & Pr_GBD >= 0.01
# Row/column positions of the gap cells, as a data frame with default row names.
GAPS <- data.frame(which(is_gap, arr.ind = TRUE), row.names = NULL)

In [87]:
# Preview the (row, col) positions of the first gap cells.
head(GAPS)

Unnamed: 0,row,col
1,1,1
2,18,1
3,27,1
4,18,2
5,4,3
6,18,3


In [88]:
# Attach, for every gap cell, its labels and the underlying values.
# A two-column (row, col) matrix index extracts all cells in one vectorised
# step; unlike sapply(1:nrow(GAPS), ...) it also behaves correctly when GAPS
# has zero rows (1:0 would iterate over c(1, 0)).
gap_cells <- cbind(GAPS$row, GAPS$col)
GAPS$Disease <- rownames(R)[GAPS$row]
GAPS$Region <- colnames(R)[GAPS$col]
GAPS$Ratio <- as.matrix(R)[gap_cells]
GAPS$RCT <- as.matrix(RCT)[gap_cells]
GAPS$Pr_RCT <- as.matrix(Pr_RCT)[gap_cells]
GAPS$GBD <- as.matrix(GBD)[gap_cells]
GAPS$Pr_GBD <- as.matrix(Pr_GBD)[gap_cells]

In [89]:
# Cleaning region names.
# Only local gaps: drop the rows that refer to the "Tot" column.
GAPS <- droplevels(GAPS[GAPS$Region!="Tot",])

# Map raw column names to display labels by NAME. Assigning the seven labels
# positionally with levels<- mislabels regions whenever one region has no gap,
# because the factor then has fewer levels and the labels shift.
region_labels <- c(
    "Central.Europe..Eastern.Europe..and.Central.Asia" = 'Central Europe, Eastern Europe, and Central Asia',
    "High.income" = 'High-income',
    "Latin.America.and.Caribbean" = 'Latin America and Caribbean',
    "North.Africa.and.Middle.East" = 'North Africa and Middle East',
    "South.Asia" = 'South Asia',
    "Southeast.Asia..East.Asia.and.Oceania" = 'Southeast Asia, East Asia and Oceania',
    "Sub.Saharian.Africa" = 'Sub-Saharian Africa'
)
region <- as.character(GAPS$Region)
is_raw <- region %in% names(region_labels)
region[is_raw] <- region_labels[region[is_raw]]
# Already-clean labels are accepted so the cell can be re-run; anything else
# is an unknown region and must not be relabelled silently.
stopifnot(all(region %in% region_labels))
GAPS$Region <- factor(region, levels = unname(region_labels))


In [90]:
# Neonatal-disorder gaps where the disease is at least 5% of the local burden.
# Columns 1-3 and 5 are left out of the display.
neonatal_major <- GAPS$Pr_GBD >= 0.05 & GAPS$Disease == "Neonatal disorders"
GAPS[neonatal_major, -c(1, 2, 3, 5)]

Unnamed: 0,Region,RCT,Pr_RCT,GBD,Pr_GBD
6,Latin America and Caribbean,41,0.0086026017624842,10069066.126,0.0871724864680537
10,North Africa and Middle East,181,0.0208453299550846,10285578.519,0.10056944347638
13,South Asia,86,0.0177392739273927,100884520.59,0.165191660333951
16,"Southeast Asia, East Asia and Oceania",47,0.0048760244838676,27954940.493,0.0606467800288265
20,Sub-Saharian Africa,40,0.0154738878143133,63398508.5998,0.115784768564737


In [91]:
# Gaps for the "Diarrhea, ..." disease group where it is at least 5% of the
# local burden. grepl() gives the logical mask directly (the former
# 1:nrow(GAPS) %in% grep(...) breaks on an empty table), and the full column
# name replaces the partially matched `$Dis`.
GAPS[GAPS$Pr_GBD >= 0.05 & grepl("Diarrhea", GAPS$Disease), -c(1:3, 5)]

Unnamed: 0,Region,RCT,Pr_RCT,GBD,Pr_GBD
9,North Africa and Middle East,253,0.0291373949095935,10785476.172,0.105457299678585
12,South Asia,357,0.0736386138613861,130854936.2907,0.214266213115124
19,Sub-Saharian Africa,150,0.058027079303675,125531847.741516,0.229258956705191


In [92]:
# All gaps for diseases with at least 5% of the local burden
# (columns 1, 2 and 5 are left out of the display).
major_burden <- GAPS$Pr_GBD >= 0.05
GAPS[major_burden, -c(1, 2, 5)]

Unnamed: 0,Disease,Region,RCT,Pr_RCT,GBD,Pr_GBD
1,Cardiovascular and circulatory diseases,"Central Europe, Eastern Europe, and Central Asia",789,0.112746498999714,53161406.515,0.350858980692946
6,Neonatal disorders,Latin America and Caribbean,41,0.0086026017624842,10069066.126,0.0871724864680537
9,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",North Africa and Middle East,253,0.0291373949095935,10785476.172,0.105457299678585
10,Neonatal disorders,North Africa and Middle East,181,0.0208453299550846,10285578.519,0.10056944347638
12,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",South Asia,357,0.0736386138613861,130854936.2907,0.214266213115124
13,Neonatal disorders,South Asia,86,0.0177392739273927,100884520.59,0.165191660333951
16,Neonatal disorders,"Southeast Asia, East Asia and Oceania",47,0.0048760244838676,27954940.493,0.0606467800288265
19,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",Sub-Saharian Africa,150,0.058027079303675,125531847.741516,0.229258956705191
20,Neonatal disorders,Sub-Saharian Africa,40,0.0154738878143133,63398508.5998,0.115784768564737


In [93]:
# All gaps for diseases with 1% to under 5% of the local burden
# (columns 1, 2 and 5 are left out of the display).
minor_burden <- GAPS$Pr_GBD >= 0.01 & GAPS$Pr_GBD < 0.05
GAPS[minor_burden, -c(1, 2, 5)]

Unnamed: 0,Disease,Region,RCT,Pr_RCT,GBD,Pr_GBD
2,Neonatal disorders,"Central Europe, Eastern Europe, and Central Asia",38,0.0054301228922549,3883909.788,0.0256333441617377
3,Tuberculosis,"Central Europe, Eastern Europe, and Central Asia",21,0.003000857387825,2769215.04,0.0182764909724469
4,Neonatal disorders,High-income,553,0.0081733398365332,3811551.90397,0.0164609420828892
5,Congenital anomalies,Latin America and Caribbean,55,0.0115400755350399,3617507.872,0.0313184114667515
7,Nutritional deficiencies,Latin America and Caribbean,78,0.0163659253042384,4348378.57679833,0.0376458916746146
8,Congenital anomalies,North Africa and Middle East,147,0.0169296326154555,5091067.5,0.0497790011743029
11,Nutritional deficiencies,North Africa and Middle East,134,0.0154324542208914,4214868.4023614,0.0412117771274257
14,Tuberculosis,South Asia,54,0.0111386138613861,23662351.34,0.0387455189596979
15,Neglected tropical diseases excluding malaria,"Southeast Asia, East Asia and Oceania",65,0.0067434381159871,7139081.1718283,0.0154878628893602
17,Tuberculosis,"Southeast Asia, East Asia and Oceania",70,0.0072621641249092,13100468.458,0.0284207805430976


In [94]:
# Distinct diseases with at least one local gap. The full column name
# replaces `$Dise`, which only worked through partial matching.
unique(GAPS$Disease)

In [95]:
# Save the table of local gaps (path relative to the working directory).
write.table(x = GAPS, file = "Mapping_Cancer/Tables/Local_research_gaps.txt")

### Top diseases per region and gaps
For each region, we check whether the top diseases present a gap or not.

In [106]:
# Five diseases with the largest share of local burden, for every region.
# The long "Diarrhea, ..." label is shortened on a COPY: renaming the row of
# Pr_GBD itself would leave its row names out of step with Pr_RCT and R.
top_burden <- Pr_GBD
rownames(top_burden)[grepl("Diarrhea", rownames(top_burden))] <- "Diarrhea"
# The last column is skipped; seq_len() keeps the loop empty rather than
# counting backwards if there is only one column.
for(i in seq_len(ncol(top_burden) - 1)){
    print(colnames(top_burden)[i])
    # head() returns at most five entries instead of padding with NA.
    print(head(sort(top_burden[, i], decreasing = TRUE), 5))
    }

[1] "Central.Europe..Eastern.Europe..and.Central.Asia"
Cardiovascular and circulatory diseases                               Neoplasms 
                                0.35086                                 0.12875 
        Mental and behavioral disorders               Musculoskeletal disorders 
                                0.09977                                 0.08625 
                               Diarrhea 
                                0.04806 
[1] "High.income"
Cardiovascular and circulatory diseases                               Neoplasms 
                                0.18806                                 0.18699 
              Musculoskeletal disorders         Mental and behavioral disorders 
                                0.15362                                 0.13504 
           Chronic respiratory diseases 
                                0.05892 
[1] "Latin.America.and.Caribbean"
Cardiovascular and circulatory diseases         Mental and behavioral disorders 


In [107]:
# Sum, per region, of the local burden shares of the diseases flagged as gaps.
with(GAPS, tapply(X = Pr_GBD, INDEX = Region, FUN = sum))

## Gaps for the mapping of patients enrolled
Same analysis as above, using the number of patients enrolled in RCTs instead of the number of RCTs.

In [49]:

# Reload the burden table and load the patients table. The patients table is
# kept under the name RCT, so the cells below have the same shape as in the
# RCT analysis.
GBD <- read.table("Mapping_Cancer/Tables/GBD_data_per_region_and_27_diseases_2005.txt")
RCT <- read.table("Mapping_Cancer/Tables/Patients_data_per_region_and_27_diseases_2005_2015.txt")
# Sort the rows by row name.
RCT <- RCT[order(rownames(RCT)),]


### Gaps across regions

In [50]:
# Across regions: number and share of patients versus burden (in thousand
# DALYs) and share of burden, one row per region.

# Regional patient counts are read from the "Tot" row, leaving out the last column.
patients_region_row <- RCT[rownames(RCT) == "Tot", -ncol(RCT)]
region_names <- names(patients_region_row)
patients_by_region <- as.numeric(unlist(patients_region_row))
patients_overall <- RCT$Tot[rownames(RCT) == "Tot"]

# Regional burden: column sums of GBD without its last two columns.
gbd_by_region <- as.numeric(apply(GBD[-(ncol(GBD) - 0:1)], 2, sum))

D <- data.frame(
    Patients = patients_by_region,
    Prop_Patients = 100 * patients_by_region / patients_overall,
    GBD = gbd_by_region / 1e3,
    Prop_GBD = 100 * gbd_by_region / sum(GBD$Tot),
    row.names = region_names
)

In [51]:
# Patients per thousand DALYs: D$GBD was already divided by 1e3 when D was built.
D[["Patients_per_ThDALYs"]] <- D[["Patients"]] / D[["GBD"]]

In [52]:
# Penalise scientific notation heavily and print 4 significant digits.
options(scipen = 100, digits = 4)

In [53]:
# Keep a copy of the regional table and show it from the highest to the
# lowest number of patients per thousand DALYs.
Dregs <- D
# Use the full column name: `$Patients_per` only worked through partial matching.
Dregs[order(Dregs$Patients_per_ThDALYs, decreasing = TRUE), ]

Unnamed: 0,Patients,Prop_Patients,GBD,Prop_GBD,Patients_per_ThDALYs
High.income,20877389.2,64.11,231551.26,10.43,90.16
Central.Europe..Eastern.Europe..and.Central.Asia,1713426.659,5.261,151517.873,6.825,11.308
North.Africa.and.Middle.East,899010.778,2.761,102273.396,4.607,8.79
Latin.America.and.Caribbean,932054.339,2.862,115507.387,5.203,8.069
Southeast.Asia..East.Asia.and.Oceania,3340414.398,10.257,460946.822,20.763,7.247
Sub.Saharian.Africa,3045884.04,9.353,547554.824,24.664,5.563
South.Asia,1758042.59,5.398,610711.947,27.509,2.879


### Gaps across diseases
Across diseases number and proportion of Patients and GBD
And patients per thousand DALYs

In [54]:
# Across diseases: number and share of patients versus burden (DALYs) and
# share of burden, one row per disease.
#
# Rows of the patients table (held in RCT) are looked up by GBD's row names
# instead of being paired by position: RCT was re-sorted after loading while
# GBD was not, and the result is labelled with GBD's row names, so a
# positional pairing would silently mislabel rows if the orders ever differed.
diseases <- rownames(GBD)
stopifnot(all(diseases %in% rownames(RCT)))
patients_overall <- RCT$Tot[rownames(RCT) == "Tot"]
patients_by_disease <- as.numeric(RCT[diseases, "Tot"])

D <- data.frame(
    Patients = patients_by_disease,
    Prop_Patients = 100 * patients_by_disease / patients_overall,
    GBD = GBD$Tot,
    Prop_GBD = 100 * GBD$Tot / sum(GBD$Tot),
    row.names = diseases
)

In [55]:
# Patients per thousand DALYs (here D$GBD is in DALYs, hence the division by 1e3).
D$Patients_per_ThDALYs <- with(D, Patients / (GBD / 1e3))

In [56]:
# Rank diseases by patients per thousand DALYs, then show the three highest
# and three lowest among diseases with at least 5% of the total burden.
# The full column name replaces `$Patients_per`, which relied on partial matching.
D <- D[order(D$Patients_per_ThDALYs, decreasing = TRUE), ]
head(D[D$Prop_GBD >= 5, ], n = 3)
tail(D[D$Prop_GBD >= 5, ], n = 3)

Unnamed: 0,Patients,Prop_Patients,GBD,Prop_GBD,Patients_per_ThDALYs
Neoplasms,6532214.0,20.058,176868544.264,7.967,36.933
Cardiovascular and circulatory diseases,4851601.0,14.9,287404109.09,12.95,16.88
Chronic respiratory diseases,1666971.0,5.119,112485355.223,5.067,14.819


Unnamed: 0,Patients,Prop_Patients,GBD,Prop_GBD,Patients_per_ThDALYs
Musculoskeletal disorders,1707120.0,5.242,151554779.387,6.827,11.264
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",2809428.0,8.627,329285536.747,14.832,8.532
Neonatal disorders,533175.0,1.637,220288076.02,9.923,2.42


In [57]:
# Same ranking, restricted to diseases with 1% to under 5% of the total
# burden: three highest and three lowest.
mid_burden <- D$Prop_GBD >= 1 & D$Prop_GBD < 5
head(D[mid_burden, ], n = 3)
tail(D[mid_burden, ], n = 3)

Unnamed: 0,Patients,Prop_Patients,GBD,Prop_GBD,Patients_per_ThDALYs
"Diabetes, urinary diseases and male infertility",4167593.0,12.797,75821480.094,3.415,54.966
Sense organ diseases,1339601.0,4.113,32290593.297,1.454,41.486
Digestive diseases (except cirrhosis),1345165.0,4.131,32576108.319,1.467,41.293


Unnamed: 0,Patients,Prop_Patients,GBD,Prop_GBD,Patients_per_ThDALYs
Nutritional deficiencies,729640.0,2.24,89065270.102,4.012,8.192
Congenital anomalies,326955.0,1.004,43254504.439,1.948,7.559
Tuberculosis,426266.0,1.309,56404877.882,2.541,7.557


### Gaps within regions

For each region we compare the local share of patients across diseases to the local share of burden across diseases. We say that a gap exists between research and needs if the share of patients enrolled for a disease is at most half the share of burden attributable to that disease.

In [58]:
# Share of patients concerning each disease among all patients relevant to
# the burden of diseases in the region: every row of the patients table
# (held in RCT) is divided by its "Tot" row, column by column.
is_total <- rownames(RCT) == "Tot"
Pr_Patients <- do.call(rbind, apply(RCT, 1, function(counts) counts / RCT[is_total, ]))

# The "Tot" row has served its purpose; drop it from both tables.
Pr_Patients <- Pr_Patients[rownames(Pr_Patients) != "Tot", ]
RCT <- RCT[!is_total, ]

In [59]:
# Share of the burden attributable to each disease among the whole burden of
# diseases in the region.
# Drop the last column of GBD (NOTE: not idempotent; re-running this cell
# drops one more column each time).
GBD <- GBD[,-ncol(GBD)]
# Per-column burden totals, computed once instead of once per disease inside
# the row-wise function below.
region_burden <- apply(GBD, 2, sum)
Pr_GBD <- apply(GBD, 1, function(x) x / region_burden)

In [60]:
# Transpose Pr_GBD so its rows and columns can be compared with those of
# Pr_Patients (see the row/column name comparison further down).
Pr_GBD <- t(Pr_GBD)

In [61]:
# Ratio between research and burden.
# Pr_Patients and Pr_GBD are divided cell by cell in the next step, so their
# row and column labels must line up exactly. The tables give a visual
# summary; stopifnot() makes a mismatch (or a length difference hidden by
# recycling in `==`) stop the analysis instead of producing wrong ratios.
table(rownames(Pr_Patients) == rownames(Pr_GBD))
table(colnames(Pr_Patients) == colnames(Pr_GBD))
stopifnot(
    identical(rownames(Pr_Patients), rownames(Pr_GBD)),
    identical(colnames(Pr_Patients), colnames(Pr_GBD))
)


TRUE 
  27 


TRUE 
   8 

In [62]:
# Element-wise ratio of the share of patients to the share of burden; values
# below 1 mean a smaller share of patients than of burden.
R <- Pr_Patients/Pr_GBD

We consider gaps only for diseases that account for at least 1% of the local burden.

In [63]:
# A cell is a gap when the share of patients is at most half the burden share
# and the disease accounts for at least 1% of the burden.
is_gap <- R <= 1/2 & Pr_GBD >= 0.01
# Row/column positions of the gap cells, as a data frame with default row names.
GAPS <- data.frame(which(is_gap, arr.ind = TRUE), row.names = NULL)

In [64]:
# Attach, for every gap cell, its labels and the underlying values.
# A two-column (row, col) matrix index extracts all cells in one vectorised
# step; unlike sapply(1:nrow(GAPS), ...) it also behaves correctly when GAPS
# has zero rows (1:0 would iterate over c(1, 0)).
gap_cells <- cbind(GAPS$row, GAPS$col)
GAPS$Disease <- rownames(R)[GAPS$row]
GAPS$Region <- colnames(R)[GAPS$col]
GAPS$Ratio <- as.matrix(R)[gap_cells]
GAPS$Patients <- as.matrix(RCT)[gap_cells]
GAPS$Pr_Patients <- as.matrix(Pr_Patients)[gap_cells]
GAPS$GBD <- as.matrix(GBD)[gap_cells]
GAPS$Pr_GBD <- as.matrix(Pr_GBD)[gap_cells]

In [65]:
# Cleaning region names.
# Only local gaps: drop the rows that refer to the "Tot" column.
GAPS <- droplevels(GAPS[GAPS$Region!="Tot",])

# Map raw column names to display labels by NAME. Assigning the seven labels
# positionally with levels<- mislabels regions whenever one region has no gap,
# because the factor then has fewer levels and the labels shift.
region_labels <- c(
    "Central.Europe..Eastern.Europe..and.Central.Asia" = 'Central Europe, Eastern Europe, and Central Asia',
    "High.income" = 'High-income',
    "Latin.America.and.Caribbean" = 'Latin America and Caribbean',
    "North.Africa.and.Middle.East" = 'North Africa and Middle East',
    "South.Asia" = 'South Asia',
    "Southeast.Asia..East.Asia.and.Oceania" = 'Southeast Asia, East Asia and Oceania',
    "Sub.Saharian.Africa" = 'Sub-Saharian Africa'
)
region <- as.character(GAPS$Region)
is_raw <- region %in% names(region_labels)
region[is_raw] <- region_labels[region[is_raw]]
# Already-clean labels are accepted so the cell can be re-run; anything else
# is an unknown region and must not be relabelled silently.
stopifnot(all(region %in% region_labels))
GAPS$Region <- factor(region, levels = unname(region_labels))


In [66]:
# All gaps for diseases with at least 5% of the local burden
# (columns 1, 2 and 5 are left out of the display).
major_burden <- GAPS$Pr_GBD >= 0.05
GAPS[major_burden, -c(1, 2, 5)]

Unnamed: 0,Disease,Region,Patients,Pr_Patients,GBD,Pr_GBD
4,Musculoskeletal disorders,High-income,1287959.38408855,0.0616915923744389,35571692.16788,0.15362340047264
8,Mental and behavioral disorders,Latin America and Caribbean,46457.3076192212,0.0498439905006055,14829920.94981,0.128389372672953
9,Neonatal disorders,Latin America and Caribbean,12924.2880724539,0.0138664534154784,10069066.126,0.0871724864680537
11,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",North Africa and Middle East,33284.1724019517,0.0370231072076604,10785476.172,0.105457299678585
13,Neonatal disorders,North Africa and Middle East,21759.1785714286,0.024203467980877,10285578.519,0.10056944347638
14,Chronic respiratory diseases,South Asia,35282.6684557411,0.0200692910671856,38910999.427,0.0637141611320367
17,Chronic respiratory diseases,"Southeast Asia, East Asia and Oceania",96467.8694294783,0.0288790125829091,31793866.11224,0.0689751282016789
19,Musculoskeletal disorders,"Southeast Asia, East Asia and Oceania",104109.621444154,0.0311666784543797,42209775.433731,0.0915718982280306
20,Neonatal disorders,"Southeast Asia, East Asia and Oceania",12550.2354750513,0.0037570893847452,27954940.493,0.0606467800288265
23,"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",Sub-Saharian Africa,212975.160628435,0.069922281285192,125531847.741516,0.229258956705191


In [67]:
# Neonatal-disorder gaps where the disease is at least 5% of the local burden.
# Columns 1-3 and 5 are left out of the display.
neonatal_major <- GAPS$Pr_GBD >= 0.05 & GAPS$Disease == "Neonatal disorders"
GAPS[neonatal_major, -c(1, 2, 3, 5)]

Unnamed: 0,Region,Patients,Pr_Patients,GBD,Pr_GBD
9,Latin America and Caribbean,12924.2880724539,0.0138664534154784,10069066.126,0.0871724864680537
13,North Africa and Middle East,21759.1785714286,0.024203467980877,10285578.519,0.10056944347638
20,"Southeast Asia, East Asia and Oceania",12550.2354750513,0.0037570893847452,27954940.493,0.0606467800288265
25,Sub-Saharian Africa,71058.6363636364,0.0233293964652936,63398508.5998,0.115784768564737


In [68]:
# Gaps for the "Diarrhea, ..." disease group where it is at least 5% of the
# local burden. grepl() gives the logical mask directly (the former
# 1:nrow(GAPS) %in% grep(...) breaks on an empty table), and the full column
# name replaces the partially matched `$Dis`.
GAPS[GAPS$Pr_GBD >= 0.05 & grepl("Diarrhea", GAPS$Disease), -c(1:3, 5)]

Unnamed: 0,Region,Patients,Pr_Patients,GBD,Pr_GBD
11,North Africa and Middle East,33284.1724019517,0.0370231072076604,10785476.172,0.105457299678585
23,Sub-Saharian Africa,212975.160628435,0.069922281285192,125531847.741516,0.229258956705191


In [69]:
# All gaps for diseases with 1% to under 5% of the local burden
# (columns 1, 2 and 5 are left out of the display).
minor_burden <- GAPS$Pr_GBD >= 0.01 & GAPS$Pr_GBD < 0.05
GAPS[minor_burden, -c(1, 2, 5)]

Unnamed: 0,Disease,Region,Patients,Pr_Patients,GBD,Pr_GBD
1,Cirrhosis of the liver,"Central Europe, Eastern Europe, and Central Asia",11026.0590052792,0.0064350924772867,4090337.297,0.0269957412490737
2,Neonatal disorders,"Central Europe, Eastern Europe, and Central Asia",4723.13980668454,0.002756546235116,3883909.788,0.0256333441617377
3,Tuberculosis,"Central Europe, Eastern Europe, and Central Asia",4657.02380952381,0.0027179592335632,2769215.04,0.0182764909724469
5,Oral disorders,High-income,104009.530525031,0.0049819222868955,2345971.817,0.0101315441008439
6,Cirrhosis of the liver,Latin America and Caribbean,4995.96836766605,0.0053601685637986,2376422.022,0.0205737666197525
7,Congenital anomalies,Latin America and Caribbean,6589.37817021001,0.0070697360598864,3617507.872,0.0313184114667515
10,Congenital anomalies,North Africa and Middle East,12246.3978270238,0.0136220812157245,5091067.5,0.0497790011743029
12,Hemoglobinopathies and hemolytic anemias,North Africa and Middle East,3649.82943722944,0.0040598283446067,1066843.92,0.0104312945657228
15,Congenital anomalies,South Asia,7117.77254901961,0.0040486917596805,12842241.56,0.0210283123049935
16,Tuberculosis,South Asia,20352.4178571429,0.0115767490321306,23662351.34,0.0387455189596979


In [70]:
# Save the table of local gaps in patients (path relative to the working directory).
write.table(x = GAPS, file = "Mapping_Cancer/Tables/Local_research_gaps_patients.txt")

### Differences between gaps in RCTs and patients

In [71]:
# Preview the local shares of patients per disease and region.
# NOTE(review): the former expression, Pr_Patients[rownames(Pr_Patients)==""],
# filtered on an empty row name and had no comma, so it selected nothing; the
# output recorded under this cell is the first six rows of Pr_Patients, which
# is what head() shows. Confirm this was the intent.
head(Pr_Patients)

Unnamed: 0,Central.Europe..Eastern.Europe..and.Central.Asia,High.income,Latin.America.and.Caribbean,North.Africa.and.Middle.East,South.Asia,Southeast.Asia..East.Asia.and.Oceania,Sub.Saharian.Africa,Tot
Cardiovascular and circulatory diseases,0.22043,0.1576,0.12836,0.18283,0.06448,0.21158,0.02609,0.14898
Chronic respiratory diseases,0.11332,0.05748,0.05166,0.03202,0.02007,0.02888,0.02105,0.05119
Cirrhosis of the liver,0.0064351,0.0098766,0.0053602,0.0760196,0.0091711,0.0608886,0.0006272,0.0157214
Congenital anomalies,0.0068722,0.0124541,0.0070697,0.0136221,0.0040487,0.0083571,0.0004269,0.0100397
"Diabetes, urinary diseases and male infertility",0.15683,0.13427,0.12665,0.20318,0.08617,0.17791,0.01615,0.12797
"Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases",0.08124,0.08227,0.13681,0.03702,0.19518,0.07056,0.06992,0.08627
