04-abnomic.Rmd

---
title: "Comparación de la respuesta de anticuerpos ante _Plasmodium vivax_ mediante Microarreglos de proteínas"
author: "Andree Valle Campos"
date: '`r Sys.Date()`'
output:
  #html_document:
  #pdf_document:
  html_notebook:
    toc: yes
    toc_depth: 6
    toc_float:
      collapsed: yes
    #theme: united
    code_folding: "hide"
    #fig_caption: TRUE
    #number_sections: TRUE
bibliography: malaria.bib
link-citations: yes
#csl: american-medical-association.csl
editor_options: 
  chunk_output_type: console
# params:
#   non_parametric: true
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: show code, silence warnings.
knitr::opts_chunk$set(
  echo = TRUE, # fig.path = "01-",
  warning = FALSE
)
# knitr::opts_knit$set(root.dir = '../.')
# Let console output run to 90 columns before wrapping.
options(width = 90)
```

## Microarray Data Analysis

+ __UPDATE:__ 
    - Ir al contraste de los __resultados__ con la __lista de Ag de N6__. [Ver aquí.](#n6-subset)

### Dependencies

This document has the following dependencies:
```{r, warning=FALSE, message=FALSE}
library(tidyverse)
library(haven)
library(broom)
library(biobroom)
library(ggrepel)
library(forcats)
library(stringr)

# library("Rmisc")       #multiploting ggplots

library(Biobase)     #ExpressionSet
library(genefilter)  #DataCondensation
library(limma)       #DifferentialAnalysisOfGenes
library(NMF)         #AnnotatedHeatmaps!!!

#library(PerformanceAnalytics) #GRAPHICAL CORRELATIONS
#library(beeswarm)
#library(vioplot)
#library(beanplot)
#library(htmlTable)  #HTMLtables -minimalistic- without 'ResultsAsis'
#library(knitr)        #better it seems

library(patchwork)
library(qvalue)

theme_set(theme_bw())
```


### Input

```{r}
# Load the raw data table that every later chunk starts from.
raw <- read.csv(file = "data-raw/RawData.csv")
```

```{r, eval=FALSE}
# Interactive inspection of the raw table (the chunk header sets eval=FALSE,
# so none of this runs when the document is knitted).
raw %>% View()
raw %>% dplyr::count(Spot.Type)
raw %>% dim()
raw %>% glimpse()
# Tally the sample columns by name prefix: the first 3 characters for names
# starting with "L", the first 4 for names starting with "P", "non" otherwise.
raw %>% 
  as_tibble() %>% 
  select(-(Index:Description)) %>% 
  colnames() %>% enframe() %>% 
  mutate(start=
           case_when(
             str_starts(value,"L")~str_replace(value,"(...).+", "\\1"),
             str_starts(value,"P")~str_replace(value,"(....).+", "\\1"), 
             TRUE ~ "non")) %>% 
  dplyr::count(start)
# samples.csv does include the location of the 8 controls
# the Ab readings are not in the raw-values file
# but they are present in the Excel files
readr::read_csv("data-raw/samples.csv") %>% 
  filter(Study=="Controls") %>% 
  select(-Study)
# Same prefix tally, this time on the Sample.ID column of samples.csv
# (note: the second prefix test here is "PQ", whereas the one above is "P").
readr::read_csv("data-raw/samples.csv") %>% 
  # rownames_to_column() %>% 
  # as_tibble() %>% 
  # dplyr::count(Study)
  # dplyr::count(Study,Group)
  select(value=Sample.ID) %>% 
  mutate(start=
           case_when(
             str_starts(value,"L")~str_replace(value,"(...).+", "\\1"),
             str_starts(value,"PQ")~str_replace(value,"(....).+", "\\1"), 
             TRUE ~ "non")) %>% 
  dplyr::count(start)
```

### 1. Tidy up

#### ivtt antigens

```{r}
# IVTT antigen spots as a numeric matrix: one row per spot ID, one column
# per sample. Columns 11 onward of `raw` are taken as the sample readings.
anti <- raw %>%
  dplyr::filter(Spot.Type == "IVTT.AG") %>%
  select(ID, 11:ncol(.)) %>%
  gather(key = sample_name, value = expression, -ID) %>%
  mutate(expression = as.numeric(expression)) %>%
  reshape2::acast(
    formula = ID ~ sample_name,
    value.var = "expression"
  )

# dimensions of the antigen matrix
dim(anti)
# first six rows (head) for the first 8 samples
head(anti[, 1:8])
```

#### ivtt controls

```{r}
# Per-sample median of the IVTT control spots, returned as a 1-row matrix
# whose single row is labelled "noDNA" (columns = samples).
ctrl <- raw %>%
  dplyr::filter(Spot.Type == "IVTT.CTRL") %>%
  select(ID, 11:ncol(.)) %>%
  # MEDIAN to NORMALIZE against noDNA controls.
  # The function is passed directly: the funs() wrapper is deprecated in
  # dplyr, and a bare function gives the same one-row result with the same
  # column names.
  summarise_if(is.numeric, median) %>%
  mutate(ID = "noDNA") %>%
  gather(sample_name, expression, -ID) %>%
  mutate(expression = as.numeric(expression)) %>%
  reshape2::acast(ID ~ sample_name,
                  value.var = "expression")

# dimensions of the control matrix
dim(ctrl)
# control medians for the first 8 samples
ctrl[, 1:8]
```

```{r}
# Expand the 1-row control matrix to the shape of `anti` by repeating its
# only row once per antigen row. Row indexing replaces the previous
# rbind()-in-a-loop, which re-copied the growing matrix on every iteration
# and, because it iterated over 1:nrow(anti), produced two rows when `anti`
# had none. Row names ("noDNA" repeated) and column names are unchanged.
ctrm <- ctrl[rep(1L, nrow(anti)), , drop = FALSE]
# dimensions of the matrix holding the `noDNA` control medians
dim(ctrm)
# `noDNA` control medians for the first 8 samples
head(ctrm[, 1:8])
```

#### purified proteins

```{r}
# Purified-protein spots as a numeric matrix (rows = spot ID, columns =
# sample), built the same way as the antigen matrix.
# Earlier alternative kept for reference:
#   separate(Description, c("gene", "conc"), sep = ",", remove = FALSE)
#   select(ID, Species, gene, 14:ncol(.))
pure <- raw %>%
  dplyr::filter(Spot.Type == "PURIFIED.PROTEIN") %>%
  select(ID, 11:ncol(.)) %>%
  gather(key = sample_name, value = expression, -ID) %>%
  mutate(expression = as.numeric(expression)) %>%
  reshape2::acast(
    formula = ID ~ sample_name,
    value.var = "expression"
  )
```

#### gene names

```{r}
# Spot-level annotation (ID, gene ID, description, species) for the IVTT
# antigen spots.
ln <- raw %>%
  dplyr::filter(Spot.Type == "IVTT.AG") %>%
  select(ID, Gene.ID, Description, Species)

# Number of spots per gene ID and species.
lg <- ln %>%
  group_by(Gene.ID, Species) %>%
  dplyr::count() %>%
  ungroup()

# Write the full tally, then a gene-ID-only list.
readr::write_csv(lg, "data/04-listgen-raw.csv")
readr::write_csv(select(lg, Gene.ID), "data/04-listgen.csv")
```

#### feature data

```{r}
# Feature annotation for the IVTT antigen spots, with the spot ID moved into
# the row names so it can be wrapped as an AnnotatedDataFrame.
#
# Abandoned attempts, kept for reference.
# Did NOT work:
#   dplyr::mutate(ID = as.character(ID)) %>%
#   dplyr::mutate(ID = stringr::str_replace(ID, "(.....)_(\\d{6})_(.+)", "\\3"))
#   separate(ID, c("id.sp", "id.cod", "id.res"), sep = "_", remove = FALSE) %>%
#   separate(id.cod, c("id.cod", "id.num")) %>%
#   unite(id.num, id.num, id.res)
# Worked:
#   dplyr::mutate(num = seq(from = 33, to = 32 + n())) %>%
#   unite(Description, Description, num, sep = " _") %>%
#   dplyr::mutate(Description = as.factor(Description)) %>%
feat <- raw %>%
  dplyr::filter(Spot.Type == "IVTT.AG") %>%
  select(ID, Gene.ID, Description, Species) %>%
  as.data.frame() %>%
  column_to_rownames(var = "ID")

feat_d <- new("AnnotatedDataFrame", data = feat)
```

#### phenotype data

- __join__ `samples.csv` with `03-sevrcov.rds` and `primaquine` sample data
- __create__ categorical variables from numerical: `edad` and `episodio_previo_num`
    + `cut()` advantage: automatic factor labels.

```{r}
# Severity covariates: rename the key column to Sample.ID, bin `edad` and
# `episodio_previo_num` into categories with cut(), then keep the key, the
# two numeric scores and the sev_WHO:expo_CAT column range (minus Study).
sevdb_3 <- readRDS("data/03-sevrcov.rds") %>%
  dplyr::rename(Sample.ID = "codigo") %>%
  mutate(
    edad_CAT = cut(edad, c(0, 18, 40, 65, Inf)),
    expo_CAT = cut(episodio_previo_num, c(-Inf, 0, 1, 4, Inf))
  ) %>%
  select(
    Sample.ID, sev_WHO_num, episodio_previo_num,
    sev_WHO:expo_CAT, -Study
  )
```

```{r, message=FALSE}
# Covariates for the Primaquine-study samples, read from the original
# ADi-NAMRU6 sample sheet (an earlier copy lived at
# analysis/more/ADi-NAMRU6_Data/samples.csv before the original was found).
pridb <- readr::read_csv("data-raw/ADi-NAMRU6_Data-samples.csv") %>%
  filter(Study == "Primaquine") %>%
  select(Sample.ID, Cat.Age.WHO:Anemia.HTO)
```

```{r, message=FALSE}
# Sample-level (phenotype) table: the non-control rows of samples.csv,
# full-joined with the severity covariates (sevdb_3) and the primaquine
# covariates (pridb) on Sample.ID, then converted to a data.frame whose row
# names are the sample IDs.
pheno <- readr::read_csv("data-raw/samples.csv") %>% 
  #dplyr::count(Sample.Type)
  dplyr::filter(Sample.Type!="Control") %>% #str()
  select(Sample.ID, 1:9, -Sample.Type,-Subject.ID) %>% 
  #dplyr::count(Filename)
  dplyr::arrange(Sample.ID) %>%
  full_join(
    sevdb_3 %>% 
      mutate(
        # as.character() then as.numeric(): recovers the printed value
        # rather than internal codes if sev_WHO_num is stored as a factor
        # (TODO confirm its type in 03-sevrcov.rds)
        sev_WHO_num_precat = as.character(sev_WHO_num),
        sev_WHO_num_precat = as.numeric(sev_WHO_num_precat),
        # collapse the score into three levels: "0", "1" and "1+" (above 1)
        sev_WHO_cat = case_when(
          sev_WHO_num_precat == 0 ~ "0",
          sev_WHO_num_precat == 1 ~ "1",
          sev_WHO_num_precat > 1 ~ "1+"),
        sev_WHO_cat = as.factor(sev_WHO_cat)
      ) %>% #count(sev_WHO_cat,sev_WHO_num_precat,sev_WHO_num)
      # the helper column is only needed to build sev_WHO_cat
      select(-sev_WHO_num_precat)
    ,by = "Sample.ID") %>% 
  full_join(pridb,by = "Sample.ID") %>% 
  dplyr::arrange(Sample.ID) %>% 
  # # manual imputation of one observation # warning! (post-hoc decision)
  # # filter(!is.na(sev_WHO) & is.na(episodio_previo)) %>% glimpse()
  # mutate(episodio_previo=as.character(episodio_previo)) %>% 
  # mutate(episodio_previo=case_when(
  #   Sample.ID=="LIM2017"~"sin",
  #   TRUE~episodio_previo)) %>% 
  # mutate(episodio_previo=as.factor(episodio_previo)) %>% 
  # # filter(Sample.ID=="LIM2017") %>% glimpse()
  as.data.frame() %>% 
  column_to_rownames(var="Sample.ID")
  

# Wrap the table for use as the phenoData slot of an ExpressionSet.
pheno_d <- new("AnnotatedDataFrame", data=pheno)

#colnames(norx)
```

### 2. Normalization + Transformation

```{r log-norm}
# Custom log2 that defines the result for values less than or equal to zero:
# every element that is not strictly positive (or is already NA) becomes NA
# before the logarithm is taken, so no -Inf/NaN is produced.
log2.NA <- function(x) {
  positive_only <- ifelse(x > 0, x, NA)
  log2(positive_only)
}

# Normalize against the `noDNA` controls (element-wise ratio) and apply the
# log2 transformation; non-positive ratios become NA.
norm <- log2.NA(anti / ctrm)
head(norm[, 1:6])
```

```{r}
# Antigen / control ratio before the log2 step.
test <- anti / ctrm
```

```{r ratio-test, eval=FALSE, echo=FALSE}
# Unevaluated sanity checks on the antigen / control ratio
# (the chunk header sets eval=FALSE).
test <- anti/ctrm 

# List every (spot, sample) cell whose ratio is zero or negative, i.e. the
# cells that log2.NA() turns into NA.
test %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name,expression,-rowname) %>% 
  filter(expression <= 0)

# Look up one specific spot/sample cell in the raw matrix ...
anti %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name,expression,-rowname) %>% 
  dplyr::filter(rowname=="PVX_113590_2o2.849",sample_name=="LIM2077") # 21

# ... and the same cell after normalization + log2.
norm %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name,expression,-rowname) %>% 
  dplyr::filter(rowname=="PVX_113590_2o2.849",sample_name=="LIM2077") # 21
```

```{r vsn-norm}

```

#### outlier

```{r}

# Long form of the normalized matrix: one row per (sample, value).
ndb <- norm %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression)

# One boxplot per sample, ordered by the per-sample mean.
# NOTE(review): mean() runs without na.rm, so a sample containing any NA gets
# an NA average — confirm that the resulting ordering is intended.
p <- ndb %>%
  full_join(
    ndb %>%
      group_by(sample_name) %>%
      dplyr::summarise(avg = mean(expression)) %>%
      ungroup(),
    by = "sample_name"
  ) %>%
  dplyr::arrange(avg) %>%
  ggplot(aes(x = reorder(sample_name, avg, order = TRUE), y = expression)) +
  geom_boxplot() +
  theme(axis.text.x = element_blank()) +
  labs(title = "Outlier visualization")
```

```{r}
# Conditional edit of individual cells: any normalized value below -5 is
# replaced by NA, then the table is cast back to a spot x sample matrix.
# Idiom source: https://github.com/tidyverse/dplyr/issues/425
norx <- norm %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  gather(key = sample_name, value = expression, -rowname) %>%
  # to list the affected cells instead: dplyr::filter(expression < -5)
  mutate(expression = ifelse(expression < -5, NA, expression)) %>%
  # spot check: dplyr::filter(rowname=="PVX_111175.544",sample_name=="LIM2017") # 21
  reshape2::acast(
    formula = rowname ~ sample_name,
    value.var = "expression"
  )
 
# norm %>% write_rds("data/04-eset_assay.rds")

#norm["PVX_111175.544","LIM2017"] <- NA ## messy modification

#anti %>% as.data.frame() %>% 
#  rownames_to_column() %>% 
#  gather(sample_name,expression,-rowname) %>% 
#  dplyr::filter(rowname=="PVX_111175.544",sample_name=="LIM2017") # 21
#
#ctrl %>% as.data.frame() %>% 
#  rownames_to_column() %>% 
#  gather(sample_name,expression,-rowname) %>% 
#  dplyr::filter(sample_name=="LIM2017") # 13550.5
```

```{r, fig.height=4, fig.width=15}
# Same per-sample boxplot as `p`, drawn from the edited matrix `norx` but
# ordered by the averages of the unedited data (`ndb`), so both panels share
# one sample order.
q <- norx %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression) %>%
  full_join(
    ndb %>%
      group_by(sample_name) %>%
      dplyr::summarise(avg = mean(expression)) %>%
      ungroup(),
    by = "sample_name"
  ) %>%
  dplyr::arrange(avg) %>%
  ggplot(aes(x = reorder(sample_name, avg, order = TRUE), y = expression)) +
  geom_boxplot() +
  theme(axis.text.x = element_blank()) +
  labs(title = "Conditional edition of a single element")

# Side-by-side before/after panels (patchwork).
# Rmisc::multiplot(p,q,cols = 2)
p + q
```

### 3. ExpressionSet

- __arrange__ `norx` rownames wrt `feat_d` ones.
- __create__ the expression set

```{r}
# Rows in their current order ...
head(norx[, 1:5])
# ... and re-ordered to follow the row names of the feature annotation.
head(norx[rownames(feat_d), 1:5])
```

```{r}
# Assemble the ExpressionSet; assay rows are put in the same order as the
# feature annotation's row names.
eset <- ExpressionSet(
  assayData = norx[rownames(feat_d), ],
  phenoData = pheno_d,
  featureData = feat_d
)
eset
```

#### subset eset 
**per Pv/Pf chip and Pq/Sev experiment**
```{r}
# Split the ExpressionSet on both a FEATURE covariate (Species: "Pv" / "Pf")
# and a SAMPLE covariate (Study: "Primaquine" vs. everything else).

# Primaquine study
eset.VIVAX.PQ <- eset[
  featureData(eset)$Species == "Pv",
  phenoData(eset)$Study == "Primaquine"
]
eset.FALCIP.PQ <- eset[
  featureData(eset)$Species == "Pf",
  phenoData(eset)$Study == "Primaquine"
]

# Samples whose Study is not "Primaquine"
eset.VIVAX.SEV <- eset[
  featureData(eset)$Species == "Pv",
  phenoData(eset)$Study != "Primaquine"
]
eset.VIVAX.SEV
eset.FALCIP.SEV <- eset[
  featureData(eset)$Species == "Pf",
  phenoData(eset)$Study != "Primaquine"
]
```

```{r, eval=FALSE}
# Quick look at the sample covariates (unevaluated chunk).
summary(pData(eset))
varMetadata(eset)
# sample counts per study and per group
table(pData(eset)$Study)
table(pData(eset)$Group)
```

#### specific pData

```{r}
# Keep the Filename:expo_CAT column range plus sev_WHO_cat for this subset,
# dropping the Filename, Slide, Pad and Probing.Day columns.
pData(eset.VIVAX.SEV) <- pData(eset.VIVAX.SEV) %>%
  select(
    Filename:expo_CAT, sev_WHO_cat,
    -Filename, -Slide, -Pad, -Probing.Day
  )

eset.VIVAX.SEV
```

```{r}
# Keep Study, Group and the Weight:Anemia.HTO column range for this subset.
pData(eset.VIVAX.PQ) <- pData(eset.VIVAX.PQ) %>%
  select(Study, Group, Weight:Anemia.HTO)

# eset.VIVAX.PQ
```

### 4. Filtering

```{r, echo=TRUE}
# Feature filtering with genefilter, once for the whole set and once for
# each of the four subsets.
#
# Author's recorded dataset sizes and the "10% of samples" value used as k:
#   eset             1014 features x 200 samples  (10% = 20)
#   eset.VIVAX.PQ     515 features x 140 samples  (10% = 14)
#   eset.FALCIP.PQ    499 features x 140 samples  (10% = 14)
#   eset.VIVAX.SEV    515 features x  60 samples  (10% =  6)
#   eset.FALCIP.SEV   499 features x  60 samples  (10% =  6)

# Filter criteria: expression measure above 1 in at least k samples.
f1 <- kOverA(20, 1) # whole dataset
ffun <- filterfun(f1)
f2 <- kOverA(14, 1) # primaquine subsets
ffun2 <- filterfun(f2)
f3 <- kOverA(6, 1) # severe subsets
ffun3 <- filterfun(f3)

# Recorded sum(wh*) counts below are "old -> new"; all of them are WITHOUT
# mean centering.

# ---- whole dataset ----
wh1 <- genefilter(exprs(eset), ffun)
# sum(wh1): 526 -> 818
eset.FILTER <- eset[wh1, ]

# ---- vivax, primaquine ----
wh2 <- genefilter(exprs(eset.VIVAX.PQ), ffun2)
# sum(wh2): 252 -> 397
eset.VIVAX.PQ.FILTER <- eset.VIVAX.PQ[wh2, ]

# ---- falciparum, primaquine ----
wh3 <- genefilter(exprs(eset.FALCIP.PQ), ffun2)
# sum(wh3): 279 -> 430
eset.FALCIP.PQ.FILTER <- eset.FALCIP.PQ[wh3, ]

# ---- vivax, severe [DONE] ----
wh4 <- genefilter(exprs(eset.VIVAX.SEV), ffun3)
# sum(wh4): 255 -> 394
eset.VIVAX.SEV.FILTER <- eset.VIVAX.SEV[wh4, ]
eset.VIVAX.SEV.FILTER

# ---- falciparum, severe [DONE] ----
wh5 <- genefilter(exprs(eset.FALCIP.SEV), ffun3)
# sum(wh5): 280 -> 419
eset.FALCIP.SEV.FILTER <- eset.FALCIP.SEV[wh5, ]

# Recorded results (older run):
#   eset.VIVAX.PQ.FILTER     252 / 515 features x 140 samples
#   eset.FALCIP.PQ.FILTER    279 / 499 features x 140 samples
#   eset.VIVAX.SEV.FILTER    255 / 515 features x  60 samples
#   eset.FALCIP.SEV.FILTER   280 / 499 features x  60 samples

```

```{r}
# Persist the severe-vivax ExpressionSet, unfiltered and filtered.
readr::write_rds(eset.VIVAX.SEV, "data/04-eset_vivax_sev.rds")
readr::write_rds(eset.VIVAX.SEV.FILTER, "data/04-eset_vivax_sev_filter.rds")
```


### 0. Descriptive Statistics

#### Preprocessing

##### Distributions per step

raw -> normalized -> transformed -> filtered

```{r, fig.width=12, fig.height=3.5}
# One long table per preprocessing step, each tagged with a step label.
a <- anti %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression) %>%
  dplyr::mutate(labl = "Raw")
b <- test %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression) %>%
  filter(expression >= 0) %>% # negative ratios are left out of this panel
  dplyr::mutate(labl = "Normalized")
c <- norx %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression) %>%
  dplyr::mutate(labl = "Transformed")
d <- exprs(eset.VIVAX.SEV.FILTER) %>%
  as.data.frame() %>%
  gather(key = sample_name, value = expression) %>%
  dplyr::mutate(labl = "Filtered*")

# Histogram of values per step, one free-scaled facet each, in pipeline order.
preprocessing_plot <- rbind(a, b, c, d) %>%
  dplyr::mutate(
    labl = forcats::fct_relevel(
      labl, "Raw", "Normalized", "Transformed", "Filtered*"
    )
  ) %>%
  ggplot(aes(expression)) +
  geom_histogram() +
  theme_bw() +
  facet_wrap(~labl, nrow = 1, ncol = 4, scales = "free") +
  labs(
    x = "Value",
    y = "Count",
    caption = "*subset of samples from the severe vivax malaria study and probes with P. vivax antigens"
  ) +
  theme(
    strip.background = element_rect(colour = "black"),
    strip.text.x = element_text(colour = "black", size = 12)
  )

# Print first: ggsave() below relies on this being the last plot displayed.
preprocessing_plot
ggsave(
  "figure/04-fig01-preprocessing_distribution.png",
  height = 2.25, width = 8, dpi = "retina"
)
```

```{r, fig.width=16, fig.height=4,eval=FALSE,echo=FALSE}
# Earlier, unevaluated version of the preprocessing figure (the chunk header
# sets eval=FALSE): one separate histogram per step instead of facets.
# Raw values.
a <- anti %>% as.data.frame() %>% 
  gather(sample_name,expression) %>% 
  ggplot(aes(expression)) +
  geom_histogram() +
  labs(title="RAW") + theme_bw()
# Antigen / control ratio, non-negative values only.
b <- test %>% as.data.frame() %>% 
  gather(sample_name,expression) %>% 
  filter(expression >= 0) %>% 
  ggplot(aes(expression)) +
  geom_histogram() +
  labs(title="NORMALIZED") + theme_bw()
# log2 ratio after the conditional NA edit.
c <- norx %>% as.data.frame() %>% 
  gather(sample_name,expression) %>% 
  ggplot(aes(expression)) +
  geom_histogram() +
  labs(title="TRANSFORMED") + theme_bw()
# Filtered severe-vivax subset.
d <- exprs(eset.VIVAX.SEV.FILTER) %>% as.data.frame() %>% 
  gather(sample_name,expression) %>% 
  ggplot(aes(expression)) +
  geom_histogram() +
  labs(title="SEVERE VIVAX FILTERED DATA") + theme_bw()

# Combine the four panels with patchwork.
# Rmisc::multiplot(a,b,c,d,cols = 4)
a+b+c+d
```

*Of sample covariates, feature covariates and microarray whole dataset*

#### Sample covariates

- just show the results from `03-sevrcov.Rmd`
- compare against initial stratification and after update!

##### Reclassification

- Smith-Nuñez, correo __28oct2016__:
    + La base de datos epidemiológicos inicial pasó por __control de calidad y reingreso de fichas__.
    + __Acción:__
        - Contrastar y Crear una DB consenso,
        - Redefinir clasificación OMS de Malaria Severa,
        - Reclasificar muestras,
        - Filtrar muestras seleccionadas para el ensayo de Microarreglo de Proteínas.

```{r}
# Cross-tabulate the initial grouping (Group) against sev_WHO.
pData(eset) %>%
  group_by(Group, sev_WHO) %>%
  dplyr::count()
```

```{r}
# Cross-tabulate sev_WHO against episodio_previo.
pData(eset) %>%
  group_by(sev_WHO, episodio_previo) %>%
  dplyr::count()
```

#### Feature covariates

- maybe a graph of the slides with all the coordinates? :)

##### Load PlasmoDB Metadata

- __Write `data/04-listgen.csv`__ with all the gene names. [here](#gene-names)
- __Create PlasmoDB strategy__, select required features, and download data.
- __Tidy PlasmoDB data frame__ output:
    + __Programmatically renamed all__ variable/column names using __regex__.

```{r, message=FALSE}
# PlasmoDB metadata for the genes on the chip.
# Steps: normalise the column names with regex, drop unused columns, turn
# "N/A" entries into NA, coerce the numeric columns back to numeric, and
# hand-fill five gene names.
# The deprecated funs() wrappers were replaced by a plain function and a
# lambda; the transformations themselves are unchanged.
all <- readr::read_tsv("data/04-listgen.tsv") %>% #colnames() %>% #class()
  #make.names(unique = TRUE) %>% 
  rename_all(
    function(nm) {
      nm %>%
        #stringr::str_to_lower() %>%
        stringr::str_replace_all('\\s', '\\.') %>% 
        stringr::str_replace_all('\\[|\\]', '') %>% 
        stringr::str_replace_all('\\#.', '') %>% 
        stringr::str_replace_all('Computed.', '') %>% 
        stringr::str_replace_all('\\/', '\\_')
    }
  ) %>% 
  # janitor::clean_names()
  # select(-source_id,-X18) %>% 
  select(-source_id,-...18) %>% 
  dplyr::rename(Gene.Name=Gene.Name.or.Symbol) %>% 
  # Applied to every column; a value containing "N/A" becomes NA.
  dplyr::mutate_all(
    ~ stringr::str_replace_all(., "N/A", replacement = NA_character_)
  ) %>% 
  # Collapse any SignalP score string starting with "NN" to the flag "SP".
  dplyr::mutate(SignalP.Scores=
                  stringr::str_replace(SignalP.Scores, 
                                       "^NN.{0,}","SP")) %>% 
  # Convert these columns from text to numeric.
  dplyr::mutate(
    NonSyn_Syn.SNP.Ratio.All.Strains=
      as.numeric(NonSyn_Syn.SNP.Ratio.All.Strains),
    Total.SNPs.All.Strains=as.numeric(Total.SNPs.All.Strains),
    Transcript.Length=as.numeric(Transcript.Length),
    Ortholog.count=as.numeric(Ortholog.count)) %>% 
  # dplyr::mutate(
  #   SignalP=stringr::str_replace(SignalP.Scores, "^NN.{0,}","SP")) %>%
  #group_by(SignalP.Scores,SignalP) %>% dplyr::count()
  dplyr::rename(SignalP=SignalP.Scores) %>% 
  # Manual gene-name assignments for five gene IDs.
  dplyr::mutate(
    Gene.Name= ifelse(Gene.ID == "PVX_003775", "MSP4", Gene.Name)) %>%
  dplyr::mutate(
    Gene.Name= ifelse(Gene.ID == "PVX_003770", "MSP5", Gene.Name)) %>% 
  dplyr::mutate(
    Gene.Name= ifelse(Gene.ID == "PVX_097625", "MSP8", Gene.Name)) %>% 
  dplyr::mutate(
    Gene.Name= ifelse(Gene.ID == "PVX_114145", "MSP10", Gene.Name)) %>% 
  dplyr::mutate(
    Gene.Name= ifelse(Gene.ID == "PVX_116780", "SFT2", Gene.Name))
#lg
# head(all)
all
#raw %>% filter(Gene.ID=="PVX_003775")#only_2o2 --> IS THIS SUGGESTING A MISTAKE IN ID TIPYING? --> COMPARE!
#raw %>% filter(stringr::str_detect(Description, "MSP7"))
# Save the chip annotation table as-is.
all %>% 
  readr::write_rds("data/04-listgen-microarray_chip-raw.rds")

# Save a second copy with a combined "<gene name> - <gene ID>" label
# (just the gene ID when the name is missing) and snake_case column names.
all %>% 
  unite(id.name, Gene.Name, Gene.ID, sep = " / ", remove = FALSE) %>% 
  # unite() writes a missing Gene.Name as the text "NA"; strip that prefix.
  # The pattern is anchored at the start so a real name that merely ends in
  # "NA" (e.g. "PCNA / ...") is no longer truncated.
  mutate(id.name=str_replace(id.name,"^NA / ","")) %>% 
  mutate(id.name=str_replace(id.name,"/","-")) %>% 
  janitor::clean_names() %>% 
  readr::write_rds("data/04-listgen-microarray_chip-clean.rds")
```

```{r, message=FALSE}
# PlasmoDB metadata from data/04-listall.tsv (labelled "P. vivax whole
# genome" further down), with the same column-name clean-up and "N/A" -> NA
# conversion as the chip table. The deprecated funs() wrappers were replaced
# by a plain function and a lambda; the transformations are unchanged.
full <- readr::read_tsv("data/04-listall.tsv") %>% #colnames() %>% #class()
  #make.names(unique = TRUE) %>% 
  rename_all(
    function(nm) {
      nm %>%
        #stringr::str_to_lower() %>%
        stringr::str_replace_all('\\s', '\\.') %>% 
        stringr::str_replace_all('\\[|\\]', '') %>% 
        stringr::str_replace_all('\\#.', '') %>% 
        stringr::str_replace_all('Computed.', '') %>% 
        stringr::str_replace_all('\\/', '\\_')
    }
  ) %>% 
  select(-source_id,-...8) %>% 
  dplyr::rename(Gene.Name=Gene.Name.or.Symbol) %>% 
  # Applied to every column; a value containing "N/A" becomes NA.
  dplyr::mutate_all(
    ~ stringr::str_replace_all(., "N/A", replacement = NA_character_)
  ) #%>% 
  #dplyr::mutate(SignalP.Scores=stringr::str_replace(SignalP.Scores, "^NN.{0,}","SP")) %>% 
  #dplyr::mutate(NonSyn_Syn.SNP.Ratio.All.Strains=as.numeric(NonSyn_Syn.SNP.Ratio.All.Strains),
  #              Total.SNPs.All.Strains=as.numeric(Total.SNPs.All.Strains),
  #              Transcript.Length=as.numeric(Transcript.Length),
  #              Ortholog.count=as.numeric(Ortholog.count)) %>% 
  #dplyr::mutate(SignalP=stringr::str_replace(SignalP.Scores, "^NN.{0,}","SP")) %>% 
  #group_by(SignalP.Scores,SignalP) %>% dplyr::count()
  #dplyr::rename(SignalP=SignalP.Scores) %>% 
  #dplyr::mutate(Gene.Name= ifelse(Gene.ID == "PVX_003775", "MSP4", Gene.Name)) %>%
  #dplyr::mutate(Gene.Name= ifelse(Gene.ID == "PVX_003770", "MSP5", Gene.Name)) %>% 
  #dplyr::mutate(Gene.Name= ifelse(Gene.ID == "PVX_097625", "MSP8", Gene.Name)) %>% 
  #dplyr::mutate(Gene.Name= ifelse(Gene.ID == "PVX_114145", "MSP10", Gene.Name)) %>% 
  #dplyr::mutate(Gene.Name= ifelse(Gene.ID == "PVX_116780", "SFT2", Gene.Name))
#lg
# head(full)
full
#raw %>% filter(Gene.ID=="PVX_003775")#only_2o2 --> IS THIS SUGGESTING A MISTAKE IN ID TIPYING? --> COMPARE!
#raw %>% filter(stringr::str_detect(Description, "MSP7"))
```

##### GO distribution

```{r,fig.height=5,fig.width=7.7}
# Long tables of GO cellular-component terms, one row per (gene, term), for
# the chip annotation (`all`) and for the second PlasmoDB table (`full`).
# Each gene's comma-separated GO.Components string is split into up to three
# terms (c1..c3), trimmed and stacked; `protype` flags products whose
# description contains "hypothetical".
# trimws is passed directly: the funs() wrapper is deprecated in dplyr.
all_go <- all %>% select(Gene.ID,Product.Description,GO.Components) %>% 
  separate(GO.Components, c("c1","c2","c3"),sep = ",") %>% 
  mutate_at(vars(c1:c3), trimws) %>% 
  gather(component,GO.Components,-Gene.ID,-Product.Description) %>% 
  # Drops the literal text "NA"; rows where the term is a real NA are also
  # dropped, because the comparison then evaluates to NA.
  filter(GO.Components!="NA") %>% #dplyr::count(GO.Components) %>% dplyr::arrange(desc(n)) #%>% filter(n>1)
  dplyr::mutate(summary.db="Pf/Pv500 microarray",
                protype= stringr::str_count(Product.Description, "hypothetical"),
                protype=as.factor(protype),
                protype= forcats::fct_recode(protype, "hypothetical"="1", "known"="0")) %>%
  select(Gene.ID,Product.Description,protype,summary.db,GO.Components) #%>% dplyr::count(protype)

full_go <- full %>% select(Gene.ID,Product.Description,GO.Components) %>% 
  separate(GO.Components, c("c1","c2","c3"),sep = ",") %>% 
  mutate_at(vars(c1:c3), trimws) %>% 
  gather(component,GO.Components,-Gene.ID,-Product.Description) %>% 
  filter(GO.Components!="NA") %>% #dplyr::count(GO.Components) %>% dplyr::arrange(desc(n)) #%>% filter(n>1)
  dplyr::mutate(summary.db="P. vivax whole genome",
                protype= stringr::str_count(Product.Description, "hypothetical"),
                protype=as.factor(protype),
                protype= forcats::fct_recode(protype, "hypothetical"="1", "known"="0")) %>%
  select(Gene.ID,Product.Description,protype,summary.db,GO.Components) #%>% dplyr::count(protype)

# Term counts per database in wide form (micro = chip, whole = genome),
# keeping only the terms that occur more than once on the chip.
go_ord <- rbind(full_go, all_go) %>%
  group_by(GO.Components, summary.db) %>%
  dplyr::summarise(count = n()) %>%
  ungroup() %>%
  dplyr::arrange(desc(count)) %>%
  spread(key = summary.db, value = count) %>%
  dplyr::rename(
    micro = "Pf/Pv500 microarray",
    whole = "P. vivax whole genome"
  ) %>%
  dplyr::arrange(desc(micro)) %>%
  replace_na(list(micro = 0)) %>%
  filter(micro > 1)

# Horizontal bar chart of term frequencies, one facet per database, bars
# ordered by the chip count and filled by protein type.
rbind(full_go, all_go) %>%
  inner_join(go_ord, by = "GO.Components") %>%
  dplyr::arrange(desc(micro)) %>%
  ggplot(aes(
    x = reorder(GO.Components, micro, order = TRUE),
    fill = protype
  )) +
  geom_bar() +
  facet_grid(. ~ summary.db) +
  coord_flip() +
  theme_bw() +
  scale_fill_discrete(
    name = "Protein",
    labels = c("with annotation", "hypothetical")
  ) +
  theme(
    legend.position = c(-.73, 0.04),
    legend.margin = margin(0, 0, 0, 0),
    axis.title.y = element_blank()
  ) +
  labs(
    title = "Gene Ontology: Pf/Pv500 microarray proteins",
    subtitle = "Cellular Component prediction compared to P. vivax genome"
  ) +
  theme(
    strip.background = element_rect(colour = "black"),
    strip.text.x = element_text(colour = "black")
  )
```

```{r,fig.height=3.5,fig.width=6}
# Same bar chart restricted to the terms that occur more than 3 times on the
# chip; this is the version written to disk.
rbind(full_go, all_go) %>%
  inner_join(
    go_ord %>% filter(micro > 3),
    by = "GO.Components"
  ) %>%
  dplyr::arrange(desc(micro)) %>%
  ggplot(aes(
    x = reorder(GO.Components, micro, order = TRUE),
    fill = protype
  )) +
  geom_bar() +
  facet_grid(. ~ summary.db) +
  coord_flip() +
  theme_bw() +
  scale_fill_discrete(
    name = "Protein",
    labels = c("with annotation", "hypothetical")
  ) +
  # previously tried legend placements:
  #   legend.position = c(-.38, -0.04); legend.position = c(.85, 0.15)
  theme(axis.title.y = element_blank()) +
  # labs(title="Gene Ontology: Pf/Pv500 microarray proteins",
  #      subtitle="Predicted Cellular Components compared to P. vivax genome") +
  theme(
    strip.background = element_rect(colour = "black"),
    strip.text.x = element_text(colour = "black")
  )

# Saves the plot displayed just above.
ggsave(
  "figure/04-fig04-microarray_genome_comparison.png",
  height = 3.5, width = 6.5, dpi = "retina"
)
```

```{r}
library(treemapify)

# Treemap of the same top GO terms: tile area = number of rows per term,
# one facet per database.
rbind(full_go, all_go) %>%
  inner_join(
    go_ord %>% filter(micro > 3),
    by = "GO.Components"
  ) %>%
  dplyr::arrange(desc(micro)) %>%
  janitor::clean_names() %>%
  dplyr::count(summary_db, go_components) %>%
  ggplot(aes(
    area = n,
    fill = go_components,
    label = go_components
    # subgroup = summary_db
  )) +
  geom_treemap(colour = "black") +
  facet_wrap(~summary_db) +
  # geom_treemap_subgroup_border(color = "black") +
  # geom_treemap_subgroup_text(place = "middle",
  #                            grow = T,
  #                            alpha = 0.5,
  #                            colour = "black",
  #                            fontface = "italic",
  #                            min.size = 0) +
  geom_treemap_text(
    colour = "black",
    place = "bottomleft",
    reflow = TRUE
  ) +
  colorspace::scale_fill_discrete_qualitative(palette = "Set 3") +
  theme_classic() +
  theme(legend.position = "none")

# Saves the treemap displayed just above.
ggsave(
  "figure/04-fig04-microarray_genome_comparison-tree.png",
  height = 3.5, width = 6.5, dpi = "retina"
)
```


```{r}
# rbind(full_go,all_go) %>% 
#   inner_join(go_ord,by = "GO.Components") %>% 
#   dplyr::arrange(desc(micro))

# go_ord %>% 
#   avallecam::print_inf()

# rbind(full_go,all_go) %>% #class()
#   group_by(GO.Components,summary.db) %>% 
#   dplyr::summarise(count=n()) %>% ungroup() %>% 
#   dplyr::arrange(desc(count))

library(compareGroups)

# Descriptive table comparing protein type and GO term between the two
# databases, for the terms that occur more than 3 times on the chip.
microarray_proteins <- rbind(full_go, all_go) %>%
  inner_join(
    go_ord %>% filter(micro > 3),
    by = "GO.Components"
  ) %>%
  dplyr::arrange(desc(micro)) %>%
  janitor::clean_names() %>%
  # order the term levels by frequency
  dplyr::mutate(go_components = fct_infreq(go_components)) %>%
  dplyr::select(protype, summary_db, go_components) %>%
  compareGroups(
    summary_db ~ .,
    data = .,
    max.xlev = 30,
    chisq.test.perm = TRUE
  ) %>%
  createTable(digits = 1)

# Render in the document and export a spreadsheet copy.
microarray_proteins %>% export2md()
microarray_proteins %>%
  export2xls("table/04-tab01-microarray_proteins.xls")
```


#### Microarray data

##### Heteroskedasticity 
*pre- & post- transformation/normalization*

```{r, fig.height=3, fig.width=6}
# Mean vs. standard deviation per sample, before (raw) and after
# (normalized + log2 + conditional NA edit) preprocessing.
#
# na.rm = TRUE is required: `norx` contains NA by construction (non-positive
# ratios and values below -5 are set to NA). Without it every sample holding
# a single NA gets an NA mean and sd and is silently dropped from the figure,
# since warnings are disabled for the whole document.
m <- anti %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name, expression, -rowname) %>% 
  #mutate(expression= ifelse(expression < 30, NA, expression)) %>% 
  #dplyr::filter(rowname=="PVX_111175.544",sample_name=="LIM2017") # 21
  #dplyr::filter(sample_name=="PQSJ103")
  group_by(sample_name) %>% 
  dplyr::summarise(mean= mean(expression, na.rm = TRUE),
                   sd= sd(expression, na.rm = TRUE)) %>% 
  #dplyr::filter(mean<5000)
  dplyr::mutate(labl="Raw")

n <- norx %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name, expression, -rowname) %>% 
  group_by(sample_name) %>% 
  dplyr::summarise(mean= mean(expression, na.rm = TRUE),
                   sd= sd(expression, na.rm = TRUE)) %>% 
  dplyr::mutate(labl="Transformed")

# One point per sample, one free-scaled facet per step.
heteroskedasticity_plot <- rbind(m,n) %>% 
  dplyr::mutate(labl=forcats::fct_relevel(labl,
                                          "Raw",
                                          "Transformed")) %>% 
  ggplot(aes(mean,sd)) +
  geom_point(alpha=0.5) + 
  theme_bw() + 
  # linetype = 0 hides the fitted line
  geom_smooth(linetype=0) +
  facet_wrap(~labl,nrow = 1,ncol = 2,scales = "free") +
  labs(
    # title="Mean-variance dependence"
    x = "Mean", 
    y = "Standard deviation"
       ) +
  theme(strip.background = element_rect(colour = "black"#, fill = "white"
                                        ),
        strip.text.x = element_text(colour = "black",size = 10#, face = "bold"
                                    ))

# Print first: ggsave() below relies on this being the last plot displayed.
heteroskedasticity_plot
ggsave("figure/04-fig02-heteroskedasticity_prepos.png",
       height = 2.5,width = 5,dpi = "retina")
```

```{r,eval=FALSE}
# Stack the two figures in one column with panel tags A, B (patchwork).
# The relative height of each plot inside the patchwork may need adjusting.
preprocessing_plot +
  heteroskedasticity_plot +
  plot_layout(ncol = 1) +
  plot_annotation(tag_levels = "A")
ggsave(
  "figure/04-fig02-mix-preprocessing_heteroskedasticity.png",
  height = 5, width = 5, dpi = "retina"
)
```


```{r, fig.height=3, fig.width=6,eval=FALSE,echo=FALSE}
# Earlier, unevaluated version of the mean-vs-sd figure (the chunk header
# sets eval=FALSE): two separate plots instead of facets.
# Per-sample mean and sd of the raw values.
m <- anti %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name, expression, -rowname) %>% 
  #mutate(expression= ifelse(expression < 30, NA, expression)) %>% 
  #dplyr::filter(rowname=="PVX_111175.544",sample_name=="LIM2017") # 21
  #dplyr::filter(sample_name=="PQSJ103")
  group_by(sample_name) %>% dplyr::summarise(mean= mean(expression),
                                             sd= sd(expression)) %>% 
  #dplyr::filter(mean<5000)
  ggplot(aes(mean,sd)) +
  geom_point() +
  #geom_smooth() +
  labs(title="Mean-variance dependence",
       subtitle="Raw data") + theme_bw()

# Per-sample mean and sd of the transformed values.
# NOTE(review): mean()/sd() run without na.rm here, so samples with any NA
# in `norx` yield NA — confirm before re-enabling this chunk.
n <- norx %>% as.data.frame() %>% 
  rownames_to_column() %>% 
  gather(sample_name, expression, -rowname) %>% 
  group_by(sample_name) %>% dplyr::summarise(mean= mean(expression),
                                             sd= sd(expression)) %>% 
  ggplot(aes(mean,sd)) +
  geom_point() +
  #geom_smooth() +
  labs(title="Mean-variance dependence",
       subtitle="FOC Normalization method") + theme_bw()

# Combine the two panels with patchwork.
# Rmisc::multiplot(m,n,cols = 2)
m+n
```

##### Validity test

*Using Pf IVTT and purified proteins*

As a reference, check Crompton, 2010 [@crompton2010] supplementary info [here](http://www.pnas.org/content/suppl/2010/03/25/1001323107.DCSupplemental/pnas.201001323SI.pdf).

All comparisons are on the raw scale, since normalization cannot be done with the purified protein spots.

***

```{r, eval=FALSE}
# Spot IDs available in the purified-protein matrix.
rownames(x = pure)
```

__Non Present Reactive Targets__

_Was this a preliminary comprehensive screening?_

- __Load and tidy up__ the file sent in __28sep2015__ by Joe Campo.

- AMA1, ~~CSP, GEST, CelTOS~~
- Here, AMA1 and all the proteins in the list with a known _Gene Name_:

```{r}
# List of "non-present reactive targets", with column names normalised:
# whitespace -> ".", square brackets removed.
# A plain function replaces the deprecated funs() wrapper; the renaming
# rules are unchanged.
np <- readxl::read_xlsx("data-raw/PfPv500_NonpresentReactiveTargets.xlsx") %>% 
  rename_all(
    function(nm) {
      nm %>%
        #stringr::str_to_lower() %>%
        stringr::str_replace_all('\\s', '\\.') %>% 
        stringr::str_replace_all('\\[|\\]', '')
    }
  )
#np %>% filter(Gene ID=="PVX_092275")
# Entries whose product description mentions AMA1.
np %>% filter(stringr::str_detect(Product.Description, "AMA1"))
#np %>% filter(stringr::str_detect(Product.Description, "CSP"))
#np %>% filter(stringr::str_detect(Product.Description, "GEST"))
#np %>% filter(stringr::str_detect(Product.Description, "CelTOS"))
# Entries whose description contains "(", used here as a proxy for a
# known gene name given in parentheses.
np %>% filter(stringr::str_detect(Product.Description, "\\("))
```

- In this list, __ADi__ reports a __total of `r nrow(np)`__ Non present Reactive Targets.

**Validate w.r.t Pf EBA175, MSP2, CSP**
```{r}
# Place rows selected from `anti` / `pure` side by side, one column per spot,
# and label the columns. Every element of `spot_rows` is passed through
# as.matrix() before cbind(), and the bound matrix is converted with
# as.data.frame().
bind_spot_columns <- function(spot_rows, column_labels) {
  bound <- as.data.frame(do.call(cbind, lapply(spot_rows, as.matrix)))
  colnames(bound) <- column_labels
  bound
}

EBA175 <- bind_spot_columns(
  list(anti["PF3D7_0731500_e1s2.361", ],
       pure["EBA175, 0.3mg/mL.23", ],
       pure["EBA175, 0.1mg/mL.24", ]),
  c("PF3D7_0731500_e1s2.361", "EBA175_0.3.23", "EBA175_0.1.24")
)

# PF3D7_0930300_s1.67   falciparum      IVTT.AG PF3D7_0930300   merozoite surface protein 1 (MSP1)
MSP1s1 <- bind_spot_columns(
  list(anti["PF3D7_0930300_s1.67", ],
       pure["MSP1, 0.3mg/mL.601", ],
       pure["MSP1, 0.1mg/mL.602", ]),
  c("PF3D7_0930300_s1.67", "MSP1_0.3.601", "MSP1_0.1.602")
)

# PF3D7_0930300_s2.941  falciparum      IVTT.AG PF3D7_0930300   merozoite surface protein 1 (MSP1)
MSP1s2 <- bind_spot_columns(
  list(anti["PF3D7_0930300_s2.941", ],
       pure["MSP1, 0.3mg/mL.601", ],
       pure["MSP1, 0.1mg/mL.602", ]),
  c("PF3D7_0930300_s2.941", "MSP1_0.3.601", "MSP1_0.1.602")
)

# PF3D7_0206800.65      falciparum      IVTT.AG PF3D7_0206800   merozoite surface protein 2 (MSP2)
MSP2 <- bind_spot_columns(
  list(anti["PF3D7_0206800.65", ],
       pure["MSP2, 0.3mg/mL.890", ],
       pure["MSP2, 0.1mg/mL.891", ]),
  c("PF3D7_0206800.65", "MSP2_0.3.890", "MSP2_0.1.891")
)

# PF3D7_0304600.932     falciparum      IVTT.AG PF3D7_0304600   circumsporozoite (CS) protein (CSP)
CSP <- bind_spot_columns(
  list(anti["PF3D7_0304600.932", ],
       pure["Pf CSP, 0.3mg/mL.894", ],
       pure["Pf CSP, 0.1mg/mL.895", ]),
  c("PF3D7_0304600.932", "PfCSP_0.3.894", "PfCSP_0.1.895")
)

# VIVAX AMA1 (purified spots only)
PvAMA1 <- bind_spot_columns(
  list(pure["Pvivax AMA1, 0.3mg/mL.603", ],
       pure["Pvivax AMA1, 0.1mg/mL.604", ],
       pure["Pvivax AMA1 Eoto monomer prep2, 0.3mg/mL.892", ],
       pure["Pvivax AMA1 Eoto monomer prep2, 0.1mg/mL.893", ]),
  c("PvAMA1_0.3.603", "PvAMA1_0.1.604",
    "PvAMA1_Eoto_prep2_0.3.892", "PvAMA1_Eoto_prep2_0.1.893")
)

```

```{r, warning=FALSE, eval=FALSE, echo=FALSE}
# Spearman correlation matrix across the columns of each validation table
# (EBA175, MSP2, CSP).
cor(EBA175, method = "spearman")
cor(MSP2, method = "spearman")
cor(CSP, method = "spearman")
# Graphical version of the same Spearman correlations, one chart per table.
# The log-scale Pearson variant and the PvAMA1 chart are left disabled.
#PerformanceAnalytics::chart.Correlation(log(EBA175), method = "pearson")
PerformanceAnalytics::chart.Correlation(EBA175, method = "spearman")
PerformanceAnalytics::chart.Correlation(MSP2, method = "spearman")
PerformanceAnalytics::chart.Correlation(CSP, method = "spearman")
#PerformanceAnalytics::chart.Correlation(PvAMA1, method = "spearman")

# Disabled alternative: R-squared of a linear fit of the IVTT spot on the
# purified spot.
#summary(lm(PF3D7_0731500_e1s2.361 ~ EBA175_0.3.23, EBA175))$r.squared
```

```{r}
# Spearman correlation test between the IVTT spot and one purified spot of
# each antigen, tidied to a one-row table with broom::tidy().
# Note the purified column used: 0.3 for EBA175 and CSP, 0.1 for MSP2.

# EBA175: IVTT vs. purified 0.3
f <- broom::tidy(
  cor.test(~ PF3D7_0731500_e1s2.361 + EBA175_0.3.23,
           data = EBA175, method = "spearman")
) #%>% format(digits=2)

# MSP2: IVTT vs. purified 0.1
g <- broom::tidy(
  cor.test(~ PF3D7_0206800.65 + MSP2_0.1.891,
           data = MSP2, method = "spearman")
) #%>% format(digits=2)

# CSP: IVTT vs. purified 0.3
h <- broom::tidy(
  cor.test(~ PF3D7_0304600.932 + PfCSP_0.3.894,
           data = CSP, method = "spearman")
) #%>% format(digits=2)
```

```{r, fig.width=9, fig.height=3,eval=FALSE}
# One scatter plot per antigen: purified-protein spot on x, IVTT spot on y.
# The subtitle reports the Spearman estimate and P value stored in f / g / h,
# both formatted to two significant digits.
# (Disabled options kept from earlier drafts: geom_smooth(method = "lm"),
#  the S statistic in the subtitle, and a caption with the test method.)

v11 <- ggplot(EBA175, aes(x = EBA175_0.3.23, y = PF3D7_0731500_e1s2.361)) +
  geom_point() +
  labs(x = "Ab reactivity to Purified EBA175",
       y = "Ab reactivity to IVTT EBA175",
       title = "Validity test: PfEBA175",
       subtitle = paste0("rho=", format(f$estimate, digits = 2),
                         ", P=", format(f$p.value, digits = 2))) +
  theme_bw()

v32 <- ggplot(MSP2, aes(x = MSP2_0.1.891, y = PF3D7_0206800.65)) +
  geom_point() +
  labs(x = "Ab reactivity to Purified MSP2",
       y = "Ab reactivity to IVTT MSP2",
       title = "Validity test: PfMSP2",
       subtitle = paste0("rho=", format(g$estimate, digits = 2),
                         ", P=", format(g$p.value, digits = 2))) +
  theme_bw()

v41 <- ggplot(CSP, aes(x = PfCSP_0.3.894, y = PF3D7_0304600.932)) +
  geom_point() +
  labs(x = "Ab reactivity to Purified CSP",
       y = "Ab reactivity to IVTT CSP",
       title = "Validity test: PfCSP",
       subtitle = paste0("rho=", format(h$estimate, digits = 2),
                         ", P=", format(h$p.value, digits = 2))) +
  theme_bw()

# Three panels side by side (patchwork); formerly
# multiplot(v11, v32, v41, cols= 3)
v11 + v32 + v41
```

```{r, fig.width=8.5, fig.height=3}
# Faceted validity plot: one panel per antigen, purified spot on x and IVTT
# spot on y, annotated with the Spearman estimate and P value held in the
# tidied test results f (EBA175), g (MSP2) and h (CSP).

# One table per antigen with a common (rowname, antigen, x, y) layout.
a <- EBA175 %>% rownames_to_column() %>% 
  dplyr::mutate(antigen="Pf EBA175") %>% 
  select(rowname,antigen,x=EBA175_0.3.23,y=PF3D7_0731500_e1s2.361)
b <- MSP2 %>% rownames_to_column() %>% 
  dplyr::mutate(antigen="Pf MSP2") %>% 
  select(rowname,antigen,x=MSP2_0.1.891,y=PF3D7_0206800.65)
c <- CSP %>% rownames_to_column() %>% 
  dplyr::mutate(antigen="Pf CSP") %>% 
  select(rowname,antigen,x=PfCSP_0.3.894,y=PF3D7_0304600.932)

# Annotation text for one tidied correlation test: rho to two significant
# digits, then the P value -- "<0.001" below that threshold, otherwise rounded
# to two significant digits (it used to be pasted at full precision, which
# produced very long labels and did not match the rounding applied to rho).
rho_p_label <- function(test_result) {
  p_text <- if (isTRUE(test_result$p.value < 0.001)) {
    "<0.001"
  } else {
    paste0("=", signif(test_result$p.value, 2))
  }
  paste0("rho=", format(test_result$estimate, digits = 2), "\nP", p_text)
}

rbind(a,b,c) %>% 
  ggplot(aes(x,y)) +
  geom_point() + theme_bw() + #geom_smooth(method = "lm") +
  facet_grid(.~antigen#,scales = "free", space = "free"
             ) +
  geom_text(
    aes(x,y,label=lab),
    # tibble() replaces the deprecated data_frame(); x and y are recycled so
    # every panel gets its label at the same position.
    data = tibble(x=0,y=50700,
                  lab=c(rho_p_label(f),
                        rho_p_label(g),
                        rho_p_label(h)),
                  antigen=c("Pf EBA175","Pf MSP2","Pf CSP")),
    vjust=.5,hjust=0.05,size=3.5) +
  xlab("Ab reactivity to Purified Antigen") + ylab("Ab reactivity to IVTT Antigen") +
  labs(title="Validity of the protein microarray assay") +
  #     subtitle= paste0(#"S=",g$statistic,", rho="
  #                      "rho=",g$estimate,", P=",g$p.value)#,
  #     #caption=paste0("method: ",g$method)
  #    ) + 
  theme(strip.background = element_rect(colour = "black"#, fill = "white"
  ),
  strip.text.x = element_text(colour = "black",size = 11#, face = "bold"
  ))

### TRY TO ADD MATHEMATICAL NOTATION
# https://blog.snap.uaf.edu/2013/03/25/mathematical-notation-in-r-plots/
# https://stackoverflow.com/questions/11408031/using-plotmath-symbol-in-ggplot2-geom-text-legend-is-altered-why
# https://rstudio-pubs-static.s3.amazonaws.com/136237_170402e5f0b54561bf7605bdea98267a.html
# http://ggplot2.tidyverse.org/reference/geom_text.html
# http://vis.supstat.com/2013/04/mathematical-annotation-in-r/
# https://stackoverflow.com/questions/4973898/combining-paste-and-expression-functions-in-plot-labels
```

output figure

```{r, fig.width=8, fig.height=4}
# Reshape one validation table to long format.
#   spots           : table with one IVTT column and the purified columns
#   antigen_label   : text stored in the `antigen` column
#   ivtt_id         : name of the IVTT column; stored in `ag_ivtt`, and that
#                     column is renamed to `ag_ivtt_values`
#   purified_prefix : prefix selecting the purified columns to stack
# Result columns: antigen, ag_ivtt, ag_ivtt_values, ag_purified,
# ag_purified_values, ag_purified_fct (middle part of the purified column
# name, i.e. the text between "_" and the last ".").
lengthen_validation <- function(spots, antigen_label, ivtt_id,
                                purified_prefix) {
  spots %>%
    as_tibble() %>%
    dplyr::mutate(antigen = antigen_label,
                  ag_ivtt = ivtt_id) %>%
    dplyr::rename("ag_ivtt_values" = !!ivtt_id) %>%
    pivot_longer(cols = starts_with(purified_prefix),
                 names_to = "ag_purified",
                 values_to = "ag_purified_values") %>%
    select(antigen, ag_ivtt, ag_ivtt_values, everything()) %>%
    mutate(ag_purified_fct = str_replace(ag_purified,
                                         "(.+)\\_(.+)\\.(.+)",
                                         "\\2"))
}

ebaxx <- lengthen_validation(EBA175, "Pf EBA175",
                             "PF3D7_0731500_e1s2.361", "EBA")
mspxx <- lengthen_validation(MSP2, "Pf MSP2",
                             "PF3D7_0206800.65", "MSP")
cspxx <- lengthen_validation(CSP, "Pf CSP",
                             "PF3D7_0304600.932", "PfCSP")

# All three antigens stacked; used for both the points and the labels.
validation_long <- ebaxx %>%
  union_all(mspxx) %>%
  union_all(cspxx)

# One label row per antigen, taken from the "0.3" rows: the IVTT spot ID with
# everything from its last "." removed, placed at a fixed (x, y) position.
ivtt_labels <- validation_long %>%
  group_by(antigen, ag_purified_fct) %>%
  filter(ag_purified_values == max(ag_purified_values)) %>%
  filter(ag_purified_fct == "0.3") %>%
  mutate(ag_ivtt = str_replace(ag_ivtt,
                               "(.+)\\.(.+)",
                               "\\1"),
         x = 50700,
         y = 0)

ggplot(validation_long,
       aes(x = ag_ivtt_values, y = ag_purified_values)) +
  geom_point(alpha = 0.5) +
  coord_fixed(ratio = 1) +
  facet_grid(ag_purified_fct ~ antigen) +
  geom_text(data = ivtt_labels,
            aes(x = x, y = y, label = ag_ivtt),
            vjust = 0, hjust = 1, size = 2) +
  labs(x = "IVTT Antigen MFI",
       y = "Purified Antigen MFI",
       caption = "MFI = Mean Fluorescence Intensity")

# Saves the last plot displayed.
ggsave("figure/04-fig03-validation_correlation.png",
       height = 4, width = 8, dpi = "retina")
```

```{r}
# Spearman rho between the first column of a validation table and each of its
# other columns, joined to the annotation table `ln` by "ID".
#
# Only the first row of the correlation matrix is kept; its row name (the
# first column's name) goes into `ID`. The self-correlation is then removed by
# name (pure.prot != ID) rather than by testing rho != 1: exact equality on a
# floating-point value is fragile, and it would also discard a purified spot
# that happened to correlate perfectly.
first_column_rho <- function(spots) {
  cor(spots, method = "spearman") %>%
    as.data.frame() %>%
    rownames_to_column(var = "ID") %>%
    slice(1) %>%
    pivot_longer(cols = -ID, names_to = "pure.prot", values_to = "rho") %>%
    dplyr::filter(pure.prot != ID) %>%
    inner_join(ln, by = "ID")
}

w <- first_column_rho(EBA175)
x <- first_column_rho(MSP2)
z <- first_column_rho(CSP)

v <- rbind(w,x,z)#; max(v$rho); min(v$rho)
v
```

```{r}
# f
# g
# h
# list_validation <- c("0731500", "0206800", "0304600", "EBA", "MSP2", "CSP")
# All validation tables bound column-wise into one tibble, with column names
# cleaned by janitor::clean_names().
d <- janitor::clean_names(
  as_tibble(
    dplyr::bind_cols(EBA175,
                     MSP1s1, MSP1s2,
                     PvAMA1,
                     MSP2, CSP)
  )
)

# one
# d %>% 
#   as.matrix() %>% 
#   Hmisc::rcorr() %>% 
#   broom::tidy()

#two
# Spearman correlation test for every pair of columns.
#
# matrix_to_correlate: a table with at least two named columns that can be
#   extracted with `[[` (e.g. a data frame).
# Returns one row per unordered column pair, in combn() order, with the pair
#   in columns `x` and `y` followed by the columns produced by broom::tidy()
#   for the corresponding stats::cor.test() result.
correlate_combo <- function(matrix_to_correlate) {
  column_names <- names(matrix_to_correlate)
  stopifnot(length(column_names) >= 2)

  # combn() returns a 2 x n_pairs matrix. The tibble is built from its two
  # rows directly: calling as_tibble() on a matrix without column names relies
  # on deprecated name repair and raises a warning.
  pair_names <- utils::combn(column_names, 2)
  var_pairs <- tibble::tibble(x = pair_names[1, ],
                              y = pair_names[2, ])

  var_pairs %>% 
    dplyr::mutate(
      r.test = purrr::map2(
        x, y,
        function(first, second) {
          stats::cor.test(matrix_to_correlate[[first]],
                          matrix_to_correlate[[second]],
                          method = "spearman")
        }),
      r.test = purrr::map(r.test, broom::tidy)) %>%
    tidyr::unnest(r.test) #%>% 
  # filter(str_detect(x,paste0(str_to_lower(list_validation),collapse = "|"))) %>% 
  # distinct(x,.keep_all = T) %>% 
  # arrange(desc(estimate)) %>% 
  # view()
}

# Pairwise Spearman tests for EBA175, CSP and MSP2 stacked into one table.
# For each antigen the third pair returned by correlate_combo() is dropped;
# with three columns that is the pair formed by the second and third columns.
rho_pvalue <- bind_rows(
  slice(correlate_combo(matrix_to_correlate = EBA175), -3),
  slice(correlate_combo(matrix_to_correlate = CSP), -3),
  slice(correlate_combo(matrix_to_correlate = MSP2), -3)
) #%>% view()

rho_pvalue
# Export the table as a spreadsheet.
writexl::write_xlsx(rho_pvalue,
                    "table/04-tab02-validation_correlation.xlsx")

# All pairwise tests among the PvAMA1 columns (printed only).
correlate_combo(matrix_to_correlate = PvAMA1) #%>% view()

#three
library(corrr)
# Correlation plot of every column in `d`, with x-axis labels rotated 90
# degrees.
rplot(correlate(d)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
  # focus(mpg:drat, mirror = TRUE) %>%
  # network_plot()
```

```{r}
# Validation scatter plots (IVTT vs. purified values), faceted by purified
# label (rows) and antigen (columns), with the Spearman estimate and P value
# from `rho_pvalue` written inside each panel.

# All three antigens stacked once; reused for the points and for the labels.
validation_points <- ebaxx %>%
  union_all(mspxx) %>% 
  union_all(cspxx)

# Exactly one annotation row per (antigen, purified spot). distinct() is used
# instead of keeping the row(s) with the maximum purified value: that filter
# yields several overlapping labels when the maximum is tied and none at all
# when a value is missing.
facet_stats <- validation_points %>%
  dplyr::distinct(antigen, ag_purified_fct, ag_ivtt, ag_purified) %>%
  left_join(
    rho_pvalue %>% 
      select(x:p.value) %>% 
      dplyr::rename("ag_ivtt" = x,
                    "ag_purified" = y),
    # Explicit keys: the join no longer depends on which column names the two
    # tables happen to share.
    by = c("ag_ivtt", "ag_purified")
  ) %>% 
  mutate(x = 50700, y = 0,
         # P values of 0.001 or more are rounded to two significant digits
         # (element-wise via signif) instead of being pasted at full
         # precision.
         label = paste0("rho=",
                        format(x = estimate, digits = 2),
                        "\np",
                        ifelse(p.value < 0.001,
                               "<0.001",
                               paste0("=", signif(p.value, 2)))))

validation_points %>%
  ggplot(aes(x = ag_ivtt_values, y = ag_purified_values)) +
  geom_point(alpha = 0.5) +
  coord_fixed(ratio = 1) +
  facet_grid(ag_purified_fct~antigen) +
  geom_text(data = facet_stats,
            aes(x = x, y = y, label = label),
            # parse = T,
            vjust=0,hjust=1, size = 3) +
  ylab("Purified Antigen MFI") + 
  xlab("IVTT Antigen MFI") + 
  labs(caption = "MFI = Mean Fluorescence Intensity")

# Saves the last plot displayed.
ggsave("figure/04-fig03-validation_correlation-inset.png",
       height = 4,width = 8,dpi = "retina")
```


__RANGO__ de valores `rho`: __`r format(min(v$rho),digits=2)` - `r max(v$rho) %>% format(digits=2)`__

### 0. end

```{r}

```


<!-- ### 6. Breadth and Intensity of response -->

<!-- - Método: -->
<!--     + __Primero,__ Prueba de Hipótesis para comparar varianzas: -->
<!--         + Rpta: Bajo un n.s. 0.05, F cae en Región de no-Rechazo de Hipótesis Nula (__RnoRHo__). -->
<!--             + Conclusión: Supuesto de igualdad de varianzas poblacionales SÍ es válido. -->
<!--         + Rpta: Bajo un n.s. 0.05, F cae en Región de Rechazo de Hipótesis Nula (__RRHo__). -->
<!--             + Conclusión: Supuesto de igualdad de varianzas poblacionales NO es válido. -->
<!--     + __Segundo,__ Prueba de Hipótesis para comparar medias: -->
<!--         + Si es que SÍ se asume que las varianzas son iguales: __Student t-test__ -->
<!--         + Si es que NO se asume que las varianzas son iguales: __Welch t-test__ -->

<!-- - Definiciones: -->
<!--     + __Amplitud:__ Número de Ag reactivos por paciente -->
<!--     + __Intensidad:__ Media de intensidad de Ag por paciente -->

<!-- ```{r} -->
<!-- eset <- eset.VIVAX.SEV.FILTER -->

<!-- fin <- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   group_by(sample) %>%  -->
<!--   # dplyr::summarise(sample_mean=mean(value, na.rm=TRUE),  -->
<!--   # cambio por exploracion de distribuciones! -->
<!--   dplyr::summarise(sample_mean=median(value, na.rm=TRUE), -->
<!--                    sample_freq=(sum(value>1, na.rm=TRUE)#*100/n() -->
<!--                                 )) %>%  -->
<!--   full_join(pData(eset) %>%  -->
<!--               rownames_to_column() %>%  -->
<!--               dplyr::rename(sample=rowname), -->
<!--             by="sample") %>%  -->
<!--   select(-Study,-Group) %>%  -->
<!--   mutate(episodio_previo=forcats::fct_relevel(episodio_previo,"con")) %>%  -->
<!--   mutate(sev_WHO=forcats::fct_relevel(sev_WHO,"no-severo")) -->

<!-- #biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!-- #filter(sample=="LIM2071") %>% select(value) %>% filter(value > 1) -->
<!-- ``` -->

<!-- __evaluate distribution per subject__ -->

<!-- ```{r,fig.height=8,fig.width=8} -->
<!-- # explorar distribuciones individuales -->
<!-- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   ggplot(aes(x = value)) + -->
<!--   geom_histogram() + -->
<!--   facet_wrap(~sample) -->
<!-- ``` -->

<!-- ```{r,fig.width=3,fig.height=3} -->
<!-- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   select(value) %>%  -->
<!--   # group_by(sample) %>%  -->
<!--   skimr::skim() -->

<!-- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   select(sev_WHO,value) %>%  -->
<!--   group_by(sev_WHO) %>% -->
<!--   skimr::skim() -->

<!-- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   select(episodio_previo,value) %>%  -->
<!--   filter(!is.na(episodio_previo)) %>%  -->
<!--   group_by(episodio_previo) %>% -->
<!--   skimr::skim() -->

<!-- biobroom::tidy.ExpressionSet(eset,addPheno = TRUE) %>%  -->
<!--   select(sample,value) %>%  -->
<!--   group_by(sample) %>%  -->
<!--   skimr::skim() %>%  -->
<!--   as_tibble() %>%  -->
<!--   mutate(diff_median_mean=numeric.p50-numeric.mean) %>%  -->
<!--   ggplot(aes(x = diff_median_mean)) + -->
<!--   geom_histogram() -->
<!-- ``` -->


<!-- #### 6.3 Exploratory -->

<!-- ##### Per age -->

<!-- ```{r} -->
<!-- fin %>%  -->
<!--   dplyr::count(edad_CAT) -->
<!-- ``` -->

<!-- ```{r, fig.height=3, fig.width=6} -->
<!-- r <- fin %>% -->
<!--   ggplot(aes(edad_CAT,sample_freq#, fill=sev_WHO -->
<!--              )) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(#aes(fill=sev_WHO),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   xlab("Categorized Age") + ylab("Number Reactive") + -->
<!--   labs(title="Breadth of Ab response", subtitle="per age category")  + theme_bw() -->

<!-- s <- fin %>%  -->
<!--   ggplot(aes(edad_CAT,sample_mean#, fill=sev_WHO -->
<!--              )) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(#aes(fill=sev_WHO),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   xlab("Categorized Age") + ylab("Mean Intensity") + -->
<!--   labs(title="Intensity of Ab response", subtitle="per age category")  + theme_bw() -->

<!-- #t <- fin %>% filter(expo_CAT != "NA") %>%  -->
<!-- #  ggplot(aes(expo_CAT,sample_freq#, fill=episodio_previo -->
<!-- #             )) + -->
<!-- #  geom_boxplot(position=position_dodge(0.8)) + -->
<!-- #  geom_dotplot(#aes(fill=episodio_previo),  -->
<!-- #               binaxis='y', stackdir='center',  -->
<!-- #               dotsize=1, position=position_dodge(0.8)) + -->
<!-- #  labs(title="Breadth of Ab response",  -->
<!-- #       subtitle="per previous episode category")  + theme_bw() -->
<!-- # -->
<!-- #u <- fin %>% filter(expo_CAT != "NA") %>%   -->
<!-- #  ggplot(aes(expo_CAT,sample_mean#, fill=episodio_previo -->
<!-- #             )) + -->
<!-- #  geom_boxplot(position=position_dodge(0.8)) + -->
<!-- #  geom_dotplot(#aes(fill=episodio_previo),  -->
<!-- #               binaxis='y', stackdir='center',  -->
<!-- #               dotsize=1, position=position_dodge(0.8)) + -->
<!-- #  labs(title="Intensity of Ab response",  -->
<!-- #       subtitle="per previous episode category")  + theme_bw() -->

<!-- # Rmisc::multiplot(r,s,cols = 2) -->
<!-- r+s -->
<!-- ``` -->


<!-- ##### severe and episodes per age -->

<!-- ```{r} -->
<!-- fin %>%  -->
<!--   group_by(sev_WHO) %>%  -->
<!--   dplyr::count(edad_CAT) -->

<!-- fin %>%  -->
<!--   group_by(episodio_previo) %>%  -->
<!--   dplyr::count(edad_CAT) -->
<!-- ``` -->


<!-- ```{r, fig.height=6, fig.width=8.5} -->
<!-- r <- fin %>%  -->
<!--   ggplot(aes(edad_CAT,sample_freq, fill=sev_WHO)) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(aes(fill=sev_WHO),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   scale_fill_discrete(name="WHO criteria", -->
<!--                       #breaks=c("with", "without"), -->
<!--                       labels=c("non-severe", "severe")) + -->
<!--   xlab("Categorized Age") + ylab("Number Reactive") + -->
<!--   labs(title="Breadth of Ab response", subtitle="per Age and Severity")  + theme_bw() -->

<!-- s <- fin %>%  -->
<!--   ggplot(aes(edad_CAT,sample_mean, fill=sev_WHO)) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(aes(fill=sev_WHO),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   scale_fill_discrete(name="WHO criteria", -->
<!--                       #breaks=c("with", "without"), -->
<!--                       labels=c("non-severe", "severe")) + -->
<!--   xlab("Categorized Age") + ylab("Mean Intensity") + -->
<!--   labs(title="Intensity of Ab response", subtitle="per Age and Severity")  + theme_bw() -->

<!-- t <- fin %>%  -->
<!--   filter(!is.na(episodio_previo)) %>%  -->
<!--   ggplot(aes(edad_CAT,sample_freq, fill=episodio_previo)) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(aes(fill=episodio_previo),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   scale_fill_discrete(name="Previous\nEpisodes", -->
<!--                       #breaks=c("with", "without"), -->
<!--                       labels=c("with", "without")) + -->
<!--   xlab("Categorized Age") + ylab("Number Reactive") + -->
<!--   labs(title="Breadth of Ab response", subtitle="per Age and Previous Episode")  + theme_bw() -->

<!-- u <- fin %>%  -->
<!--   filter(!is.na(episodio_previo)) %>%  -->
<!--   ggplot(aes(edad_CAT,sample_mean, fill=episodio_previo)) + -->
<!--   geom_boxplot(position=position_dodge(0.8)) + -->
<!--   geom_dotplot(aes(fill=episodio_previo),  -->
<!--                binaxis='y', stackdir='center',  -->
<!--                dotsize=1, position=position_dodge(0.8)) + -->
<!--   scale_fill_discrete(name="Previous\nEpisodes", -->
<!--                       #breaks=c("with", "without"), -->
<!--                       labels=c("with", "without")) + -->
<!--   xlab("Categorized Age") + ylab("Mean Intensity") + -->
<!--   labs(title="Intensity of Ab response", subtitle="per Age and Previous Episode")  + theme_bw() -->

<!-- # Rmisc::multiplot(r,t,s,u,cols = 2) -->
<!-- r+t+s+u -->
<!-- ``` -->

<!-- ### 0. Extra-Analysis -->

<!-- #### Shared proteins -->

<!-- - __MSP1__ was shared with Previous Exposure and Highly reactive antigens: -->
<!--     + __S1__, N-terminal (PvMSP1~NT~), was differentially reactive. -->
<!--     + __S2__, C-terminal (PvMSP1~19~), was highly reactive. -->

<!-- ```{r, fig.align='center', fig.width=4.7, fig.height=5} -->
<!-- g1_id <- exp_t %>% dplyr::select(Gene.ID)      # 960 -->
<!-- g2_id <- exp_df %>% dplyr::select(Gene.ID)     # 963 -->
<!-- g3_id <- sev_d %>% dplyr::select(Gene.ID)     # 963 -->

<!-- g1_idl= unlist(as.list(g1_id)) -->
<!-- g2_idl= unlist(as.list(g2_id)) -->
<!-- g3_idl= unlist(as.list(g3_id)) -->

<!-- tmp <- gplots::venn(list("severe\nmalaria"=g3_idl, "with\nepisode"=g2_idl, "top reactive"=g1_idl)) -->
<!-- ``` -->

<!-- ```{r} -->
<!-- sg <- exp_t %>% inner_join(exp_df, by="Gene.ID") %>% select(Gene.ID) -->
<!-- td %>% filter(Gene.ID==sg$Gene.ID) %>%  -->
<!--   inner_join(all, by="Gene.ID") %>% inner_join(lg %>% dplyr::rename(seg.num=n), by="Gene.ID") %>%  -->
<!--   select(ID,Gene.ID,Product.Description,Gene.Name,adj.P.Val,p.order,logFC,AveExpr,con,GO.Components,GO.Processes) -->
<!-- ``` -->

<!-- #### Shared proteins all -->

<!-- - against all with previous episode -->
<!-- - Intensidad de reactividad por Episodios Previos __interviene__ en la intensidad por Malaria Severa: -->
<!--     + __5__ antigens identified in Severe Malaria  were differentially reactive (adj.P.Val<0.05)  -->
<!--     in samples with Previous Episode. -->

<!-- ```{r, fig.align='center', fig.width=4.7, fig.height=5} -->
<!-- g1_id <- exp_t %>% dplyr::select(Gene.ID)      # 960 -->
<!-- g2_id <- exp_da %>% dplyr::select(Gene.ID)     # 963 -->
<!-- g3_id <- sev_d %>% dplyr::select(Gene.ID)     # 963 -->

<!-- g1_idl= unlist(as.list(g1_id)) -->
<!-- g2_idl= unlist(as.list(g2_id)) -->
<!-- g3_idl= unlist(as.list(g3_id)) -->

<!-- tmp <- gplots::venn(list("severe\nmalaria"=g3_idl, "with\nepisode"=g2_idl, "top reactive"=g1_idl)) -->
<!-- ``` -->

<!-- ```{r} -->
<!-- exp_da %>%  -->
<!--   inner_join(sev_d, by="Gene.ID") %>%  -->
<!--   select(Gene.ID,Product.Description.x,Gene.Name.x, -->
<!--          con,sin,adj.P.Val.x,p.order.x, -->
<!--          severo,no_severo,P.Value.y,adj.P.Val.y,p.order.y) -->
<!-- #sg <- exp_da %>% inner_join(sev_d, by="Gene.ID") %>% select(Gene.ID) -->
<!-- #sev_d %>%  -->
<!-- #  inner_join(exp_da %>% inner_join(sev_d, by="Gene.ID"), -->
<!-- #             by="Gene.ID") -->
<!-- #all %>% filter(Gene.ID==sg$Gene.ID) -->
<!-- #td %>% filter(Gene.ID==sg$Gene.ID) %>%  -->
<!-- #  inner_join(all, by="Gene.ID") %>% inner_join(lg %>% dplyr::rename(seg.num=n), by="Gene.ID") -->
<!-- ``` -->

<!-- #### Subset severe Ag -->

<!-- - __Update__ `sev_d` with `anti_join()` -->

<!-- ```{r} -->
<!-- sev_d <- sev_d %>% -->
<!--   anti_join(exp_da, by="Gene.ID")  -->

<!-- sev_d %>% -->
<!--   dplyr::arrange(p.order) %>%  -->
<!--   select(Gene.ID,Product.Description,Gene.Name,p.order,  -->
<!--          everything(), -Description -->
<!--          #severo,no_severo,P.Value,adj.P.Val,p.order -->
<!--          ) -->
<!-- ``` -->

<!-- ```{r} -->

<!-- ``` -->


<!-- ### 7. PlasmoDB summaries -->

<!-- ##### GO proportion others -->

<!-- ```{r} -->
<!-- all %>% select(GO.Components,GO.Functions,GO.Processes) %>%  -->
<!--   replace_na(list(GO.Components="na", -->
<!--                   GO.Functions="na", -->
<!--                   GO.Processes="na") -->
<!--              ) %>%  -->
<!--   dplyr::mutate(GO.Components=ifelse(GO.Components!="na",1,0), -->
<!--                 GO.Functions=ifelse(GO.Functions!="na",1,0), -->
<!--                 GO.Processes=ifelse(GO.Processes!="na",1,0) -->
<!--                 ) %>%  -->
<!--   summarise_all(sum,na.rm=T) #%>% t() -->
<!-- ``` -->

<!-- ```{r, eval=FALSE} -->
<!-- exp_t %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!-- sev_d %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!-- sev_d %>% select(Gene.ID,GO.Functions) %>%  -->
<!--   separate(GO.Functions, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,value,-Gene.ID) %>%  -->
<!--   filter(value!="NA") %>% dplyr::count(value) %>% dplyr::arrange(desc(n)) #%>% filter(n>1) -->
<!-- exp_da %>% filter(GO.Functions!="NA") %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- exp_da %>% select(Gene.ID,GO.Functions) %>%  -->
<!--   separate(GO.Functions, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,value,-Gene.ID) %>%  -->
<!--   filter(value!="NA") %>% dplyr::count(value) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- exp_df %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!-- exp_df %>% select(Gene.ID,GO.Functions) %>%  -->
<!--   separate(GO.Functions, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,value,-Gene.ID) %>%  -->
<!--   filter(value!="NA") %>% dplyr::count(value) %>% dplyr::arrange(desc(n)) #%>% filter(n>1) -->
<!-- ``` -->

<!-- ```{r,fig.height=5,fig.width=12, eval=FALSE, echo=FALSE} -->
<!-- a <- exp_t %>%  #dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!--   #dplyr::mutate(summary.db="Top 10%") %>% -->
<!--   dplyr::mutate(summary.db="Top 10%") %>%   -->
<!--   select(summary.db,GO.Functions) -->
<!-- b <- sev_d %>%  #dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!--   dplyr::mutate(summary.db="severe p<0.05") %>% -->
<!--   select(summary.db,GO.Functions) -->
<!-- #exp_da %>% filter(GO.Functions!="NA") %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- c <- exp_da %>% select(Gene.ID,GO.Functions) %>%  -->
<!--   separate(GO.Functions, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,GO.Functions,-Gene.ID) %>%  -->
<!--   filter(GO.Functions!="NA") %>% #dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!--   dplyr::mutate(summary.db="w/episode FDR<0.05") %>% -->
<!--   select(summary.db,GO.Functions) -->
<!-- #exp_df %>%  dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) -->
<!-- d <- exp_df %>% select(Gene.ID,GO.Functions) %>%  -->
<!--   separate(GO.Functions, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,GO.Functions,-Gene.ID) %>%  -->
<!--   filter(GO.Functions!="NA") %>% #dplyr::count(GO.Functions) %>% dplyr::arrange(desc(n)) #%>% filter(n>1) -->
<!--   dplyr::mutate(summary.db="w/episode FDR<0.05 & logFC>1") %>% -->
<!--   select(summary.db,GO.Functions) -->

<!-- rbind(a,b,c, -->
<!--       d) %>%  -->
<!--   #replace_na(list(SignalP = "na")) %>%  -->
<!--   ggplot(aes(#x=summary.db#, fill=GO.Functions -->
<!--     x=GO.Functions#, fill= -->
<!--   )) + -->
<!--   geom_bar(#position = "fill" -->
<!--     position = "identity") + -->
<!--   facet_grid(.~summary.db, -->
<!--              scales = "free" -->
<!--              #space = "free" -->
<!--   ) +# -->
<!--   coord_flip() -->
<!-- ``` -->

<!-- ```{r, eval=FALSE} -->
<!-- exp_t %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!-- sev_d %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!-- exp_da %>% filter(GO.Processes!="NA") %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- exp_da %>% select(Gene.ID,GO.Processes) %>%  -->
<!--   separate(GO.Processes, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,value,-Gene.ID) %>%  -->
<!--   filter(value!="NA") %>% dplyr::count(value) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- exp_df %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!-- ``` -->

<!-- ```{r,fig.height=5,fig.width=12, eval=FALSE, echo=FALSE} -->
<!-- a <- exp_t %>%  #dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!--   #dplyr::mutate(summary.db="Top 10%") %>% -->
<!--   dplyr::mutate(summary.db="Top 10%") %>%   -->
<!--   select(summary.db,GO.Processes) -->
<!-- b <- sev_d %>%  #dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!--   dplyr::mutate(summary.db="severe p<0.05") %>% -->
<!--   select(summary.db,GO.Processes) -->
<!-- #exp_da %>% filter(GO.Processes!="NA") %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!-- c <- exp_da %>% select(Gene.ID,GO.Processes) %>%  -->
<!--   separate(GO.Processes, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,GO.Processes,-Gene.ID) %>%  -->
<!--   filter(GO.Processes!="NA") %>% #dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) %>% filter(n>1) -->
<!--   dplyr::mutate(summary.db="w/episode FDR<0.05") %>% -->
<!--   select(summary.db,GO.Processes) -->
<!-- #exp_df %>%  dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) -->
<!-- d <- exp_df %>% select(Gene.ID,GO.Processes) %>%  -->
<!--   separate(GO.Processes, c("c1","c2","c3"),sep = ",") %>%  -->
<!--   mutate_at(vars(c1:c3), funs(trimws)) %>%  -->
<!--   gather(component,GO.Processes,-Gene.ID) %>%  -->
<!--   filter(GO.Processes!="NA") %>% #dplyr::count(GO.Processes) %>% dplyr::arrange(desc(n)) #%>% filter(n>1) -->
<!--   dplyr::mutate(summary.db="w/episode FDR<0.05 & logFC>1") %>% -->
<!--   select(summary.db,GO.Processes) -->

<!-- rbind(a,b,c, -->
<!--       d) %>%  -->
<!--   #replace_na(list(SignalP = "na")) %>%  -->
<!--   ggplot(aes(#x=summary.db#, fill=GO.Processes -->
<!--     x=GO.Processes#, fill= -->
<!--   )) + -->
<!--   geom_bar(#position = "fill" -->
<!--     position = "identity") + -->
<!--   facet_grid(.~summary.db, -->
<!--              scales = "free" -->
<!--              #space = "free" -->
<!--   ) +# -->
<!--   coord_flip() -->
<!-- ``` -->


<!-- ##### HYPOTHETICAL proportion -->

<!-- ```{r, eval=FALSE} -->
<!-- exp_t %>%  #dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!--   filter(stringr::str_detect(Product.Description, "hypothetical")) %>%  -->
<!--   select(ID,Gene.ID,Product.Description,GO.Components) -->
<!-- sev_d %>%  #dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!--   filter(stringr::str_detect(Product.Description, "hypothetical")) %>%  -->
<!--   select(ID,Gene.ID,Product.Description,P.Value,p.order,GO.Components) -->
<!-- ``` -->

<!-- - Among highly reactive antigens (n=10), __03__ were hypothetical -->
<!-- - Among differentially reactive antigens in: -->
<!--     + severe malaria (n=10), __02__ were hypothetical. -->
<!--     + previous exposure (n=140), __46__ were hypothetical. (table below) -->

<!-- ```{r} -->
<!-- exp_da %>%  #dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!--   filter(stringr::str_detect(Product.Description, "hypothetical")) %>%  -->
<!--   select(ID,Gene.ID,Product.Description,adj.P.Val,p.order,GO.Components) -->
<!-- ``` -->

<!-- ##### KNOWN proportion -->

<!-- - Using Product Descriptions: -->
<!--     + In previous exposure (adj.P.Val < 0.05): -->
<!--         + __9__ antigens with known gene names were differentially reactive. -->
<!--         + __5__ antigens belong to __MSP7__. -->
<!--     + In previous exposure (adj.P.Val < 0.05, logFC > 1): -->
<!--         + __14__ antigens were differentially reactive. -->
<!--         + __2__ antigens belong to __MSP4__. -->

<!-- ```{r} -->
<!-- exp_da %>%  dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!--   filter(stringr::str_detect(Product.Description, "\\(.{2,}\\)")) -->
<!-- #exp_df %>%  dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) -->

<!-- #exp_da %>%  dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!-- #  filter(stringr::str_detect(Product.Description, "MSP7")) -->
<!-- exp_da %>%  #dplyr::count(Product.Description) %>% dplyr::arrange(desc(n)) %>%  -->
<!--   filter(stringr::str_detect(Product.Description, "MSP7")) %>%  -->
<!--   select(ID,Gene.ID,Product.Description,adj.P.Val,p.order) -->
<!-- ``` -->

<!-- ```{r, eval=FALSE} -->
<!-- exp_t %>%  dplyr::count(Gene.Name) %>% dplyr::arrange(desc(n)) -->
<!-- sev_d %>%  dplyr::count(Gene.Name) %>% dplyr::arrange(desc(n)) -->
<!-- exp_da %>%  dplyr::count(Gene.Name) %>% dplyr::arrange(desc(n)) -->
<!-- exp_df %>%  dplyr::count(Gene.Name) %>% dplyr::arrange(desc(n)) -->
<!-- ``` -->

<!-- ##### ORTHOLOGS count -->

<!-- ```{r, eval=FALSE} -->
<!-- exp_t %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!--   dplyr::count(Ortholog.count) %>% dplyr::arrange(Ortholog.count) -->
<!-- sev_d %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!--   dplyr::count(Ortholog.count) %>% dplyr::arrange(Ortholog.count) -->
<!-- exp_da %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!--   dplyr::count(Ortholog.count) %>% dplyr::arrange(Ortholog.count) -->
<!-- exp_df %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!--   dplyr::count(Ortholog.count) %>% dplyr::arrange(Ortholog.count) -->
<!-- ``` -->

<!-- - __PVX_082685 (MSP7)__ and __PVX_097730 (hypothetical protein, conserved)__  -->
<!-- were differentially reactive in samples with previous exposure and have only __two orthologs__:  -->
<!-- _P. vivax_ and _P. cynomolgi_. -->
<!--     + __MSP7__ had an adj.P.Val<0.05 and a Case mean intensity > 1, -->
<!--     + __PVX_097730__ had an adj.P.Val<0.05, logFC>0.97, and Case mean intensity > 2.8 -->

<!-- ```{r} -->
<!-- exp_da %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!--   filter(Ortholog.count==2) %>%  -->
<!--   select(ID,Gene.ID,Product.Description,adj.P.Val,p.order,logFC,con,Ortholog.count) -->
<!-- #exp_df %>% dplyr::mutate(Ortholog.count=as.numeric(Ortholog.count)) %>%  -->
<!-- #  filter(Ortholog.count==2) -->
<!-- ``` -->


<!-- ```{r} -->
<!-- rna_dtm.ann <- rna_dt %>%  -->
<!--   dplyr::mutate(id.name= ifelse(ID == "PVX_003775_2o2.150",  -->
<!--                                 "MSP4. / PVX_003775", id.name), -->
<!--                 id.name=ifelse(ID=="PVX_099980_1o1_S2.504", -->
<!--                                "MSP1_S2 / PVX_099980",id.name), -->
<!--                 id.name=ifelse(ID=="PVX_099980_1o1_S1.793", -->
<!--                                "MSP1_S1 / PVX_099980",id.name)) %>%  -->
<!--   separate(id.name,c("gene.name","gene.id"),sep = " / ") %>%  -->
<!--   unite(id.name,gene.id,gene.name,sep = " / ") %>%  -->
<!--   #mutate_at(vars(id.name), funs(trimws)) %>%  -->
<!--   #filter(rna_s=="s1") %>%  -->
<!--   #filter(subset=="diff") %>% -->
<!--   select(ID,id.name,subset,rna_t,fpkm,intensity,logFC) #%>%  -->
<!--   #dplyr::mutate(fpkm=log2(fpkm)) %>% -->
<!--   #filter(fpkm!="-Inf")  -->

<!-- rna_dtm <- rna_dtm.ann  %>%  -->
<!--   select(-subset) %>%  -->
<!--   reshape2::acast(id.name ~ rna_t, -->
<!--                   value.var = "fpkm") #%>% class() -->
<!-- ``` -->

<!-- ```{r,fig.height=5,fig.width=7.7} -->
<!-- x.ann <- rna_dtm.ann %>% -->
<!--   group_by(id.name) %>%  -->
<!--   dplyr::mutate(fpkm.max=max(fpkm,na.rm = T), -->
<!--                 fpkm.min=min(fpkm,na.rm = T), -->
<!--                 fold.change=fpkm.max-fpkm.min) %>%  -->
<!--   ungroup() %>%  -->
<!--   filter(fpkm==fpkm.max) %>%  -->
<!--   dplyr::arrange(subset,rna_t,fpkm.max) #fold.change -->

<!-- x <- x.ann %>%  -->
<!--   select(id.name) %>% as.matrix()  -->

<!-- aheatmap(rna_dtm[x,], Rowv = NA, Colv = NA, -->
<!--          annRow = x.ann %>%  -->
<!--            select(subset,intensity,logFC,rna_t) %>%  -->
<!--            dplyr::rename("time of maximum expression (FPKM)"=rna_t) -->
<!--          ) -->

<!-- aheatmap(rna_dtm[x,],Rowv = FALSE, Colv = NA, -->
<!--          annRow = x.ann %>%  -->
<!--            select(subset,intensity,logFC,rna_t) %>%  -->
<!--            dplyr::rename("time of maximum expression (FPKM)"=rna_t) -->
<!--          ) -->
<!-- ``` -->

<!-- ##### (**) GENERAL summary -->

<!-- - count/proportion of immune reactive Hypothetical proteins -->
<!-- - count/proportion of Differentially Reactive proteins -->
<!-- - count/proportion of R_square higher than 0.9 -->
<!-- - count/proportion of Unique proteins in P vivax / Pv &PcyB / no Pf homolog -->
<!-- - count/proportion + list of Reactive proteins with more than one reactive polypeptide antigen -->

<!-- ### 0. N6 subset -->

<!-- - __OBJETIVO:__ -->
<!--     + __Comparar__ lista de proteínas de interés para el Dept.Parasitología NAMRU-6 -->

<!-- - __PROTOCOLO:__ -->
<!--     + __Leer__ data -->
<!--         + Problema: Nombre de columna es observación -->
<!--         + Limpieza:  -->
<!--             - __Agregar__ nombre de columna como fila, y -->
<!--             - __Renombrar__ columna a `Gene.ID` -->
<!--     + __Unir__ dos bases de datos, conservando solo las observaciones (filas) en común. -->
<!--         - Lista de Ag de __interés para N6__ con: -->
<!--             - Lista de Ag reactivos __no presentes__ en el microarreglo, -->
<!--             - Lista de Ag con mayor reactividad en toda la muestra __(Top10)__, -->
<!--             - Lista de Ag dif. reactivos en __Malaria Severa (P.Value<0.05)__, -->
<!--             - Lista de Ag dif. reactivos en pacientes __CON episodio previo (adj.P.Val<0.05)__, y -->
<!--             - Lista de Ag dif. reactivos en pacientes __CON episodio previo (adj.P.Val<0.05 & logFC>1)__. -->

<!-- ```{r} -->
<!-- a <- readxl::read_xlsx("data/00-protn6c.xlsx") %>% colnames() -->
<!-- prt_n6 <- readxl::read_xlsx("data/00-protn6c.xlsx") %>%  -->
<!--   add_row(PVX_119355=a) %>%  -->
<!--   dplyr::rename(Gene.ID=PVX_119355) -->
<!-- ``` -->

<!-- - __RESULTADOS:__ -->
<!--     + De un total de __`r dim(prt_n6)[1]`__ Ag en lista de __N6__: -->
<!--         - __07__ son Ag reactivos __no presentes__ en el microarreglo, -->
<!--         - __01__ es Ag con mayor reactividad en toda la muestra __(Top10)__, -->
<!--         - __00__ son Ag dif. reactivos en __Malaria Severa (P.Value<0.05)__, -->
<!--         - __05__ son Ag dif. reactivos en pacientes __CON episodio previo (adj.P.Val<0.05)__, y -->
<!--         - __03__ son Ag dif. reactivos en pacientes __CON episodio previo (adj.P.Val<0.05 & logFC>1)__. -->

<!-- - __EXPLORAR:__ -->
<!--     + Explorar tablas con: -->
<!--         + __Resultados__ de la comparación entre grupos, e -->
<!--         + __Información__ molecular extraída de PlasmoDB -->

<!-- + __07 AG REACTIVOS NO PRESENTES EN MICROARREGLO__ -->
<!--     - Relevancia: -->
<!--         + 07 de 38 proteínas de interés presentan __evidencia de reactividad__  -->
<!--         ante la respuesta humoral contra _Plasmodium vivax_. -->

<!-- ```{r} -->
<!-- np %>% inner_join(prt_n6,by="Gene.ID") #%>% #select(ID,Gene.ID,Gene.Name,Product.Description,Description) -->
<!-- ``` -->

<!-- + __01 AG MÁS REACTIVOS__ -->
<!--     - NOTA: MSP1 __Segmento 2__ -->

<!-- ```{r} -->
<!-- exp_t %>% inner_join(prt_n6,by="Gene.ID") %>% select(-Description) -->
<!-- ``` -->

<!-- + __00 AG EN MALARIA SEVERA__ -->

<!-- ```{r} -->
<!-- sev_d %>% inner_join(prt_n6,by="Gene.ID") %>% select(ID,Gene.ID,Product.Description,Gene.Name,everything()) -->
<!-- ``` -->

<!-- + __05 AG EN 140 DIF REACTIVOS CON EPISODIO PREVIO__ -->
<!--     - NOTA:  -->
<!--         + MSP1 __Segmento 1__ -->
<!--         + Dos variantes de 6-cysteine protein -->
<!--             - PVX_113775: __P12__ -->
<!--             - PVX_000995: __P41__ (antes llamada _transmission-blocking target antigen Pfs230, putative_) -->
<!--     - INESPERADO: __StAR (PVX_081550)__ sí está en microarreglo. -->

<!-- ```{r} -->
<!-- exp_da %>% inner_join(prt_n6,by="Gene.ID") %>% select(ID,Gene.ID,Product.Description,Gene.Name,everything()) -->
<!-- ``` -->

<!-- + __03 AG EN 15 DIF REACTIVOS CON EPISODIO PREVIO CON DUPLICACIÓN SOBRE CONTROL__ -->

<!-- ```{r} -->
<!-- exp_df %>% inner_join(prt_n6,by="Gene.ID") %>% select(ID,Gene.ID,Product.Description,Gene.Name,everything()) -->
<!-- ``` -->

<!-- #### yapa -->

<!-- - __YAPA:__ -->
<!--     + Unir lista de microarreglos con Ag reportados como "reactivos no presentes" por ADi. -->

<!-- - __RESULTADOS__ -->
<!--     + De un total de 151, __54 Ag SÍ están presentes__ en el microarreglo -->
<!--     + De un total de __`r dim(prt_n6)[1]`__ Ag en lista de __N6__: -->
<!--         - __05__ son Ag reactivos que __SÍ están presentes__ en el microarreglo -->

<!-- ```{r} -->
<!-- yp <- raw %>% inner_join(np,by="Gene.ID") %>% #filter(Gene.ID=="PVX_081550") -->
<!--   select(ID,Gene.ID,Description) %>%  -->
<!--   inner_join(all %>% select(Gene.ID,Product.Description,Gene.Name),by="Gene.ID") %>%  -->
<!--   select(ID,Gene.ID,Gene.Name,everything()) -->

<!-- yp -->
<!-- ``` -->

<!-- + __05 AG REACTIVOS DECLARADOS COMO "NO PRESENTES EN MICROARREGLO", ¡SÍ LO ESTÁN!__ -->
<!--     - CelTOS, GEST, CSP, ICP. -->

<!-- ```{r} -->
<!-- yp %>% inner_join(prt_n6,by="Gene.ID") %>% select(ID,Gene.ID,Gene.Name,Product.Description,Description) -->
<!-- ``` -->

<!-- ### PAPER STRUCTURE -->

<!-- #### Introduction -->

<!-- #### Methods -->

<!-- ##### Ethics statement -->

<!-- ##### Study participants and sample collection -->

<!-- ##### Protein microarray -->

<!-- ##### Data analysis -->

<!-- #### Results -->

<!-- ##### Study population characteristics -->

<!-- ##### Characterization of P.vivax proteins on microarray -->

<!-- ##### Antibody reactivity induced by Severe Vivax Malaria -->

<!-- ##### Antibody profile associated with previous episode to P. vivax malaria -->

<!-- #### Discussion -->

<!-- #### Conclusions -->

<!-- #### Recommendations -->

<!-- #### Appendix -->

<!-- ##### Supporting information -->

<!-- ##### Acknowledgments -->

<!-- ##### Author contributions -->

### Computer environment

```{r}
# Print the session information reported by devtools (R version, platform and
# the versions of the attached/loaded packages) so the environment used to
# render this report is recorded alongside the results.
devtools::session_info()
```


### References