# Performance of annotation-enhanced fine-mapping

Let's skip the formalities and dive directly into the results: power, false discovery proportion, top hit rate, size and purity.

## Results

In [None]:
# Load the summary table written by the final step of the workflow below.
# The path corresponds to that workflow's default `cwd` and `date` parameters.
readRDS("~/GIT/github/annotation-finemap/analysis/xh_grant/0819.summary.rds")

The workflow below was used to obtain this table.

## Results data processing

To reproduce the results, run:

```
sos run report.ipynb
```

In [None]:
[global]
# Root directory of the analysis; the R actions in the steps below use it as their working directory
parameter: cwd = path('~/GIT/github/annotation-finemap/analysis')
# Tag used as the file-name prefix of the workflow's output files
parameter: date = '0819'
# Directory under `cwd` that is queried with dscrutils::dscquery and that receives the output files
dirname = path(f'{cwd:a}/xh_grant/')

In [None]:
[default_1]
# Query the benchmark results and cache the returned table as an RDS file.
output: f'{dirname}/{date}.out.rds'
R: expand = '${ }', workdir = cwd
    # Space-separated list of the modules / variables to extract.
    query_targets <- "simulate_region.dataset simulate_region.g simulate_z.n_signal fit evaluate"
    query_table <- dscrutils::dscquery(${dirname:br}, target = query_targets, load.pkl = TRUE)
    saveRDS(query_table, file = ${_output:r})

In [None]:
[default_2]
# Summarize the evaluation results of DAP ('dap') and annotation-enhanced DAP
# ('dapa') into one table with a row per simulated odds ratio and columns for
# power, false discovery proportion, top hit rate, median size and mean purity.
# NOTE: cluster_cutoff is defined for this step but is not referenced by the
# R script below.
cluster_cutoff = 0.95
output: f'{_input:nn}.summary.rds'
R: expand = '${ }', workdir = cwd
    # Promote warnings to errors so that suspicious results stop the step.
    options(warn=2)
    dat <- readRDS(${_input:r})
    result_dir <- "${dirname}/"
    # Keep the odds ratios in their original type for row matching; the
    # character form is used only as a label. Converting to character and back
    # to double can lose precision and make an equality match miss its rows.
    or_values <- unique(dat$simulate_region.g)
    or_labels <- as.character(or_values)
    data_sets <- unique(dat$simulate_region.dataset)
    if (length(or_values) == 0) {
      stop("query result contains no simulate_region.g values", call. = FALSE)
    }

    # TRUE when an evaluation entry holds the 'failed' marker instead of results.
    is_failed <- function(x) isTRUE(x[1] == 'failed')

    # Pool the evaluation output of one fitting method at one odds ratio over
    # all data sets and reduce it to the five reported summary statistics.
    #   method   value of dat$fit to select ('dap' or 'dapa')
    #   or_value value of dat$simulate_region.g to select
    # Returns a list with elements power, fdp, hit, size (median) and
    # purity (mean).
    summarize_method <- function(method, or_value) {
      power_d <- 0
      power_n <- 0
      fdp_d <- 0
      fdp_n <- 0
      hit_d <- 0
      hit_n <- 0
      sizes <- list()
      purities <- list()
      for (d in data_sets) {
        sel <- which(dat$fit == method & dat$simulate_region.g == or_value & dat$simulate_region.dataset == d)
        runs <- unique(dat[sel, c('simulate_z.n_signal', 'evaluate.output.file'), drop = FALSE])
        if (nrow(runs) == 0) {
          message("No '", method, "' results for odds ratio ", or_value, " in data set ", d)
        }
        for (k in seq_len(nrow(runs))) {
          # Each file is read individually: readRDS() accepts one path only.
          ev <- readRDS(paste0(result_dir, runs[['evaluate.output.file']][k], ".rds"))
          # NOTE(review): the number of signals is taken from the query table
          # row that names this file -- confirm this matches the intended
          # denominator for power.
          n_signal <- runs[['simulate_z.n_signal']][k]
          for (i in seq_along(ev$is_recovered)) {
            # power: recovered signals over simulated signals; a failed
            # entry contributes no recovered signals but still counts in the
            # denominator
            if (!is_failed(ev$is_recovered[[i]])) {
              power_d <- power_d + sum(ev$is_recovered[[i]])
            }
            power_n <- power_n + n_signal
            if (!is_failed(ev$is_cs_true[[i]])) {
              # fdp: reported entries of is_cs_true that are not TRUE
              n_reported <- length(ev$is_cs_true[[i]])
              fdp_d <- fdp_d + n_reported - sum(ev$is_cs_true[[i]])
              fdp_n <- fdp_n + n_reported
              # top hit of cluster is signal
              hit_d <- hit_d + sum(ev$is_top_true[[i]])
              hit_n <- hit_n + length(ev$is_top_true[[i]])
              # size and purity are pooled and reduced after the loops
              sizes[[length(sizes) + 1L]] <- ev$size[[i]]
              purities[[length(purities) + 1L]] <- ev$purity[[i]]
            }
          }
        }
      }
      list(power = power_d / power_n,
           fdp = fdp_d / fdp_n,
           hit = hit_d / hit_n,
           # c(numeric(0), ...) keeps an empty pool numeric so the summaries
           # evaluate to NA / NaN instead of NULL
           size = median(c(numeric(0), unlist(sizes))),
           purity = mean(c(numeric(0), unlist(purities))))
    }

    rows <- vector("list", length(or_values))
    for (k in seq_along(or_values)) {
      dap <- summarize_method('dap', or_values[k])
      dapa <- summarize_method('dapa', or_values[k])
      # The odds-ratio label is a string, so the whole row (and hence the
      # saved matrix) is stored as character.
      rows[[k]] <- c(or_labels[k], dap$power, dapa$power, dap$fdp, dapa$fdp, dap$hit, dapa$hit, dap$size, dapa$size, dap$purity, dapa$purity)
    }
    # Binding all rows at once yields a matrix even for a single odds ratio.
    res <- do.call(rbind, rows)
    colnames(res) <- c('OR', 'DAP Power', 'aDAP Power', 'DAP FDP', 'aDAP FDP', 'DAP top hit rate', 'aDAP top hit rate', 'DAP size', 'aDAP size', 'DAP purity', 'aDAP purity')
    rownames(res) <- res[,1]
    saveRDS(res, ${_output:r})