# INTACT pipeline
Run INTACT on the PTWAS and fastENLOC output.

The posterior probabilities in the intact function are calculated through the following steps:

- Compute Bayes Factors: The function converts TWAS z-scores into Bayes factors by computing a grid of Bayes factors using different values of `K` (a vector of values over which Bayesian model averaging is performed, default: c(1,2,4,8,16)), then performing Bayesian model averaging using the log sum exp trick. 
- Compute Prior Probabilities: The `prior_fun` function converts `GLCP` into prior probabilities. `prior_fun` is set as linear by default. 
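- Compute Posterior Probabilities: The Bayes factors and the prior probabilities are combined to give a posterior probability for each gene.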

## Example
We now run an example based on the [example fastenloc and ptwas output from Tiffany, Montgomery lab](https://github.com/tyeulalio/QTL_pipeline).


In [None]:
# Example: run the `intact` workflow from pipeline/intact.ipynb for tissue DLPFC,
# taking the fastENLOC and PTWAS result paths from shell glob patterns.
sos run pipeline/intact.ipynb intact \
    --fastenloc_file output/fastenloc/*enloc.gene.out \
    --ptwas_file output/ptwas/*.ptwas.output \
    --tissue DLPFC -s force

In [None]:
# Example: same invocation as above, but --ptwas_file points at one named file
# (output/ptwas/test.output) instead of a glob pattern.
sos run pipeline/intact.ipynb intact \
    --fastenloc_file output/fastenloc/*enloc.gene.out \
    --ptwas_file output/ptwas/test.output \
    --tissue DLPFC -s force

In [None]:
[global]
# Workflow-wide parameters; each `parameter:` line declares a name and its default.
# Working directory: the [intact] step writes its output file under this path
parameter: cwd = path("output")
# Path to the fastENLOC output; the [intact] step reads its `Gene` and `GLCP` columns
parameter: fastenloc_file = ""
# Path to the PTWAS output; the [intact] step reads its `GENE`, `STAT` and `SUBCLASS` columns
parameter: ptwas_file = ""
# Dataset / tissue label; used as the prefix of the output file name (`{tissue}.INTACT.rds`)
parameter: tissue = ''
# FDR level handed to `fdr_rst` inside the [intact] step
parameter: alpha = 0.05
# QTL data type
# NOTE(review): not referenced by the [intact] step visible in this file — confirm it is still needed
parameter: QTL = 'eQTL'
# Container image path
# NOTE(review): declared here but not passed to the `R:` action of [intact] — confirm
parameter: container = ''
# Command prefix derived from `container`: when the path ends with '.sif' this is
# 'micromamba run -n ' followed by the file name without its last four characters,
# otherwise an empty string.
# NOTE(review): the surrounding braces read as a Python set literal unless SoS treats
# them specially — confirm the resulting type is what downstream code expects.
parameter: entrypoint={('micromamba run -n' + ' ' + container.split('/')[-1][:-4]) if container.endswith('.sif') else f''}
# Forwarded to `trunk_size` of the task in [intact]
parameter: job_size = 1
# Forwarded to `walltime` of the task in [intact]
parameter: walltime = "5h"
# Forwarded to `mem` of the task in [intact]
parameter: mem = "8G"
# Forwarded to `cores` of the task in [intact]
parameter: numThreads = 1


In [None]:
[intact]
# Run INTACT on one PTWAS result file and one fastENLOC result file and save the
# combined per-gene table to `{cwd}/{tissue}.INTACT.rds`.
# The embedded R script is interpolated with the sigil set by `expand`, and its
# stderr/stdout are redirected to files named after the output file.
# NOTE(review): `container` and `entrypoint` are declared in [global] but are not
# passed to the `R:` action below — confirm whether that is intentional.
input: ptwas_file, fastenloc_file
output: f'{cwd}/{tissue}.INTACT.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'
R: expand= "${ }", stderr = f'{_output:nn}.stderr', stdout = f'{_output:nn}.stdout'
    library(INTACT)
    #library(biomaRt)
    library(tidyverse)

    # run intact on the ptwas and colocalization output


    # Combine PTWAS z-scores with fastENLOC GLCPs and run INTACT on the genes
    # that appear in both tables.
    #
    # Args:
    #   ptwas:     data frame with columns GENE and STAT (STAT is used as the
    #              TWAS z-score).
    #   fastenloc: data frame with columns Gene and GLCP.
    #   alpha:     FDR level handed to fdr_rst(). It is coerced to numeric so a
    #              quoted value such as "0.05" is not compared as text.
    #
    # Returns:
    #   The joined table (gene, zscore, GLCP) with two extra columns:
    #   `intact_pip` (the output of intact()) and `fdr_sig` (the "sig" column
    #   of fdr_rst()), sorted by decreasing `intact_pip`.
    run_intact <- function(ptwas, fastenloc, alpha){
        # A character alpha would make the threshold comparison inside
        # fdr_rst() a string comparison; force and validate a numeric level.
        alpha <- as.numeric(alpha)
        stopifnot(length(alpha) == 1, !is.na(alpha), alpha >= 0, alpha <= 1)

        # keep only the columns that are needed, under common names
        sub_ptwas <- ptwas %>%
            select(gene = GENE, zscore = STAT)

        # match the gene names: "-" in the fastENLOC names is replaced by "."
        # (presumably the PTWAS names already use "." — verify against the inputs)
        sub_fastenloc <- fastenloc %>%
            select(gene = Gene, GLCP) %>%
            mutate(gene = str_replace_all(gene, "-", "\\."))

        # explicit key: no "Joining with ..." message and no accidental extra keys
        ptwas_fastenloc <- inner_join(sub_ptwas, sub_fastenloc, by = "gene")
        if (nrow(ptwas_fastenloc) == 0) {
            stop("No genes are shared between the PTWAS and fastENLOC tables",
                 call. = FALSE)
        }

        res <- intact(GLCP_vec = ptwas_fastenloc$GLCP,
                      z_vec = ptwas_fastenloc$zscore,
                      prior_fun = linear)
        pip_fdr <- fdr_rst(res, alpha)

        # combine results, most probable genes first
        cbind(ptwas_fastenloc, intact_pip = res, fdr_sig = pip_fdr[["sig"]]) %>%
            arrange(desc(intact_pip))
    }


        # Load the PTWAS table. check.names = FALSE keeps the column names as
        # written in the file; one row is kept per (GENE, SUBCLASS) pair.
        ptwas <- read.csv("${ptwas_file}",sep='\t', check.names = FALSE, row.names = NULL) %>%
            distinct(GENE, SUBCLASS, .keep_all = TRUE)
        # Load the fastENLOC GLCP table and keep only rows with GLCP <= 1.
        # NOTE(review): the original author marked this filter as "FIX" —
        # confirm why GLCP values above 1 occur upstream.
        fastenloc <- read_table("${fastenloc_file}") %>% filter(GLCP <= 1)

        # alpha is interpolated without quotes so that R receives a number;
        # quoting it would pass a character string to fdr_rst().
        res <- run_intact(ptwas, fastenloc, alpha = ${alpha})
        saveRDS(res, "${_output}")

