In [94]:
# 16 - Disease Diagnostic
#
# This script performs disease diagnostic. Reads bam files of two persons and extracts relevant reads to analyze.
#
# INPUTS:
#   run_number
#   disease
#   BAM files
# 
# OUTPUTS:
#   relevant reads with the SNP
args = commandArgs(trailingOnly=TRUE)
print('')
print('------------------------------------------------------------------------------------------')
print('16 - DISEASE DIAGNOSTIC (16_disease_diagnostic.R)')
print(paste0("Run: ", args[1], ", disease: ", args[2]))
print('')
print('')

# Set variables
# -----------------------------------------------------------------------------
run_num = 3
dis = 'sca'

# Load Modules
# -----------------------------------------------------------------------------
print('Loading modules.')
suppressMessages(library(tidyverse, quietly=TRUE))
suppressMessages(library(Rsamtools, quietly=TRUE))
suppressMessages(library(dplyr, quietly=TRUE))

# Set Parameters
# -----------------------------------------------------------------------------
# print('Setting parameters.')

run_number=paste0("run",run_num)

dis_params = data.frame('disease' = c('sca', 'cf', 'sma1', 'sma2', 'thal1', 'thal2', 'thal3', 'pompe'),
                    'chr' = c('chr11','chr7','chr5', 'chr5', 'chr16', 'chr16', 'chr11', 'chr17'),
                    'start' = c(5227002, 117559590, 70924941, 70049523, 176680, 172876, 5225464, 25000000),
                    'end'   = c(5227002, 117559590, 70953015, 70077595, 177522, 173710, 5227071, 25000000))

chr       = dis_params %>% filter(disease == dis) %>% select(chr)   %>% pull
snp_start = dis_params %>% filter(disease == dis) %>% select(start) %>% pull
snp_end   = dis_params %>% filter(disease == dis) %>% select(end)   %>% pull

datadir = paste0('/mnt/aretian/genomics/nanopore/',run_number)

# Load data
# -----------------------------------------------------------------------------
print('Loading data.')

person0bam <- BamFile(
                file  = paste0(datadir,'/strspy/',dis,'/input/',run_number,'_',chr,'_',dis,'_person0.bam'    ), 
                index = paste0(datadir,'/strspy/',dis,'/input/',run_number,'_',chr,'_',dis,'_person0.bam.bai'))

person1bam <- BamFile(
                file  = paste0(datadir,'/strspy/',dis,'/input/',run_number,'_',chr,'_',dis,'_person1.bam'    ), 
                index = paste0(datadir,'/strspy/',dis,'/input/',run_number,'_',chr,'_',dis,'_person1.bam.bai'))

[1] ""
[1] "------------------------------------------------------------------------------------------"
[1] "16 - DISEASE DIAGNOSTIC (16_disease_diagnostic.R)"
[1] "Run: /home/fer/.local/share/jupyter/runtime/kernel-4006da55-029b-4016-8e60-df46edccae0c.json, disease: NA"
[1] ""
[1] ""
[1] "Loading modules."
[1] "Loading data."


In [95]:
# Diagnostic and save table
# -----------------------------------------------------------------------------
print('Extracting reads containing the relevant SNP.')

# gr <- GRanges(seqnames = chr,
#               ranges = IRanges(start = snp_start, end = snp_end+1000))
gr <- GRanges(seqnames = 'chr11',
              ranges = IRanges(start = 5227002, end = 5228002))
params <- ScanBamParam(which = gr, what = scanBamWhat())

# Person 0
aln <- scanBam(person0bam, param = params)
reads <- data.frame('startpos' = aln[[1]]$pos, 
                    'read'     = aln[[1]]$seq)
tryCatch({ 
    reads2 <- reads                                                    %>%
    mutate(length         = nchar(read)                   ,
           snp_start_0    = snp_start - startpos          ,
           snp_end_0      = snp_end   - startpos          ,
           left_padding   = str_sub(read,0,snp_start_0   ),
           right_padding  = str_sub(read,  snp_end_0  +2 ),
           segment        = str_sub(read,  snp_start_0+1  , snp_end_0+1),
           snp_start      = snp_start                     ,
           snp_end        = snp_end                       )            %>%
    select(snp_start, snp_end, left_padding,  segment, right_padding)  %>%
    sample_n(10)
    
    write.csv(reads2, paste0(datadir,'/',run_number,'_',chr,'_',dis,'person0_diagnostic_reads.csv'), row.names = FALSE)
    write.csv(reads2, paste0('disease_diagnostic/',run_number,'_',chr,'_',dis,'person0_diagnostic_reads.csv'), row.names = FALSE)
    print(paste0('Saved: ',datadir,'/',run_number,'_',chr,'_',dis,'person0_diagnostic_reads.csv'))
    print('Saved copies of each csv in disease_diagnostic/ folder in homedir.')    
    }, 
    error=function(cond){
        print(paste0('There are no reads in location ', chr,':',snp_start,'-',snp_end))
    })



# Person 1
aln <- scanBam(person1bam, param = params)
reads <- data.frame('startpos' = aln[[1]]$pos, 
                    'read' = aln[[1]]$seq)

tryCatch({ 
    reads2 <- reads                                                    %>%
    mutate(length         = nchar(read)                   ,
           snp_start_0    = snp_start - startpos          ,
           snp_end_0      = snp_end   - startpos          ,
           left_padding   = str_sub(read,0,snp_start_0   ),
           right_padding  = str_sub(read,  snp_end_0  +2 ),
           segment        = str_sub(read,  snp_start_0+1  , snp_end_0+1),
           snp_start      = snp_start                     ,
           snp_end        = snp_end                       )            %>%
    select(snp_start, snp_end, left_padding,  segment, right_padding)  %>%
    sample_n(10)
    
    write.csv(reads2, paste0(datadir,'/',run_number,'_',chr,'_',dis,'person1_diagnostic_reads.csv'), row.names = FALSE)
    write.csv(reads2, paste0('disease_diagnostic/',run_number,'_',chr,'_',dis,'person1_diagnostic_reads.csv'), row.names = FALSE)

    print(paste0('Saved: ',datadir,'/',run_number,'_',chr,'_',dis,'person1_diagnostic_reads.csv'))
    print('Saved copies of each csv in disease_diagnostic/ folder in homedir.')
    }, 
    error=function(cond){
        print(paste0('There are no reads in location ', chr,':',snp_start,'-',snp_end))
    })


[1] "Extracting reads containing the relevant SNP."
[1] "There are no reads in location chr11:5227002-5227002"


ERROR: Error in value[[3L]](cond): failed to open BamFile: file(s) do not exist:
  ‘/mnt/aretian/genomics/nanopore/run3/strspy/sca/input/run3_chr11_sca_person1.bam’
