Translation
===========
This notebook is devoted to to researching amino-acid properties of our clonotypes

The simplest way to do this is via alakazam::translateDNA in R:

library(alakazam)
db <- ExampleDb
db$sequence_alignment_aa <- translateDNA(db$sequence_alignment)

This will put a new column in db with the amino acid sequences, preserving the IMGT gaps. alakazam::translateDNA doesn’t really do anything special - it’s just a wrapper around seqinr::translate that adds some handling for the gaps (. and - characters), so you can use any translation function on sequence_alignment that can deal with gaps, because position 1 in sequence_alignment should be frame 1 w.r.t. the germline reference.



In [1]:
debug <- TRUE

In [2]:
include = function(pkg){
  if(!suppressMessages(require(pkg, character.only = TRUE)))
  install.packages(pkg, character.only = TRUE)
  suppressMessages(library(pkg, pkg, character.only = TRUE))
}

In [3]:
include("docopt")
include("stringr")
include("alakazam")

In [4]:
translate_db = function(db_path){
    db <- readChangeoDb(db_path)
    without_gaps <- gsub("...", "", db$sequence_alignment, fixed=T)
    return(translateDNA(without_gaps))
}

In [5]:
with_translation = function(db_path){
    db <- readChangeoDb(db_path)
    without_gaps <- gsub("...", "", db$sequence_alignment, fixed=T)    
    return (cbind(db,Translation=translateDNA(without_gaps)))     
}

In [6]:
doc <- 'Usage:
  translate.R [--wd <wd>][--suffix <suffix>] <dbs> ...

  Options:   
   -w --wd <wd> [default: TRUE]
   -s --suffix <suffix> [default: _with_translation].
   -h --help     Show this screen.'

In [7]:
if(debug == TRUE) {
    ramos <- file.path("/data/samples/AIRR-Seq/ramos/test/merged")
    dbs <- file.path(ramos, c('heavy.tsv','light.tsv'))
    args <- union(c(TRUE, "_with_translation"), dbs)
    print(args)
    values <- docopt(doc, args = args, version="0.1")
} else {
    values <- docopt(doc, version="0.1")
}


[1] "TRUE"                                              
[2] "_with_translation"                                 
[3] "/data/samples/AIRR-Seq/ramos/test/merged/heavy.tsv"
[4] "/data/samples/AIRR-Seq/ramos/test/merged/light.tsv"


In [8]:
print(values)

List of 8
 $ --wd    : chr "TRUE"
 $ --suffix: chr "_with_translation"
 $ --help  : logi FALSE
 $ <dbs>   : chr [1:4] "TRUE" "_with_translation" "/data/samples/AIRR-Seq/ramos/test/merged/heavy.tsv" "/data/samples/AIRR-Seq/ramos/test/merged/light.tsv"
 $ wd      : chr "TRUE"
 $ suffix  : chr "_with_translation"
 $ help    : logi FALSE
 $ dbs     : chr [1:4] "TRUE" "_with_translation" "/data/samples/AIRR-Seq/ramos/test/merged/heavy.tsv" "/data/samples/AIRR-Seq/ramos/test/merged/light.tsv"
NULL


In [9]:
wd <- values$wd
suffix <- values$suffix
dbs_pathes <- values$dbs
dbs_translated_pathes <- str_replace(dbs_pathes, ".tsv", paste0(suffix,".tsv"))
if(wd)
    dbs_translated_pathes <-file.path(getwd(), basename(dbs_translated_pathes))

In [10]:
print(str_interp("extending ${dbs_pathes} with Translation column!"))

[1] "extending c(\"/data/samples/AIRR-Seq/ramos/test/merged/heavy.tsv\", \"/data/samples/AIRR-Seq/ramos/test/merged/light.tsv\") with Translation column!"


In [11]:
dbs_pathes

In [12]:
for(i in 1:length(dbs_pathes)){
    path <- dbs_pathes[i]
    path_with_translation <- dbs_translated_pathes[i]
    db_with_translation <- with_translation(path)
    writeChangeoDb(db_with_translation, path_with_translation)
}

In [13]:
list.files(getwd())

In [14]:
if(wd == TRUE){
    print("Saving files to working directory...")
}
print(str_interp("Execution successfully finished, file are saved as ${dbs_translated_pathes}"))

[1] "Saving files to working directory..."
[1] "Execution successfully finished, file are saved as c(\"/home/magus/notebooks/src/heavy_with_translation.tsv\", \"/home/magus/notebooks/src/light_with_translation.tsv\")"
