## SAMap prep
I want to run SAMap, but I'd prefer to use DIAMOND's protein BLAST (`diamond blastp`) for the homology search.  
It is much faster than both tblastx and blastp, and it can be more accurate with its new very sensitive mode.  

Because SAMap requires the protein names to match the gene names, I use GENESPACE to prepare the input files.  
I've tried doing this by hand in the past and it's a hassle; GENESPACE can quickly parse the annotation files and prepare them for me instead.  

In [None]:
# Load GENESPACE (used further down for parse_annotations()).
library(GENESPACE)

# Project root: every relative path in this notebook is resolved against it.
setwd("/mnt/mpistaff/Cranio_Lab/Louk_Seton/4_species_project")

# Directory holding the raw NCBI downloads, one sub-directory per genome.
genomeRepo <- file.path("samap_directory", "rawGenomes")

# GENESPACE working directory; deliberately the same location as genomeRepo.
wd <- genomeRepo

In [None]:
# Create the raw-genome directory (and any missing parents) from R itself
# instead of shelling out to `mkdir -p`: no shell quoting issues with the
# path, works on any platform, and a failure is reported instead of ignored.
dir.create(genomeRepo, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(genomeRepo)) {
  stop("Could not create directory: ", genomeRepo, call. = FALSE)
}

In [None]:
# Download the NCBI RefSeq annotation (GFF) and translated CDS (protein FASTA)
# for every genome, then let GENESPACE parse them.

# Assembly-specific path stems below the NCBI "genomes/all/GCF" FTP root.
# The vector names become the genome IDs used everywhere downstream.
urls <- c(
  mouse = "000/001/635/GCF_000001635.26_GRCm38.p6/GCF_000001635.26_GRCm38.p6_",
  catshark = "902/713/615/GCF_902713615.1_sScyCan1.1/GCF_902713615.1_sScyCan1.1_"
)

genomes2run <- names(urls)
urls <- file.path("https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF", urls)

# Full URLs of the two files needed per genome, named by genome ID.
translatedCDS <- setNames(sprintf("%stranslated_cds.faa.gz", urls), genomes2run)
geneGff <- setNames(sprintf("%sgenomic.gff.gz", urls), genomes2run)

# One download directory per genome inside the raw genome repository.
writeDirs <- setNames(file.path(genomeRepo, genomes2run), genomes2run)

# download.file() gives up after getOption("timeout") seconds (60 by default),
# which is often too short for genome-scale files. Raise it for the downloads
# and restore the previous setting afterwards.
old_opts <- options(timeout = max(600, getOption("timeout")))

for (i in genomes2run) {
  message("Downloading annotation files for: ", i)

  # recursive = TRUE so this also works when genomeRepo does not exist yet.
  if (!dir.exists(writeDirs[[i]])) {
    dir.create(writeDirs[[i]], recursive = TRUE)
  }

  # mode = "wb": the files are gzip archives and must be written as binary,
  # otherwise they are corrupted on platforms that translate line endings.
  download.file(
    url = geneGff[[i]],
    destfile = file.path(writeDirs[[i]], basename(geneGff[[i]])),
    mode = "wb"
  )
  download.file(
    url = translatedCDS[[i]],
    destfile = file.path(writeDirs[[i]], basename(translatedCDS[[i]])),
    mode = "wb"
  )
}

options(old_opts)

# Parse the raw NCBI files with GENESPACE's "ncbi" preset; output is written
# below the GENESPACE working directory `wd`.
parsedPaths <- parse_annotations(
  rawGenomeRepo = genomeRepo,
  genomeDirs = genomes2run,
  genomeIDs = genomes2run,
  presets = "ncbi",
  genespaceWd = wd
)

In [None]:
# Placeholder cell: system() needs a `command` argument, so calling it bare
# raises an error and stops a top-to-bottom run of the notebook.
# Fill in the intended shell command before re-enabling this line.
# system()

In [None]:
## diamond blast
# Reciprocal DIAMOND blastp between the mouse and catshark proteomes.
# Builds one DIAMOND database per species, then searches each proteome
# against the other species' database. Results are written to
# maps/<id1><id2>/<id1>_to_<id2>.txt and maps/<id1><id2>/<id2>_to_<id1>.txt.
setwd("/mnt/mpistaff/Cranio_Lab/Louk_Seton/4_species_project/samap_directory")

# Protein FASTA, DIAMOND database prefix and two-letter ID for each species.
mouse <- "rawGenomes/peptide/mouse.fa"
mousedb <- "rawGenomes/peptide/mouse"
mouse_n <- "mm"
catshark <- "rawGenomes/peptide/catshark.fa"
catsharkdb <- "rawGenomes/peptide/catshark"
shark_n <- "sc"

threads <- 16

# Output directory for this species pair, e.g. "maps/mmsc".
map_dir <- file.path("maps", paste0(mouse_n, shark_n))
dir.create(map_dir, recursive = TRUE, showWarnings = FALSE)

# Run a shell command and stop on a non-zero exit status, so that a failed
# database build is not silently followed by a search against a missing db.
run_command <- function(command) {
  status <- system(command)
  if (status != 0) {
    stop("Command failed with exit status ", status, ": ", command,
         call. = FALSE)
  }
  invisible(status)
}

# Build the `diamond makedb` command line for one protein FASTA file.
diamond_makedb_command <- function(fasta, db) {
  paste("diamond makedb --in", shQuote(fasta), "--db", shQuote(db))
}

# Build the `diamond blastp` command line for one search direction.
# `db` is the database prefix; DIAMOND's ".dmnd" suffix is appended here.
diamond_blastp_command <- function(query, db, out, threads) {
  paste("diamond blastp",
        "--query", shQuote(query),
        "--db", shQuote(paste0(db, ".dmnd")),
        "--outfmt 6",
        "--out", shQuote(out),
        "--ultra-sensitive",
        "--threads", threads,
        "--max-hsps 1",
        "--evalue 1e-6")
}

# Build both databases.
command <- diamond_makedb_command(mouse, mousedb)
run_command(command)

command <- diamond_makedb_command(catshark, catsharkdb)
run_command(command)

# Mouse proteins against the catshark database.
command <- diamond_blastp_command(
  query = mouse,
  db = catsharkdb,
  out = file.path(map_dir, paste0(mouse_n, "_to_", shark_n, ".txt")),
  threads = threads
)
run_command(command)

# Catshark proteins against the mouse database.
command <- diamond_blastp_command(
  query = catshark,
  db = mousedb,
  out = file.path(map_dir, paste0(shark_n, "_to_", mouse_n, ".txt")),
  threads = threads
)
run_command(command)