/
diamond.py
120 lines (100 loc) · 4.71 KB
/
diamond.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
'''
DIAMOND - blastx replacement for large database protein sequence queries
'''
from builtins import super
import itertools
import logging
import os
import os.path
import shlex
import shutil
import subprocess
import tools
import util.file
import util.misc
# Upstream commit of bbuchfink/diamond that gets downloaded and built. The
# archive URL and the commit-named directory are both derived from this one
# value so they cannot drift apart when the pinned commit changes.
_DIAMOND_COMMIT = 'b576a5c03177603554f4627ae367f7bcbc6b8dcb'
URL = 'https://github.com/bbuchfink/diamond/archive/{}.zip'.format(_DIAMOND_COMMIT)
# Version string reported by Diamond.version().
TOOL_VERSION = '0.7.9'
# Commit-named directory that post_download() renames to DIAMOND_DIR.
DIAMOND_COMMIT_DIR = 'diamond-{}'.format(_DIAMOND_COMMIT)
# Version-named directory that holds the build tree after the rename.
DIAMOND_DIR = 'diamond-{}'.format(TOOL_VERSION)
log = logging.getLogger(__name__)
class Diamond(tools.Tool):
    '''Wrapper that builds and runs diamond command lines.

    Each public method assembles the options for one diamond subcommand and
    delegates to execute(), which runs the installed binary.
    '''

    # Subcommands accepted by execute().
    SUBCOMMANDS = ['makedb', 'blastx', 'blastp', 'view']

    def __init__(self, install_methods=None):
        '''Set up the tool.

        Args:
          install_methods: Optional list of install methods. When empty or
            None, a single DownloadAndBuildDiamond for URL is used.
        '''
        if not install_methods:
            install_methods = [DownloadAndBuildDiamond(URL, os.path.join(DIAMOND_DIR, 'bin', 'diamond'))]
        super().__init__(install_methods=install_methods)

    def version(self):
        '''Return the pinned diamond version string (TOOL_VERSION).'''
        return TOOL_VERSION

    @staticmethod
    def _own_options(options):
        '''Return a private copy of options (or a new dict when empty/None).

        The public methods add their own entries to the options mapping;
        working on a copy keeps the caller's dict untouched so it can be
        reused for another command without leaking options between calls.
        '''
        return options.copy() if options else {}

    def build(self, db, protein_fastas, options=None, option_string=None):
        '''Create a diamond database.

        Args:
          db: Diamond database file to create.
          protein_fastas: List of input fasta files to process.
          options: Optional dict of extra command line options to values.
            It is not modified.
          option_string: Optional string of extra arguments, split with
            shlex and appended to the command line.

        Returns:
          The result of execute() for the 'makedb' subcommand.
        '''
        assert len(protein_fastas), ('Diamond requires input files to create a database.')
        options = self._own_options(options)
        temp_file = util.file.temp_catted_files(protein_fastas, prefix='diamond_', suffix='.fasta')
        # The command must run inside the with-block, while the temporary
        # input file handed out by the context manager is still in use.
        with temp_file as input_fasta:
            options['--in'] = input_fasta
            options['--db'] = db
            return self.execute('makedb', options=options, option_string=option_string)

    def blastx(self, db, query_files, diamond_alignment, options=None, option_string=None):
        '''Perform a blastx-like search from query file to database.

        Args:
          db: Diamond database file.
          query_files: List of input query files; they are passed to
            util.file.temp_catted_files and the resulting temporary file is
            used as the single --query input.
          diamond_alignment: Diamond alignment output file. Must end in .daa
          options: Optional dict of extra command line options to values.
            It is not modified.
          option_string: Optional string of extra arguments, split with
            shlex and appended to the command line.

        Returns:
          The result of execute() for the 'blastx' subcommand.
        '''
        assert diamond_alignment.endswith('.daa'), 'Output must end in .daa'
        options = self._own_options(options)
        temp_file = util.file.temp_catted_files(query_files, prefix='diamond_', suffix='.fasta')
        with temp_file as query:
            options['--db'] = db
            options['--query'] = query
            options['--daa'] = diamond_alignment
            return self.execute('blastx', options=options, option_string=option_string)

    def view(self, diamond_alignment, output_file, output_format='tab', options=None, option_string=None):
        '''Perform translation between diamond output and blast tab/sam output.

        Args:
          diamond_alignment: Diamond alignment (.daa) file to read.
          output_file: File to write the translated output to.
          output_format: Either 'tab' or 'sam'.
          options: Optional dict of extra command line options to values.
            It is not modified.
          option_string: Optional string of extra arguments, split with
            shlex and appended to the command line.

        Returns:
          The result of execute() for the 'view' subcommand.
        '''
        assert output_format in ('tab', 'sam'), 'Invalid diamond view format'
        options = self._own_options(options)
        options['--out'] = output_file
        options['--daa'] = diamond_alignment
        options['--outfmt'] = output_format
        return self.execute('view', options=options, option_string=option_string)

    def execute(self, command, options=None, option_string=None, return_stdout=False):
        '''Run a diamond command.

        Args:
          command: Diamond subcommand; must be one of SUBCOMMANDS.
          options: Dict of command line options to values. Set value to None
            for an option with no value.
          option_string: Optional string of extra arguments, split with
            shlex and appended after the options.
          return_stdout: Accepted for interface compatibility; this method
            does not currently read it.

        Returns:
          Whatever util.misc.run returns for the assembled command, which is
          invoked with stdout piped and stderr redirected into stdout.
        '''
        assert command in Diamond.SUBCOMMANDS, 'Diamond command is unknown'
        cmd = [self.install_and_get_path(), command]
        if options:
            # Flatten {'--opt': value} pairs into ['--opt', 'value', ...].
            # We need some way to allow empty options args like --log, hence
            # None values are dropped and only the option name is emitted.
            flattened = itertools.chain.from_iterable(options.items())
            cmd.extend(str(x) for x in flattened if x is not None)
        if option_string:
            cmd.extend(shlex.split(option_string))
        log.debug("Calling %s: %s", command, " ".join(cmd))
        return util.misc.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
class DownloadAndBuildDiamond(tools.DownloadPackage):
    '''Download package that compiles diamond from source after download.'''

    # TODO: refactor to have a generic cmake installer.
    def post_download(self):
        '''Rename the commit-named source directory and run make in its src dir.'''
        versioned_dir = os.path.join(self.destination_dir, DIAMOND_DIR)

        # TODO: DownloadPackage should rather offer a generic way to rename
        # self.download_file.
        if not os.path.exists(versioned_dir):
            unpacked_dir = os.path.join(self.destination_dir, DIAMOND_COMMIT_DIR)
            shutil.move(unpacked_dir, versioned_dir)

        build_env = os.environ.copy()
        # The default travis gcc version is 4.6, which is too old to build
        # diamond properly, so point the build at gcc/g++ 4.9 there.
        if os.environ.get('TRAVIS') == 'true':
            build_env.update(CC='gcc-4.9', CXX='g++-4.9')

        # No cmake step is run; make is invoked directly in the src directory.
        src_dir = os.path.join(versioned_dir, 'src')
        util.misc.run_and_print(['make'], env=build_env, cwd=src_dir, check=True)