Skip to content

Commit

Permalink
Implement a functional test with metaquast as final step.
Browse files Browse the repository at this point in the history
  • Loading branch information
loic-couderc committed Dec 14, 2017
1 parent 68fbc6e commit 5ef4c58
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 82 deletions.
19 changes: 13 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ dist: trusty
sudo: required
language: cpp


#gcc 4.9
matrix:
include:
Expand All @@ -17,21 +18,27 @@ matrix:
- MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"

before_install:
- eval "${MATRIX_EVAL}"
- ./travis_download_db.sh & pid=$!
- free -m
- eval "${MATRIX_EVAL}"


install:
- sudo apt-get -qq update
- sudo apt-get install -y xz-utils
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
# Useful for debugging any issues with conda
- conda info -a
- sudo apt-get install -y curl default-jdk ant libsparsehash-dev zlib1g-dev bzip2
- conda install --yes -c bioconda samtools
- conda install --yes pytest
- ./build.py
- conda env create -n matam -f environment.yml
- source activate matam
- ./travis_build.sh
- cd $HOME && wget https://sourceforge.net/projects/quast/files/quast-4.6.1.tar.gz && tar xvf quast-4.6.1.tar.gz && export PATH="$HOME/quast-4.6.1":$PATH && cd -
# Wait until the db is downloaded
- while kill -0 $pid 2> /dev/null; do sleep 1; done;

script:
- pytest
- travis_wait 60 pytest -rs
36 changes: 36 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Conda environment specification for MATAM: tools needed to build,
# run and test the pipeline.
name: matam

# Channels are listed in priority order.
channels:
- bioconda
- conda-forge
# NOTE(review): presumably only needed for the commented-out gcc-5 /
# libgcc-5 packages below -- confirm before removing.
- salford_systems
- defaults

dependencies:
- sed
- git
- curl
# Compiler and autotools packages are currently disabled.
#- gcc-5
#- libgcc-5
- python >=3.5
- numpy
# - autoconf
- make
- cmake >=3.1
- wget
# - automake
- sparsehash
- zlib
- bzip2
- coreutils # Centos6 (otherwise sort --parallel is unrecognized)
- samtools ==1.4.1 # Centos7 (otherwise need libbz2.so.1.0)
- vsearch
- sga
- sortmerna >=2.1*
- rdptools
- krona
- exonerate=2.2
- pytest

# NOTE(review): machine-specific path, looks like residue from
# `conda env export`; presumably safe to drop -- confirm.
prefix: /home/lcouderc/miniconda3

71 changes: 0 additions & 71 deletions tests/functional.py

This file was deleted.

1 change: 0 additions & 1 deletion tests/test_binaries_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
'sortmerna',
'vsearch',
'java', #rdp
'classifier.jar', #rdp
'ktImportText', #krona
])

Expand Down
137 changes: 137 additions & 0 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os
import sys
import tempfile
import subprocess
import shutil
import multiprocessing
import pytest
import collections

# Locations of the repository directories used by the functional tests,
# all resolved relative to the directory containing this test file.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
SCRIPTS_DIR = os.path.join(CURRENT_DIR, '..', 'scripts')
EXAMPLES_DIR = os.path.join(CURRENT_DIR, '..', 'examples')
DB_DIR = os.path.join(CURRENT_DIR, '..', 'db')

# Extend the import path with the scripts directory before the import
# below (binary_utils is presumably located there -- confirm).
sys.path.append(SCRIPTS_DIR)

from binary_utils import Binary

# skip all module tests if needed
# (every test in this module is skipped when the db directory is absent)
pytestmark = pytest.mark.skipif(
not os.path.isdir(DB_DIR),
reason='DB_DIR is missing:%s' % DB_DIR
)


@pytest.fixture(scope='module')
def matam_results():
    """Run matam_assembly.py once per module and expose its outputs.

    The assembly is run on the 16sp simulated dataset against the
    SILVA_128_SSURef_NR95 database, writing into a fresh temporary
    directory.

    Yields:
        MatamResults: a namedtuple with the process ``return_code`` and
        the expected paths of ``fasta``, ``krona_html``, ``krona_tab``
        and ``rdp_tab`` inside the output directory. The paths are built
        unconditionally; the files may not exist if the run failed.

    The output directory is removed once the module's tests are done,
    and also when the setup itself raises.
    """
    out = tempfile.mkdtemp(dir='/tmp/', prefix='matam_functionnal_test_')
    try:
        # An argument list (no shell) keeps the command correct even when
        # the repository path contains spaces or shell metacharacters.
        cmd = [
            os.path.join(SCRIPTS_DIR, 'matam_assembly.py'),
            '-i', os.path.join(
                EXAMPLES_DIR,
                '16sp_simulated_dataset/16sp.art_HS25_pe_100bp_50x.fq'
            ),
            '-d', os.path.join(DB_DIR, 'SILVA_128_SSURef_NR95'),
            '-o', out,
            '--cpu', str(multiprocessing.cpu_count()),
            '--max_memory', '3000',
            '--debug',
            '--coverage_threshold', '2000',
            '--perform_taxonomic_assignment',
        ]
        completed_process = subprocess.run(cmd)

        MatamResults = collections.namedtuple(
            "MatamResults",
            "return_code fasta krona_html krona_tab rdp_tab"
        )
        results = MatamResults(
            return_code=completed_process.returncode,
            fasta=os.path.join(out, 'final_assembly.fa'),
            krona_html=os.path.join(out, 'krona.html'),
            krona_tab=os.path.join(out, 'krona.tab'),
            rdp_tab=os.path.join(out, 'rdp.tab')
        )

        yield results
    finally:
        # Runs on normal teardown and when setup fails before the yield.
        if os.path.isdir(out):
            shutil.rmtree(out)


def exists_and_not_empty(fpath):
    """Return True when *fpath* is a regular file with a non-zero size."""
    if not os.path.isfile(fpath):
        return False
    return os.path.getsize(fpath) > 0


def test_return_code(matam_results):
    """The MATAM run recorded by the fixture must exit with status 0."""
    exit_status = matam_results.return_code
    assert exit_status == 0


def test_final_fasta_file(matam_results):
    """The final assembly FASTA file must exist and contain data."""
    fasta_path = matam_results.fasta
    assert exists_and_not_empty(fasta_path)


def test_krona_html(matam_results):
    """The Krona HTML report must exist and contain data."""
    html_path = matam_results.krona_html
    assert exists_and_not_empty(html_path)


def test_krona_tab(matam_results):
    """The Krona tabular file must exist and contain data."""
    tab_path = matam_results.krona_tab
    assert exists_and_not_empty(tab_path)


def test_rdp_tab(matam_results):
    """The RDP tabular file must exist and contain data."""
    tab_path = matam_results.rdp_tab
    assert exists_and_not_empty(tab_path)


def extract_metaquast_val(tsv):
    """Return the value in the second column of the second line of *tsv*.

    The first line is skipped; the selected tab-separated field is
    stripped of surrounding whitespace and converted to float.
    """
    with open(tsv, 'r') as handle:
        content = list(handle)
    second_row = content[1]
    fields = second_row.split('\t')
    return float(fields[1].strip())


@pytest.mark.skipif(
    not Binary.which('metaquast.py'),
    reason="requires metaquast.py to be in PATH"
)
def test_metaquast(matam_results):
    """Evaluate the assembly against the true reference with metaquast.

    metaquast.py is run on the fixture's final FASTA inside a temporary
    working directory. The genome fraction and the combined error rate
    are then read from the summary TSV files and checked against fixed
    thresholds.

    The working directory is removed whether or not the assertions pass.
    """
    data_directory = tempfile.mkdtemp(dir='/tmp/', prefix='metaquast_')
    try:
        true_ref = os.path.join(
            EXAMPLES_DIR, '16sp_simulated_dataset/16sp.fasta'
        )
        # An argument list (no shell) stays correct when paths contain
        # spaces or shell metacharacters.
        cmd = [
            'metaquast.py',
            '-a', 'all',
            '--ambiguity-score', '1',
            '--min-identity', '97',
            '-x', '500',
            '--unaligned-part-size', '200',
            '-R', true_ref,
            matam_results.fasta,
        ]
        subprocess.run(cmd, cwd=data_directory)

        summary_dir = os.path.join(
            data_directory, 'quast_results/latest/summary/TSV'
        )
        genome_fraction = extract_metaquast_val(
            os.path.join(summary_dir, 'Genome_fraction_(%).tsv')
        )
        mismatches = extract_metaquast_val(
            os.path.join(summary_dir, '#_mismatches_per_100_kbp.tsv')
        )
        indels = extract_metaquast_val(
            os.path.join(summary_dir, '#_indels_per_100_kbp.tsv')
        )
        ns = extract_metaquast_val(
            os.path.join(summary_dir, '#_N\'s_per_100_kbp.tsv')
        )
        # Counts are per 100 kbp: x / 100000 bp * 100 gives a percentage.
        error_rate = (mismatches + indels + ns) / 1000

        assert genome_fraction > 86.4
        assert error_rate < 0.06
    finally:
        # Clean up even when metaquast output is missing or a check fails.
        if os.path.isdir(data_directory):
            shutil.rmtree(data_directory)
14 changes: 10 additions & 4 deletions tests/test_rdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,32 @@
from rdp import run_rdp_classifier, read_rpd_file, get_lineage, filter_rdp_file
from binary_utils import Binary


def test_run_rdp_classifier_ok():
    """Run the RDP classifier on the sample scaffolds.

    When ``classifier.jar`` is found, the classifier is launched through
    java; otherwise a standalone ``classifier`` executable is used.
    ``Binary.assert_which`` presumably fails when the binary cannot be
    found -- confirm against binary_utils.
    """
    fasta = os.path.join(SAMPLE_DIR, 'scaffolds.fa')
    jar = Binary.which('classifier.jar')

    if jar is not None:
        java = Binary.assert_which('java')
        rdp_exe = '{java} -Xmx1g -jar {jar}'.format(java=java, jar=jar)
    else:
        rdp_exe = Binary.assert_which('classifier')

    # The context manager closes and deletes the result file afterwards.
    with tempfile.NamedTemporaryFile() as result_file:
        run_rdp_classifier(rdp_exe, fasta, result_file.name)


def test_read_rdp_results():
    """Reading the sample RDP file must yield one entry per scaffold."""
    rdp_file = os.path.join(SAMPLE_DIR, 'rdp.txt')
    lines = list(read_rpd_file(rdp_file))
    assert len(lines) == 23  # 23 scaffolds


def test_filter_rdp_file():
    """Filter the sample RDP file and check the shape of each entry.

    Every filtered entry must have 19 fields, and the entry whose
    sequence id is "87" must have a fully unclassified lineage.
    """
    rdp_file = os.path.join(SAMPLE_DIR, 'rdp.txt')
    # The context manager closes and deletes the result file afterwards.
    with tempfile.NamedTemporaryFile() as result_file:
        filter_rdp_file(rdp_file, result_file.name)
        for line in read_rpd_file(result_file.name):
            assert len(line) == 19  # seqid + 6 taxonomic levels * 3
            if line[0] == "87":
                assert get_lineage(line) == ['unclassified'] * 6
12 changes: 12 additions & 0 deletions travis_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
#
# Build the native components (componentsearch and ovgraphbuild).
# Paths are relative to the current working directory.

# Stop at the first failing command so that a broken build of one
# component is not masked by a later successful step.
set -euo pipefail

# An activated env is needed for $CONDA_PREFIX
#CC=${CONDA_PREFIX}/bin/gcc
#CXX=${CONDA_PREFIX}/bin/g++

# Build componentsearch (subshell: the caller's directory is untouched)
(
    cd componentsearch
    make
)

# Build ovgraphbuild
build_dir=ovgraphbuild/build
mkdir -p "$build_dir"
(
    cd "$build_dir"
    cmake .. -G"CodeBlocks - Unix Makefiles"
    make
)
5 changes: 5 additions & 0 deletions travis_download_db.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
#
# Download the SILVA_128_SSURef_NR95 database archive and its md5 file,
# verify the checksum, then unpack the archive into the current directory.

# Stop at the first failure: an archive that fails to download or does
# not match its checksum must not be extracted.
set -euo pipefail

archive=SILVA_128_SSURef_NR95_indexed_max_mem_3G.tar.xz
base_url=http://bioinfo.lifl.fr/matam

wget --quiet "${base_url}/${archive}.md5" "${base_url}/${archive}"
md5sum -c "${archive}.md5"
tar Jxvf "${archive}"

0 comments on commit 5ef4c58

Please sign in to comment.