# 210818 Verify query results

In [1]:
from pathlib import Path

In [2]:
import pandas as pd

In [3]:
from gambit.db import load_database
from gambit.query import query_parse, QueryParams
from gambit.io.seq import SequenceFile

## File paths

In [4]:
# Base directories that the input file paths below are built from
# (both are relative paths).
outdir, tmpdir = Path('output'), Path('tmp')

In [5]:
# Input files read by this notebook, keyed by a short label.
infiles = {
    # Reference database files (passed together to load_database below).
    'ref_genomes': outdir / 'testdb_210818-genomes.db',
    'ref_sigs': outdir / 'testdb_210818-signatures.h5',
    # CSV table listing the query sequences and their expected results.
    'queries_table': tmpdir / 'query-seqs.csv',
}

## Load data

In [6]:
# Load the reference database from the two reference files listed in `infiles`.
db = load_database(
    infiles['ref_genomes'],
    infiles['ref_sigs'],
)

In [7]:
# Table of query sequences with their expected results.
# Columns used later in this notebook: name, predicted, primary, closest, warnings.
queries_df = pd.read_csv(
    infiles['queries_table'],
)

In [10]:
# Directory holding one FASTA file per row of the queries table,
# each named after the row's `name` value.
queries_dir = tmpdir.joinpath('query-seqs')

# Build the sequence file objects in table order, so that results can later be
# matched to table rows by position.
query_files = SequenceFile.from_paths(
    [queries_dir.joinpath(f'{name}.fasta') for name in queries_df['name']],
    'fasta',
)

## Query

In [11]:
# Run all query files against the reference database in a single call.
# NOTE(review): `classify_strict=True` presumably selects the strict
# classification mode -- confirm against the QueryParams documentation.
params = QueryParams(
    classify_strict=True,
)
results = query_parse(
    db,
    query_files,
    params,
)

In [12]:
# Verify each query result against the expected values in the queries table.

# zip() stops at the shorter input without complaint, so first confirm there is
# exactly one result per table row; otherwise a short result list would leave
# trailing rows unchecked and the cell would still pass.
result_items = list(results.items)
assert len(result_items) == len(queries_df), (
    f'expected {len(queries_df)} query results, got {len(result_items)}'
)

for row, item in zip(queries_df.itertuples(), result_items):
    # Results must come back in the same order as the table rows.
    assert item.input.file.path.stem == row.name, (
        f'result for {item.input.file.path.stem!r} does not line up with '
        f'table row {row.name!r}'
    )

    cr = item.classifier_result

    if pd.isnull(row.predicted):
        # No prediction expected: neither a predicted taxon nor a primary match.
        assert cr.predicted_taxon is None, f'{row.name}: unexpected predicted taxon'
        assert cr.primary_match is None, f'{row.name}: unexpected primary match'
    else:
        # Check for None explicitly so a missing prediction fails as an
        # assertion naming the query, not as an AttributeError on the
        # attribute accesses below.
        assert cr.predicted_taxon is not None, (
            f'{row.name}: expected prediction {row.predicted!r}, got none'
        )
        assert cr.primary_match is not None, (
            f'{row.name}: expected primary match {row.primary!r}, got none'
        )
        assert cr.predicted_taxon.name == row.predicted, (
            f'{row.name}: predicted {cr.predicted_taxon.name!r}, '
            f'expected {row.predicted!r}'
        )
        assert cr.primary_match.genome.description == row.primary, (
            f'{row.name}: primary match {cr.primary_match.genome.description!r}, '
            f'expected {row.primary!r}'
        )

    # The closest match and the warnings flag are checked for every query,
    # whether or not a prediction is expected.
    assert cr.closest_match.genome.description == row.closest, (
        f'{row.name}: closest match {cr.closest_match.genome.description!r}, '
        f'expected {row.closest!r}'
    )
    assert bool(cr.warnings) == row.warnings, (
        f'{row.name}: warnings present = {bool(cr.warnings)}, '
        f'expected {row.warnings}'
    )