# 210125 Verify query results

In [1]:
import logging
import shutil
from pathlib import Path

In [2]:
import pandas as pd

In [3]:
from midas.app.app import MidasAppDir, MidasApp
from midas.app.data import ReferenceDatabase
from midas.app.query import QueryRunner, QueryParams
from midas.app.util import TqdmProgressMonitor
from midas.io.seq import SeqFileInfo

In [4]:
# Date stamp used to build the test database name.
DATESTR = '210126'
DBNAME = 'testdb_%s' % DATESTR

## Files

In [5]:
# Scratch location for the application directory, plus the file names of the
# signature and database files derived from the database name.
appdir_path = Path('tmp') / 'appdir'
sigfile = DBNAME + '.midas-signatures.gz'
dbfile = DBNAME + '.db'

In [6]:
# Query table; later cells read its 'name' and 'expected_taxon' columns.
queries_df = pd.read_csv('tmp/queries.csv')

## Set up app dir

In [7]:
# Install the default root handler, then raise the 'midas' logger to INFO so
# its progress messages show up in the cell output.
logging.basicConfig()
midas_logger = logging.getLogger('midas')
midas_logger.setLevel(logging.INFO)

In [8]:
# Start from a clean slate: remove any app directory left by a previous run.
# shutil.rmtree is used instead of shelling out to `rm -r` so the cell is
# portable, safe for paths with spaces, and runs outside IPython.
if appdir_path.is_dir():
    shutil.rmtree(appdir_path)

In [9]:
appdir = MidasAppDir.create(appdir_path)

# Swap the database file that create() left in place for a symlink to the
# prebuilt test database, and link the signature file into the signatures
# directory. Both link targets are relative paths into the output/ directory.
db_link = appdir.db_file
db_link.unlink()
db_link.symlink_to('../../../output/%s' % dbfile)

sig_link = appdir.signatures_dir / sigfile
sig_link.symlink_to('../../../../output/%s' % sigfile)

INFO:midas.app.app:Creating new application directory at /home/jared/git/midas/jlumpe-midas-lab-nb/experiments/210126-artificial-test-database/tmp/appdir
INFO:midas.app.app:Initializing application database
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 955d795123c3, v2.0.0
INFO  [alembic.runtime.migration] Running upgrade 955d795123c3 -> 68acf3fff58d, v2.0.1
INFO  [alembic.runtime.migration] Running upgrade 68acf3fff58d -> 8e4286c25e33, Remove tables for Signature and SignatureSet models
INFO  [alembic.runtime.migration] Running upgrade 8e4286c25e33 -> 4815cccfb01b, Add signature set columns to ReferenceGenomeSet table
INFO  [alembic.runtime.migration] Running upgrade 4815cccfb01b -> d961d0698083, Remove unneeded columns


In [10]:
# Construct the MidasApp from the app directory object created earlier.
app = MidasApp(appdir)

INFO  [midas.app.data] Refreshing signature sets...
INFO  [midas.app.data] 1 signature sets found
INFO  [midas.app.data] Refreshing classifiers...
INFO  [midas.app.data] No classifiers found
INFO  [midas.app.data] Refreshing reference databases...
INFO  [midas.app.data] No databases found


## Run query

In [11]:
# The signature set and the reference genome set are addressed by the same
# reference string, so build it once.
test_ref = 'midas/test/%s' % DBNAME
params = QueryParams(signatures_ref=test_ref, refset_ref=test_ref)

In [12]:
# One FASTA file per row of the query table, located by the row's 'name'.
query_paths = ['tmp/query_sequences/%s.fa' % name for name in queries_df['name']]
files = SeqFileInfo.from_paths(query_paths, 'fasta')

In [13]:
# Query runner for the app; progress is reported through a TqdmProgressMonitor.
runner = QueryRunner(app, TqdmProgressMonitor())

In [14]:
# Run the query over all input files; `results.items` is consumed by the checks below.
results = runner.run_query(params, files)

INFO  [midas.app.query] Beginning query on 120 files
INFO  [midas.app.query] Creating query context...
INFO  [midas.app.query] Loading signature set IDs...
INFO  [midas.app.query] Loading signature set (878 signatures)...
Loading reference signatures: 100%|██████████| 878/878 [00:00<00:00, 51037.34it/s]
INFO  [midas.app.query] Finished loading signature sets
INFO  [midas.app.query] Running queries...
Running query: 100%|██████████| 120/120 [00:02<00:00, 58.57it/s] 
INFO  [midas.app.query] Queries complete!


## Check results

In [15]:
# Index result items by the part of the input name before the first '.',
# which is what the 'name' column of the query table is matched against.
items_by_name = {}
for result_item in results.items:
    key = result_item.input.name.split('.')[0]
    items_by_name[key] = result_item

In [16]:
# Compare each query's result against the expectation in the query table.
# Every assertion carries a message naming the query and the observed value,
# so a failure can be diagnosed without re-running the cell by hand.
for _i, row in queries_df.iterrows():
    name = row['name']
    item = items_by_name[name]
    expected = row['expected_taxon']

    if pd.isnull(expected):
        # No expected taxon recorded: the query must not have produced a call.
        assert item.call_taxon is None, \
            '%s: expected no call, got %r' % (name, item.call_taxon)
    else:
        assert item.call_name == expected, \
            '%s: expected %r, got %r' % (name, expected, item.call_name)