In [None]:
import pandas as pd
import pyhmmer
import glob
import pyhmmer.easel as easel
import collections

def retrieve_hits(seqs_path, hmms, fields=["query", "subject", "bitscore", "evalue"]):
    """Search a protein file with a set of HMMs and tabulate the included hits.

    Parameters
    ----------
    seqs_path : str
        Path of the sequence file. It is opened with
        ``pyhmmer.easel.SequenceFile`` in digital mode with the amino-acid
        alphabet.
    hmms : iterable
        Query profiles, passed unchanged to ``pyhmmer.hmmsearch``.
    fields : sequence of str, optional
        Exactly four column names, assigned *positionally* to:

        1. the decoded name of the hit (``hit.name``),
        2. the decoded name of the query HMM (``hits.query_name``),
        3. the hit score (``hit.score``),
        4. the hit E-value (``hit.evalue``).

        With the default names this means the ``"query"`` column holds the
        hit name and the ``"subject"`` column holds the HMM name. The names
        must be valid ``collections.namedtuple`` field names. The default
        list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        One row per hit whose ``included`` flag is set, with ``fields`` as
        columns and a 0..n-1 index. When nothing is included the frame is
        empty but still carries the requested columns.
    """
    # Load cluster proteins
    with pyhmmer.easel.SequenceFile(seqs_path, digital=True, alphabet=easel.Alphabet.amino()) as seqs_file:
        proteins = seqs_file.read_block()

    # Building the record type up front keeps the validation of `fields`
    # (identifier-like, unique, four values per record) that callers get today.
    Result = collections.namedtuple("Result", fields)

    # Run HMMs
    # NOTE(review): only hits flagged `included` are kept; that flag presumably
    # follows the pipeline's inclusion thresholds rather than the `E=1`
    # reporting cutoff passed here -- confirm against the pyhmmer docs.
    results = []
    for hits in pyhmmer.hmmsearch(hmms, proteins, E=1):
        cog = hits.query_name.decode()
        results.extend(
            Result(hit.name.decode(), cog, hit.score, hit.evalue)
            for hit in hits
            if hit.included
        )

    # Results --> df: a list of records converts in one step, so no manual
    # row counter or intermediate dict is needed.
    return pd.DataFrame(results, columns=fields)

# Find and load a collection of HMMs.
# glob.glob() returns matches in an arbitrary, filesystem-dependent order, so
# the paths are sorted to make the content order of HMMS the same on every
# run and on every machine.
HMMS = []
for fil in sorted(glob.glob('./data/hmms_nifHDK/*.hmm')):
    with pyhmmer.plan7.HMMFile(fil) as hmm_file:
        # NOTE(review): read() is called once per file, so each .hmm file is
        # presumably expected to hold a single profile -- confirm.
        HMMS.append(hmm_file.read())
HMMS

In [None]:
# Run the loaded HMMs against ./data/omd2_candidate.faa, keep the hit table
# in `results`, and display it as the cell output.
results = retrieve_hits(seqs_path='./data/omd2_candidate.faa', hmms=HMMS)
results

In [None]:
# Same search on ./data/cyanobact.faa; the table is only displayed, not stored.
retrieve_hits(seqs_path='./data/cyanobact.faa', hmms=HMMS)