In [1]:
import os
import shutil
import json
import zlib
import traceback
import urllib.request

import pandas as pd
from Bio import Entrez
from Bio import SearchIO
from Bio.SearchIO.HmmerIO.hmmer3_text import Hmmer3TextParser

from eaglib.alignment import SeqsProfileInfo, SeqsProfile
from eaglib.seqs import SeqsDict, load_fasta_to_dict

In [7]:
! pip uninstall -y EAGLE
! pip install ../../dist/EAGLE-0.0.1-py3-none-any.whl

Found existing installation: EAGLE 0.0.1
Uninstalling EAGLE-0.0.1:
  Successfully uninstalled EAGLE-0.0.1
Processing /media/olga/Data/Denis/EAGLE/dist/EAGLE-0.0.1-py3-none-any.whl
Installing collected packages: EAGLE
Successfully installed EAGLE-0.0.1


In [2]:
def get_taxonomy(tax_id):
    tax_keys = ["superkingdom", "phylum", "clade", "class", "order", "family", "genus", "species"]
    tax_dict = {tax_key: None for tax_key in tax_keys}
    
    record = Entrez.efetch(db="taxonomy", id=tax_id, retmode='xml')
    tax_info = Entrez.read(record)[0]
    tax_dict["species"] = tax_info['ScientificName']
    
    for lin_tax in tax_info['LineageEx']:
        if lin_tax['Rank'] in tax_dict:
            tax_dict[lin_tax['Rank']] = lin_tax['ScientificName']
            
    return [tax_dict[tax_key] for tax_key in tax_keys]

In [3]:
arch_16s_profile = SeqsProfile(SeqsProfileInfo.load_from_dict({
    'name': '16S_rRNA_archaea',
    'path': '16S_rRNA_archaea.cm',
    'type': 'rna',
    'weight': 1.0,
    'method': 'infernal'
}))

In [4]:
hsp70_profile = SeqsProfile(SeqsProfileInfo.load_from_dict({
    'name': 'HSP70',
    'path': 'HSP70.hmm',
    'type': 'protein',
    'weight': 1.0,
    'method': 'hmmer'
}))

In [5]:
assembly_summary_path = "archaea_assembly_summary.txt"
Entrez.email = "moshenskydenis@gmail.com"
db_dir = "archaea"

processed_ac = list()
arch_df = pd.read_csv(assembly_summary_path, sep="\t")
# arch_df.query("assembly_level=='Complete Genome' & refseq_category!='representative genome'")
for _, row in arch_df.query("assembly_level=='Complete Genome'").iterrows():
    ac = row['assembly_accession']  # id field in genomes_table
    asm = row['asm_name']
    taxonomy = get_taxonomy(row['species_taxid'])
    name = row['organism_name'] + ("" if pd.isna(row['infraspecific_name']) else " " + row['infraspecific_name'])
    ftp_prefix = (row['ftp_path'] + "/" + ac + "_" + asm).replace(" ", "_")
    fna_seq = [ftp_prefix+"_genomic.fna.gz"]
    # fna_seq = [os.path.join(db_dir, ac+"_genomic.fna")] #
    btc_seqs = [os.path.join(db_dir, ac+"_btc.fasta")]
    rna_path = os.path.join(db_dir, ac+"_rna_from_genomic.fna")
    tcds_path = os.path.join(db_dir, ac+"_translated_cds.faa")
    btc_seqs_dict = dict()
    
    try:
    #     with open(fna_seq[0], 'wb') as gfna_f: #
    #         gfna_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_genomic.fna.gz").read(), 15+32)) #
        with open(rna_path, 'wb') as rna_f:
            rna_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_rna_from_genomic.fna.gz").read(), 15+32))
        psr_rna_df = arch_16s_profile.search(seqdb=rna_path, threads=4)
        if not psr_rna_df.empty:
            max_score_rna = psr_rna_df.loc[psr_rna_df["score"].idxmax()]
            btc_seqs_dict[arch_16s_profile.name] = load_fasta_to_dict(fasta_path=rna_path)[max_score_rna["target name"]][max_score_rna["seq from"]-1: max_score_rna["seq to"]]                     
        
        with open(tcds_path, 'wb') as tcds_f:
            tcds_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_translated_cds.faa.gz").read(), 15+32))
        psr_hsp_df = hsp70_profile.search(seqdb=tcds_path, threads=4)
        if not psr_hsp_df.empty:
            max_score_hsp = psr_hsp_df.loc[psr_hsp_df["domain_score"].idxmax()]
            btc_seqs_dict[hsp70_profile.name] = load_fasta_to_dict(fasta_path=tcds_path)[max_score_hsp["target name"]][max_score_hsp["ali_from"]-1: max_score_hsp["ali_to"]]                     
        
        SeqsDict.load_from_dict(btc_seqs_dict).dump(btc_seqs[0])
        processed_ac.append({"id": ac, "name": name, "taxonomy": taxonomy, "btc_seqs": btc_seqs, "fna_seq": fna_seq})
    except:
        print(traceback.format_exc())
    print(ac, name)

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_000762265.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_anXXhVH5xC.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1293.4  12.7  lcl|NZ_CP006933.1_rrna_4       3   1475 + hmm     - 0.56  [locus_tag=BRM9_RS01355] [db_xref=RFAM:RF01

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000762265.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_wyPnMhHncY.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_001458655.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_JNhaF6CTeZ.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1293.4  12.7  lcl|NZ_LN734822.1_rrna_30      3   1475 + hmm     - 0.56  [locus_tag=MB9_RS08735] [db_xref=RFAM:RF019

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001458655.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_LNgdKscHSv.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_002813085.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_zKxTsQ9QpQ.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1275.5   6.2  lcl|NZ_CP017803.1_rrna_12      2   1475 + hmm     - 0.53  [locus_tag=BK798_RS02330] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002813085.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_2fRvB0VG5W.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_022846155.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_ueIuQKkFlD.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1275.7   6.1  lcl|NZ_AP025586.1_rrna_6       2   1475 + hmm     - 0.53  [locus_tag=MWL82_RS01290] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_022846155.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_3nY7Bbgqh1.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_022846175.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_KS2wkEiSAQ.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                  start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------ ------ ------   --- ----- ----  -----------
  (1) !         0 1275.7   6.1  lcl|NZ_AP025587.1_rrna_3      2   1475 + hmm     - 0.53  [locus_tag=MWL87_RS00410] [db_xref=RFAM:RF0195

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_022846175.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_8vOTi3R2gu.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_001889405.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_QIriKEUOsT.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1267.3   9.6  lcl|NZ_CP017921.1_rrna_18      2   1474 + hmm     - 0.55  [locus_tag=BHR79_RS03985] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001889405.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_l1iIoTWZ0G.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_019263745.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_6PUhK3tzp9.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_010706455.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_FRa9m2t4PW.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_019669945.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_97GpG64BU7.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1257.5  12.6  lcl|NZ_AP019780.1_rrna_17      2   1474 + hmm     - 0.57  [locus_tag=MmazTMA_RS04595] [db_xref=RFAM:R

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_019669945.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_eq1xMfMmId.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_004799605.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_p47vxMokOk.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                  start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------ ------ ------   --- ----- ----  -----------
  (1) !         0 1289.7  17.3  lcl|NZ_CP038631.1_rrna_2      2   1472 + hmm     - 0.58  [locus_tag=HBSAL_RS00380] [db_xref=RFAM:RF0195

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_004799605.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_jEHb2b3BYN.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_009729015.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_L3QED8InDw.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002215405.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_pbcQc04GDg.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002215445.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_9Wb48OPIHV.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002215485.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_32Ol86tCrJ.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002215525.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_FYJTU5fHuO.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002215565.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_W9gOZHkehW.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_019175305.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_fkU0wPkb2y.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_019175325.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_Iht7YD3n8M.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000968355.2_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_I0KNdtE4iq.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000968395.2_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_7sTMRIWTL0.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000968435.2_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_ZdObijdrph.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852095.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_QKc1UJBUGb.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852115.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_4MFHJVKzJM.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852135.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_QiXBwRG4mj.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852155.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_d46hPoPoG2.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852175.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_A6DSjG1K2k.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852195.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_X80x4HnnJr.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003852215.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_iab8y4zsaK.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_900079115.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_qd8yiVv2RB.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_902384015.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_kNcvXSC6IT.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_002355655.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_sLmY2qBduF.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1275.6  15.3  lcl|NZ_AP017569.1_rrna_1       2   1469 + hmm     - 0.58  [locus_tag=CPZ01_RS00025] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002355655.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_avNSZ2gZgo.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_018228765.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_T09jnTfqto.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1277.5  15.3  lcl|NZ_CP073695.1_rrna_11      2   1469 + hmm     - 0.58  [locus_tag=J7656_RS01170] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_018228765.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_v0p1m1VKm8.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_001190965.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_UJUphI3mb6.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1288.0  12.7  lcl|NZ_CP011947.1_rrna_32      2   1472 + hmm     - 0.57  [locus_tag=ABY42_RS08440] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001190965.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_ba4O6N453c.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_014969745.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_PAqd2vqTqx.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1287.7  12.4  lcl|NZ_CP063205.1_rrna_30      2   1472 + hmm     - 0.57  [locus_tag=HfgLR_RS08715] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_014969745.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_Kk1YzBFQdN.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_002945325.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_clRAIQaJ9O.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1251.2  14.2  lcl|NZ_CP026606.1_rrna_20      2   1466 + hmm     - 0.57  [locus_tag=MMJJ_RS05305] [db_xref=RFAM:RF01

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002945325.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_ckVDGRqrSE.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003052125.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_E5C5FUGOeW.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_019669925.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_uqT2nBtjLG.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_003201835.2_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_2Us7AnZxJb.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1163.5  22.1  lcl|NZ_CP029289.2_rrna_46      2   1496 + hmm     - 0.61  [locus_tag=DFR85_RS26870] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_003201835.2_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_zO4WDupZB5.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_009729035.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_muO4Kl3DY3.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001266655.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_OlzPPqwfwg.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001266675.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_RKi4kvCmV2.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001266695.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_t39PQ2DB6c.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001266715.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_V3aKL64jZ0.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001266735.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_tJLo6gG2BZ.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_005222525.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_8nCwQYHaGX.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001592435.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_rCcW2Ggoph.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002214465.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_F0Y2JbOWH6.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_014962245.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_C6RBHKBTiY.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

Traceback (most recent call last):
  File "/tmp/ipykernel_10920/2259543226.py", line 31, in <cell line: 8>
    tcds_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_translated_cds.faa.gz").read(), 15+32))
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 542, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 1397, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_900012635.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_NSd1nupGPU.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_001433455.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_J4bgupyXge.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_004803735.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_uhqBbUuQOZ.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_002813655.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_q7zFuKmupR.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1293.4  11.8  lcl|NZ_CP017766.1_rrna_24      2   1480 + hmm     - 0.56  [locus_tag=BK007_RS05635] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002813655.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_aQawTISSpU.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                  Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                  --

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_002813695.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_uchsYm4lja.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1293.4  11.8  lcl|NZ_CP017768.1_rrna_17      2   1480 + hmm     - 0.56  [locus_tag=BK009_RS06625] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002813695.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_DFGj2UANHj.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_010692885.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_rZSmfihu1K.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1274.7  11.3  lcl|NZ_CP048739.1_rrna_56      2   1471 + hmm     - 0.57  [locus_tag=G3I44_RS19290] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_010692885.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_MKBZ1PpgsK.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_004799645.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_joJSlGD3mB.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

Traceback (most recent call last):
  File "/tmp/ipykernel_10920/2259543226.py", line 25, in <cell line: 8>
    rna_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_rna_from_genomic.fna.gz").read(), 15+32))
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 542, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/request.py", line 1397, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/urllib/reques

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000009965.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_Rhsz4AEEAt.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_005890195.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_avwnVbJnWP.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_009729055.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_gfHEt3cDG3.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1176.5  28.1  lcl|NZ_CP045484.1_rrna_33      2   1494 + hmm     - 0.64  [locus_tag=D1869_RS06875] [db_xref=RFAM:RF0

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_009729055.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_TiJiS3dpO7.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002214385.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_sZsC77sNOg.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

Traceback (most recent call last):
  File "/tmp/ipykernel_10920/2259543226.py", line 31, in <cell line: 8>
    tcds_f.write(zlib.decompress(urllib.request.urlopen(ftp_prefix+"_translated_cds.faa.gz").read(), 15+32))
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/http/client.py", line 472, in read
    s = self._safe_read(self.length)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/http/client.py", line 613, in _safe_read
    data = self.fp.read(amt)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/ssl.py", line 1241, in recv_into
    return self.read(nbytes, buffer)
  File "/home/olga/miniconda3/envs/eagle/lib/python3.8/ssl.py", line 1099, in read
    return self._sslobj.read(len, buffer)
KeyboardInterrupt

GCF_002214485.1 Thermococcus pacificus strain=P-4
# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copy

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_002214505.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_nit9hEBE2p.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------

   [No hits detected that satisfy reporting thresholds]

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_000145295.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_gDrbYyzuoq.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                 start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ----------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1286.6  16.8  lcl|NC_014408.1_rrna_8       3   1481 + hmm     - 0.58  [locus_tag=MTBMA_RS01620] [db_xref=RFAM:RF01959] 

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000145295.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_17EHaDNT9s.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                 Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                 ----

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_900036045.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_kOW8fcABHY.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1261.4  13.7  lcl|NZ_LT158599.1_rrna_12      2   1466 + hmm     - 0.56  [locus_tag=MAB1_RS02835] [db_xref=RFAM:RF01

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_900036045.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_A3UgH4i5ft.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

KeyboardInterrupt: 

In [5]:
arch_16s_profile_ss = SeqsProfile.build(mult_aln="RF01959.stockholm", name="16S_rRNA_archaea_ss", method="infernal", seqs_type="rna", noss=False)

# cmbuild :: covariance model construction from multiple sequence alignments
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# CM file:                                            16S_rRNA_archaea_ss.cm
# alignment file:                                     RF01959.stockholm
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#                                                                      rel entropy
#                                                                      -----------
# idx    name                     nseq eff_nseq   alen  clen  bps bifs    CM   HMM description
# ------ -------------------- -------- -------- ------ ----- ---- ---- ----- ----- -----------
       1 SSU_rRNA_archaea           86     1.06   1958  1478  457   30 0.589 0.302 Archaeal small subunit ribosomal RNA
#
# CPU time

Killed


In [5]:
psr = arch_16s_profile.search(seqdb="archaea/GCF_000762265.1_rna_from_genomic.fna", threads=4)

# cmsearch :: search CM(s) against a sequence database
# INFERNAL 1.1.4 (Dec 2020)
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query CM file:                         16S_rRNA_archaea.cm
# target sequence database:              archaea/GCF_000762265.1_rna_from_genomic.fna
# tabular output of hits:                16S_rRNA_archaea_out_4KCpOJatpl.psr
# number of worker threads:              4 [--cpu]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       RF01959  [CLEN=1478]
Hit scores:
 rank     E-value  score  bias  sequence                   start    end   mdl trunc   gc  description
 ----   --------- ------ -----  ------------------------- ------ ------   --- ----- ----  -----------
  (1) !         0 1293.4  12.7  lcl|NZ_CP006933.1_rrna_4       3   1475 + hmm     - 0.56  [locus_tag=BRM9_RS01355] [db_xref=RFAM:RF01

In [6]:
psr["score"].idxmax()

0

In [7]:
psr1 = hsp70_profile.search(seqdb="archaea/GCF_000762265.1_translated_cds.faa", threads=4)

# hmmsearch :: search profile(s) against a sequence database
# HMMER 3.3.2 (Nov 2020); http://hmmer.org/
# Copyright (C) 2020 Howard Hughes Medical Institute.
# Freely distributed under the BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query HMM file:                  HSP70.hmm
# target sequence database:        archaea/GCF_000762265.1_translated_cds.faa
# per-dom hits tabular output:     HSP70_out_GFNDWEhvCq.psr
# number of worker threads:        4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       HSP70  [M=599]
Accession:   PF00012.23
Description: Hsp70 protein
Scores for complete sequences (score includes all domains):
   --- full sequence ---   --- best 1 domain ---    -#dom-
    E-value  score  bias    E-value  score  bias    exp  N  Sequence                                   Description
    ------- ------ -----    ------- ------ -----   ---- --  --------                                   

In [12]:
psr1[['target name', 'seq_score', 'domain_score', 'ali_from', 'ali_to']]

Unnamed: 0,target name,seq_score,domain_score,ali_from,ali_to
0,lcl|NZ_CP006933.1_prot_WP_048085785.1_2239,760.4,73.0,7,85
1,lcl|NZ_CP006933.1_prot_WP_048085785.1_2239,760.4,690.1,85,584
2,lcl|NZ_CP006933.1_prot_WP_048085701.1_2093,50.6,4.4,22,70
3,lcl|NZ_CP006933.1_prot_WP_048085701.1_2093,50.6,45.3,112,342


In [8]:
df = pd.DataFrame(columns=['target name', 'seq_score', 'domain_score', 'ali_from', 'ali_to'])
df['seq_score'].idxmax()

TypeError: reduction operation 'argmax' not allowed for this dtype