diff --git a/app/MainBase.py b/app/MainBase.py
index 3312217..16e7250 100644
--- a/app/MainBase.py
+++ b/app/MainBase.py
@@ -6,6 +6,7 @@
 import app.Parser
 import app.load
 import app.clean
+import app.build_kmer_sets
 import app.card_annotation
 import app.wildcard_annotation
 import app.remove_duplicates
@@ -30,11 +31,12 @@ def __init__(self, api=False):
                baits_annotation create fasta files with annotations from baits (Experimental)
                remove_duplicates removes duplicate sequences (Experimental)
                heatmap  heatmap for multiple analysis (Experimental)
+               kmer     Build kmer sets (Experimental)
                database Information on installed card database'''
 
         parser = argparse.ArgumentParser(prog="rgi", description='{} - {}'.format(APP_NAME, SOFTWARE_VERSION), epilog=SOFTWARE_SUMMARY, usage=USAGE)
         parser.add_argument('command', choices=['main', 'tab', 'parser', 'load',
-                                                'clean', 'galaxy', 'database', 'bwt', 'card_annotation', 'wildcard_annotation', 'baits_annotation', 'remove_duplicates', 'heatmap'],
+                                                'clean', 'galaxy', 'database', 'bwt', 'card_annotation', 'wildcard_annotation', 'baits_annotation', 'remove_duplicates', 'heatmap', 'kmer'],
                                                 help='Subcommand to run')
 
         if api == False:
@@ -121,6 +123,18 @@ def load_args(self):
     def load_run(self, args):
         app.load.main(args)
 
+    def kmer(self):
+        parser = self.kmer_args()
+        args = parser.parse_args(sys.argv[2:])
+        self.kmer_run(args)
+
+    def kmer_args(self):
+        parser = app.build_kmer_sets.create_parser()
+        return parser
+
+    def kmer_run(self, args):
+        app.build_kmer_sets.main(args)
+        
     def card_annotation(self):
         parser = self.card_annotation_args()
         args = parser.parse_args(sys.argv[2:])
diff --git a/app/build_kmer_sets.py b/app/build_kmer_sets.py
new file mode 100644
index 0000000..f1e958d
--- /dev/null
+++ b/app/build_kmer_sets.py
@@ -0,0 +1,317 @@
+import os, sys, json, csv, argparse
+import app.make_kmer_json
+from Bio import SeqIO, Seq
+
+"""
+This script builds the CARD*k-mer sets.
+Please provide location to the CARD*Resistomes&Variants nucleotide FASTAs
+    and the location to the index file (index-for-model-sequences.txt).
+Please ensure that scripts "v3_kmer.py", "v3_kmer_species.py", and "plasmids.py"
+    are all located in your working directory.
+"""
+
+working_directory = os.path.join(os.getcwd())
+
+def combine_variant_sequences(f1, f2, f3, f4):
+    os.system(
+        "cat {fasta_one} {fasta_two} {fasta_three} {fasta_four} \
+        > nucleotide_prevalence_all.fasta"
+        .format(fasta_one=f1, fasta_two=f2,
+        fasta_three=f3, fasta_four=f4)
+    )
+
+def split_variant_sequences(index, fasta):
+    # Plasmids/Data type
+    print('Getting CARD*Resistomes&Variants sequences metadata...')
+    id_plas = set()
+    id_chr = set()
+    id_contig = set()
+    id_path = {}
+    with open(index, 'r') as index:
+        i = csv.reader(index, delimiter='\t')
+        for line in i:
+            # Genomic
+            if line[7] == 'ncbi_chromosome':
+                if line[0] not in id_chr:
+                    id_chr.add(line[0])
+            elif line[7] == 'ncbi_plasmid':
+                if line[0] not in id_plas:
+                    id_plas.add(line[0])
+            elif line[7] == 'ncbi_contig':
+                if line[0] not in id_contig:
+                    id_contig.add(line[0])
+
+            # Taxonomy
+            if line[0] not in id_path:
+                id_path[line[0]] = [line[5]]
+            elif line[0] in id_path:
+                if line[5] not in id_path[line[0]]:
+                    id_path[line[0]].append(line[5])
+            else:
+                print('Error')
+
+    # Discard shared kmers in genomic sets
+    p_and_chr = id_plas.intersection(id_chr)
+    tplas = id_plas.difference(id_chr) # plasmid sequqences not in chromosomes
+    tchr = id_chr.difference(id_plas)
+    tcontig = id_contig.difference(tchr) # contig sequences not in chr
+    tcontig = tcontig.difference(tplas)
+    tcontig = tcontig.difference(p_and_chr)
+    print('Writing genomic outputs...')
+    with open('both.fasta', 'w') as both:
+        with open('plasmid.fasta', 'w') as plasmids:
+            with open('chr.fasta', 'w') as chrs:
+                with open('contig.fasta', 'w') as contigs:
+                    for entry in SeqIO.parse(fasta, 'fasta'):
+                        prev_id = entry.id.split(":")[1].split("|")[0]
+                        if prev_id in p_and_chr:
+                            SeqIO.write(entry, both, 'fasta')
+                        elif prev_id in tchr:
+                            SeqIO.write(entry, chrs, 'fasta')
+                        elif prev_id in tplas:
+                            SeqIO.write(entry, plasmids, 'fasta')
+                        elif prev_id in tcontig:
+                            SeqIO.write(entry, contigs, 'fasta')
+                        else:
+                            print('Error')
+    print('# of sequences in chr + plasmid:', len(p_and_chr))
+    print('# of sequences in just plasmids', len(tplas))
+    print('# of sequences in just chromosomes:', len(tchr))
+    print('# of sequences in just contigs:', len(tcontig))
+
+    # Discard prev sequences in multiple pathogens
+    print('Writing taxonomic outputs...')
+    species_count = 0
+    genus_count = 0
+    multi_count = 0
+    with open('species.fasta', 'w') as single_path:
+        with open('genus.fasta', 'w') as single_genus:
+            with open('multi.fasta', 'w') as multi_path:
+                for entry in SeqIO.parse(fasta, 'fasta'):
+                    prev_id = entry.id.split(":")[1].split("|")[0]
+                    if len(id_path[prev_id]) == 1:
+                        # single[prev_id] = entry.seq
+                        SeqIO.write(entry, single_path, 'fasta')
+                        species_count += 1
+                    else:
+                        genus = [p.split()[0] for p in id_path[prev_id]]
+                        if len(set(genus)) == 1:
+                            SeqIO.write(entry, single_genus, 'fasta')
+                            genus_count += 1
+                        else:
+                            # multi[prev_id] = entry.seq
+                            SeqIO.write(entry, multi_path, 'fasta')
+                            multi_count += 1
+    print('# of sequences in single species:', species_count)
+    print('# of sequences in single genus:', genus_count)
+    print('# of sequences in multiple genus:', multi_count)
+
+def count_kmers(k):
+    # Species
+    print("Counting single species kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "species.fasta"))
+    )
+    # Writes jellyfish output to text file
+    species_kmers = "species_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, species_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    # Genus
+    print("Counting single genus kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "genus.fasta"))
+    )
+    # Writes jellyfish output to text file
+    genus_kmers = "genus_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, genus_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    # Multi genus
+    print("Counting multi genus kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "multi.fasta"))
+    )
+    # Writes jellyfish output to text file
+    multi_kmers = "multi_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, multi_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    # Chr + Plasmid kmers
+    print("Counting chr + plasmid kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "both.fasta"))
+    )
+    # Writes jellyfish output to text file
+    both_kmers = "both_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, both_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    # Plasmid kmers
+    print("Counting plasmid kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "plasmid.fasta"))
+    )
+    # Writes jellyfish output to text file
+    plasmid_kmers = "plasmid_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, plasmid_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    # Chr kmers
+    print("Counting chromosome kmers...")
+    # Counts kmers
+    os.system(
+        "jellyfish count -m {k} -t 24 -s 2G {fasta}"
+        .format(k=k, fasta=os.path.join(working_directory, "chr.fasta"))
+    )
+    # Writes jellyfish output to text file
+    chr_kmers = "chr_{k}.out".format(k=k)
+    os.system(
+        "jellyfish dump -c -t -o {name} mer_counts.jf"
+        .format(name=os.path.join(working_directory, chr_kmers))
+    )
+    os.system(
+        "rm mer_counts.jf"
+    )
+
+    return species_kmers, genus_kmers, multi_kmers, both_kmers, plasmid_kmers, chr_kmers
+
+def create_kmer_db(sk, gk, mk, pk, ck, bk, variants, index, k):
+    os.system(
+        "python3 v3_kmer_species.py -s {species_kmers} -m {multi_kmers} \
+        -prev {species_fasta} -k {k}"
+        .format(species_kmers=os.path.join(working_directory, species_kmers),
+        multi_kmers=os.path.join(working_directory, multi_kmers),
+        species_fasta=os.path.join(working_directory, "species.fasta"),
+        k=k)
+    )
+
+def is_tool(name):
+    import distutils.spawn
+    if distutils.spawn.find_executable(name) is not None:
+        return True
+    else:
+        return False
+
+def main(args):
+    # check if jellyfish is installed
+    if is_tool("jellyfish") is False:
+        print("Missing dependency: jellyfish.\nPlease install from https://github.com/gmarcais/Jellyfish/releases")
+        exit("Debug")
+
+    # some code
+    f1 = args.fasta_one
+    f2 = args.fasta_two
+    f3 = args.fasta_three
+    f4 = args.fasta_four
+    index = args.index
+    k = args.k
+
+
+    if not args.skip:
+        if all(a is None for a in [f1,f2,f3,f4]):
+            print("ERROR: CARD*R&V fastas needed.")
+            exit()
+        else:
+            print("-- CONCATENATING ALL CARD*R&V SEQUENCES --")
+            combine_variant_sequences(f1, f2, f3, f4)
+            print("Done \n")
+
+            print("-- SPLITTING CARD*R&V SEQUENCES --")
+            split_variant_sequences(index, os.path.join(
+                working_directory, "nucleotide_prevalence_all.fasta"))
+            print("DONE \n")
+    else:
+        pass
+
+    print("-- COUNTING KMERS --")
+    species_kmers, genus_kmers, multi_kmers, both_kmers, plasmid_kmers, chr_kmers = count_kmers(k)
+    print(species_kmers)
+    print("DONE \n")
+
+    # """DEBUG"""
+    # species_kmers = "species_temp.out"
+    # genus_kmers = "genus_temp.out"
+    # multi_kmers = "multi_temp.out"
+    # plasmid_kmers = "plasmid_temp.out"
+    # chr_kmers = "chr_temp.out"
+    # both_kmers = "both_temp.out"
+
+    species_file = os.path.join(working_directory, species_kmers)
+    genus_file = os.path.join(working_directory, genus_kmers)
+    multi_file = os.path.join(working_directory, multi_kmers)
+    both_file = os.path.join(working_directory, both_kmers)
+    plasmid_file = os.path.join(working_directory, plasmid_kmers)
+    chr_file = os.path.join(working_directory, chr_kmers)
+    variant_sequences = os.path.join(working_directory, \
+                        "nucleotide_prevalence_all.fasta")
+
+    print("-- CREATING KMER SETS --")
+    app.make_kmer_json.make_json(plasmid_file, chr_file, both_file, genus_file, \
+    species_file, multi_file, variant_sequences, index, k)
+    print("DONE \n")
+
+    print("Finished creating CARD*kmers set.")
+
+def create_parser():
+    parser = argparse.ArgumentParser(
+        description="Builds the kmer sets for CARD*kmers")
+    parser.add_argument('-1', dest="fasta_one",
+        help="nucleotide_fasta_protein_homolog_model_variants.fasta")
+    parser.add_argument('-2', dest="fasta_two",
+        help="nucleotide_fasta_protein_overexpression_model_variants.fasta")
+    parser.add_argument('-3', dest="fasta_three",
+        help="nucleotide_fasta_protein_variant_model_variants.fasta")
+    parser.add_argument('-4', dest="fasta_four",
+        help="nucleotide_fasta_rRNA_gene_variant_model_variants.fasta")
+    parser.add_argument('-i', '--index', dest="index", required=True,
+        help="CARD*R&V Variants index file (index-for-model-sequences.txt)")
+    parser.add_argument('-k', dest="k", required=True,
+        help="k-mer size (e.g., 61)")
+    parser.add_argument('--skip', dest="skip", action='store_true',
+        help="Skips the concatenation and splitting of the CARD*R*V sequences.")
+    
+    return parser
+
+def run():
+    parser = create_parser()
+    args = parser.parse_args()
+    main(args)
+
+if __name__ == '__main__':
+    run()
diff --git a/app/make_kmer_json.py b/app/make_kmer_json.py
new file mode 100644
index 0000000..e30a3e7
--- /dev/null
+++ b/app/make_kmer_json.py
@@ -0,0 +1,253 @@
+import csv, re, argparse, multiprocessing, math, json, ahocorasick, os
+from Bio import Seq, SeqIO
+
+"""
+This scripts creates the JSON to hold all kmer sets.
+"""
+
+def split_list(l,n):
+    for i in range(0, len(l), n):
+        yield l[i:i+n]
+
+def query_kmers(l, t, fasta, o):
+    """Finds kmers in variant sequences"""
+    try:
+        print('THREAD {t}: Adding kmers'.format(t=t))
+        A = ahocorasick.Automaton()
+        for k in l:
+            A.add_word(k,k)
+        print('THREAD {t}: Making automaton'.format(t=t))
+        A.make_automaton()
+
+        f = {}
+        r = {}
+        seq_num = 0
+        for entry in SeqIO.parse(fasta, "fasta"):
+            seq_num += 1
+            if seq_num % 25000 == 0:
+                print('THREAD {t}: Done querying {n} sequences'.format(t=t, n=seq_num))
+            prev_id = entry.id.split(":")[1].split("|")[0]
+            pathogens = id_path[prev_id] # get a list of pathogens that prev sequence is found in
+            for tup in A.iter(str(entry.seq)):
+                kmer = tup[1]
+                rev_kmer = str(Seq.Seq(kmer).reverse_complement())
+                if kmer not in f:
+                    f[kmer] = []
+                if rev_kmer not in r:
+                    r[rev_kmer] = []
+                for p in pathogens:
+                    if p not in f[kmer]:
+                        f[kmer].append(p)
+                    if p not in r[rev_kmer]:
+                        r[rev_kmer].append(p)
+        print(t, 'Done')
+
+    except Exception as e:
+        print(e)
+    o.put((t, f, r))
+
+def get_genomic_kmers(plasmid_file, chr_file, both_file):
+    with open(plasmid_file, 'r') as f1:
+        print('Getting plasmid kmers...')
+        p = csv.reader(f1, delimiter='\t')
+        pkmers = {row[0] for row in p}
+    with open(chr_file, 'r') as f2:
+        print('Getting chromosome kmers...')
+        np = csv.reader(f2, delimiter='\t')
+        nkmers = {row[0] for row in np}
+    with open(both_file, 'r') as f3:
+        print('Getting both (plasmid + chr) kmers...')
+        b = csv.reader(f3, delimiter='\t')
+        bkmers = {row[0] for row in b}
+
+        # Discard itnersect
+        both_separate = pkmers.intersection(nkmers)
+        # Remove non specific plasmid kmers
+        pf = pkmers.difference(nkmers)
+        pf = pf.difference(bkmers)
+        # Remove non specific chromosome kmers
+        cf = nkmers.difference(pkmers)
+        cf = cf.difference(bkmers)
+        for kmer in both_separate:
+            bkmers.add(kmer)
+
+        print('Reverse complementing kmers...')
+        # Reverse complement non-plasmid kmers
+        rev_chr = {str(Seq.Seq(ck).reverse_complement()) for ck in cf}
+        rev_plas = {str(Seq.Seq(pk).reverse_complement()) for pk in pf}
+        rev_both = {str(Seq.Seq(bk).reverse_complement()) for bk in bkmers}
+
+        # print(rev_chr)
+        for false_hit in pf.intersection(rev_chr):
+            # F kmer in plasmid is a R kmer in chr
+            pf.remove(false_hit)
+            bkmers.add(false_hit)
+        for false_hit in pf.intersection(rev_both):
+            pf.remove(false_hit)
+            bkmers.add(false_hit)
+        for false_hit in cf.intersection(rev_plas):
+            cf.remove(false_hit)
+            bkmers.add(false_hit)
+        for false_hit in cf.intersection(rev_both):
+            cf.remove(false_hit)
+            bkmers.add(false_hit)
+
+        return list(pf), list(cf), list(bkmers) # type: sets
+
+def get_taxon_kmers(single_file, multi_file, variant_sequences, index_file, k, type):
+    """Gets taxonomic kmer sets"""
+    global id_path
+    id_path = {}
+    print('Getting pathogen metadata...')
+    with open(index_file, 'r') as index:
+        i = csv.reader(index, delimiter='\t')
+        for line in i:
+            if line[0] not in id_path:
+                id_path[line[0]] = [line[5]]
+            elif line[0] in id_path:
+                if line[5] not in id_path[line[0]]:
+                    id_path[line[0]].append(line[5])
+            else:
+                print('Error')
+
+    with open(single_file, 'r') as f1:
+        print('Getting single species kmers...')
+        single = csv.reader(f1, delimiter='\t')
+        skmers = {row[0] for row in single}
+    with open(multi_file, 'r') as f2:
+        print('Getting multi species kmers...')
+        multi = csv.reader(f2, delimiter='\t')
+        mkmers = {row[0] for row in multi}
+
+    print('Discarding shared single- and multi-species kmers...')
+    sf = list(skmers.difference(mkmers)) # kmers in only the single set
+    print('Remaining unique kmers:', len(sf))
+    list_size = math.ceil(len(sf)/24) # For 24 threads
+    l = list(split_list(sf, list_size))
+
+    print('Querying kmers...')
+    # Threading
+    output = multiprocessing.Queue()
+    processes = []
+    for ind in range(len(l)):
+        process = multiprocessing.Process(target=query_kmers, args=(l[ind], ind, variant_sequences, output))
+        process.start()
+        processes.append(process)
+        # print(processes)
+    results = [output.get() for process in processes]
+    for process in processes:
+        process.join()
+
+    print('Concatenating all data from all threads')
+    f = {}
+    r = {}
+    for i in range(len(results)):
+        f = {**f, **results[i][1]}
+        r = {**r, **results[i][2]}
+
+    # Filters kmers that are not unique
+    to_delete = []
+    for k,v in f.items():
+        if type == "species":
+            if len(v) > 1:
+                to_delete.append(k)
+        elif type == "genus":
+            genus = [p.split()[0] for p in v]
+            if len(set(genus)) > 1:
+                to_delete.append(k)
+        else:
+            print("Error")
+
+    print('before:', len(f))
+    print('to delete:', len(to_delete))
+    for k in to_delete:
+        del f[k]
+        del r[Seq.Seq(k).reverse_complement()]
+    print('after:', len(f))
+
+    rev_multi = {Seq.Seq(mk).reverse_complement() for mk in mkmers}
+    rev_temp = {Seq.Seq(tk).reverse_complement() for tk in to_delete}
+
+    shared = 0
+    same = 0
+    single = 0
+    forward = 0
+    reverse = 0
+    to_also_delete = []
+    for k in r:
+        if k in f:
+            shared += 1
+            if r[k] == f[k]:
+                same += 1
+            else: # Shared kmers to different pathogens in diff orientations
+                single += 1
+                to_also_delete.append(k)
+    for k in to_also_delete:
+        del f[k]
+        del r[k]
+
+    ts = set(f.keys())
+    # print(len(ts.intersection(rev_temp)))
+    for x in ts.intersection(rev_multi):
+        del f[x]
+        forward += 1
+    for x2 in ts.intersection(rev_temp):
+        del f[x2]
+        forward += 1
+
+    return f
+
+def make_json(plasmid_file, chr_file, both_file, genus_file, species_file, \
+                multi_file, variant_sequences, index_file, k):
+    print(plasmid_file, chr_file, both_file, genus_file, species_file, multi_file, variant_sequences, index_file, k)
+    exit()
+    p, c, b = get_genomic_kmers(plasmid_file, chr_file, both_file)
+    s = get_taxon_kmers(species_file, multi_file, variant_sequences, index_file, k, "species")
+    g = get_taxon_kmers(genus_file, multi_file, variant_sequences, index_file, k, "genus")
+
+    final = {"p": p, "c": c, "b": b, "s": s, "g": g}
+
+    with open(os.path.join(os.path.join(os.getcwd()), '{k}_kmer_db.json'.format(k=k)), 'w') as out:
+        json.dump(final, out)
+
+
+def main(args):
+    plasmid_file = args.plasmid
+    chr_file = args.chromosome
+    both_file = args.both
+    species_file = args.species
+    genus_file = args.genus
+    multi_file = args.multi
+    variant_sequences = args.variants
+    k = int(args.k)
+    index_file = args.index
+
+    make_json(plasmid_file, chr_file, both_file, genus_file, species_file, \
+    multi_file, variant_sequences, index_file, k)
+
+def run():
+    parser = argparse.ArgumentParser(
+        description='Creates a kmer catalogue')
+    parser.add_argument('-p', dest="plasmid", required=True,
+        help="Plasmid sequence kmers output from Jellyfish")
+    parser.add_argument('-c', dest="chromosome", required=True,
+        help="Chromosome sequence kmers output from Jellyfish")
+    parser.add_argument('-b', dest="both", required=True,
+        help="Plasmid + chromosome sequence kmers output from Jellyfish")
+    parser.add_argument('-s', dest="species", required=True,
+        help="Single species kmer output from Jellyfish")
+    parser.add_argument('-g', dest="genus", required=True,
+        help="Single genus kmer output from Jellyfish")
+    parser.add_argument('-m', dest="multi", required=True,
+        help="Multi species kmer output from Jellyfish")
+    parser.add_argument('-v', dest="variants", required=True,
+        help="CARD*Resistomes&Variants sequences (FASTA)")
+    parser.add_argument('-i', dest="index", required=True,
+        help="CARD*Resitomes&Variants index (index-for-model-sequences.txt)")
+    parser.add_argument('-k', dest="k", required=True,
+        help="kmer length")
+    args = parser.parse_args()
+    main(args)
+
+if __name__ == '__main__':
+    run()
diff --git a/requirements.txt b/requirements.txt
index 3c10c0a..bed15a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ pandas>=0.15.0
 matplotlib>=2.1.2
 seaborn>=0.8.1
 pyfaidx>=0.5.4.1
+pyahocorasick==1.1.8
\ No newline at end of file
diff --git a/test-requirements.txt b/test-requirements.txt
index 81fddc2..3f2cad6 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -7,4 +7,5 @@ requests==2.18.2
 six==1.10.0
 matplotlib>=2.1.2
 seaborn>=0.8.1
-pyfaidx>=0.5.4.1 
\ No newline at end of file
+pyfaidx>=0.5.4.1 
+pyahocorasick==1.1.8
\ No newline at end of file