In [1]:
def getCombinedVariants(
    seq,
    distance,
    post_context="",
    correct_snps=True,
    correct_indels=True,
    bases={'A', 'C', 'G', 'T', 'N'}
):
    """
    Enumerate every fixed-length variant of ``seq`` reachable with at most
    ``distance`` single-base edits.

    The search proceeds breadth-first, one edit per generation.  Three kinds
    of edit are applied to every variant of the previous generation:

    * substitution (SNP) -- one base is replaced; the trailing context is
      left untouched;
    * deletion -- one base is removed and the variant is refilled on the
      right with the first character of the trailing context (or, when the
      context is exhausted, with every base of the alphabet in turn);
    * insertion -- one base is inserted and the variant is truncated back
      to ``len(seq)``; the character pushed off the right end is prepended
      to the trailing context.

    A variant is recorded the first time it is discovered.  Later
    rediscoveries (possibly with a different trailing context) are ignored,
    and only the first recorded state is expanded further.

    Args:
        seq: The reference sequence.  Every returned variant has its length.
        distance: Maximum number of edits (number of generations explored).
        post_context: Characters that follow ``seq``; consumed by deletions.
        correct_snps: Generate substitutions when true.
        correct_indels: Generate deletions and insertions when true.
        bases: Iterable of single-character strings forming the alphabet.
            It is never mutated; it is de-duplicated and sorted once so that
            the result does not depend on set/hash iteration order.

    Returns:
        A dict ``{variant: (post_context, distance)}`` where ``distance`` is
        the generation in which the variant was first found and
        ``post_context`` is the trailing context left after the edits that
        produced it.  ``seq`` itself is always present with distance 0.
    """
    # Iterating a set of strings directly is hash-order dependent, which made
    # both the dict ordering and the "first discovery wins" choice vary from
    # run to run.  Sorting once fixes the order and also materialises
    # one-shot iterables.
    alphabet = sorted(set(bases))
    width = len(seq)

    all_variants_map = {seq: (post_context, 0)}
    last_generation = [(seq, post_context)]

    def _record(variant, new_post, dist, frontier):
        """Register a newly discovered variant; the first discovery wins."""
        if variant not in all_variants_map:
            all_variants_map[variant] = (new_post, dist)
            frontier.append((variant, new_post))

    for dist in range(1, distance + 1):
        if not last_generation:
            # Nothing new was found last round, so nothing can be found now.
            break
        current_generation = []

        for var, post in last_generation:

            # 1. Substitutions: the trailing context is unchanged.
            if correct_snps:
                for i, original_base in enumerate(var):
                    for new_base in alphabet:
                        if new_base != original_base:
                            _record(
                                var[:i] + new_base + var[i + 1:],
                                post,
                                dist,
                                current_generation,
                            )

            if correct_indels:
                # 2. Deletions: refill on the right from the trailing context,
                # or from every base of the alphabet once it is exhausted.
                pads = [post[0]] if post else alphabet
                shorter_post = post[1:]  # stays "" when post is already empty
                for i in range(len(var)):
                    shortened = var[:i] + var[i + 1:]
                    for pad in pads:
                        _record(
                            shortened + pad,
                            shorter_post,
                            dist,
                            current_generation,
                        )

                # 3. Insertions: truncate back to the reference width; the
                # character pushed off the end becomes the head of the context.
                for i in range(len(var) + 1):
                    for base in alphabet:
                        inserted = var[:i] + base + var[i:]
                        _record(
                            inserted[:width],
                            inserted[-1] + post,
                            dist,
                            current_generation,
                        )

        last_generation = current_generation

    return all_variants_map

In [3]:
# Example: list every 10-base variant of "ACTGAGATAG" within two edits,
# with a single trailing context base "T" available to refill deletions.
getCombinedVariants("ACTGAGATAG",distance=2,post_context="T")

{'ACTGAGATAG': ('T', 0),
 'CCTGAGATAG': ('T', 1),
 'GCTGAGATAG': ('T', 1),
 'NCTGAGATAG': ('T', 1),
 'TCTGAGATAG': ('T', 1),
 'AGTGAGATAG': ('T', 1),
 'ANTGAGATAG': ('T', 1),
 'AATGAGATAG': ('T', 1),
 'ATTGAGATAG': ('T', 1),
 'ACCGAGATAG': ('T', 1),
 'ACGGAGATAG': ('T', 1),
 'ACNGAGATAG': ('T', 1),
 'ACAGAGATAG': ('T', 1),
 'ACTCAGATAG': ('T', 1),
 'ACTNAGATAG': ('T', 1),
 'ACTAAGATAG': ('T', 1),
 'ACTTAGATAG': ('T', 1),
 'ACTGCGATAG': ('T', 1),
 'ACTGGGATAG': ('T', 1),
 'ACTGNGATAG': ('T', 1),
 'ACTGTGATAG': ('T', 1),
 'ACTGACATAG': ('T', 1),
 'ACTGANATAG': ('T', 1),
 'ACTGAAATAG': ('T', 1),
 'ACTGATATAG': ('T', 1),
 'ACTGAGCTAG': ('T', 1),
 'ACTGAGGTAG': ('T', 1),
 'ACTGAGNTAG': ('T', 1),
 'ACTGAGTTAG': ('T', 1),
 'ACTGAGACAG': ('T', 1),
 'ACTGAGAGAG': ('T', 1),
 'ACTGAGANAG': ('T', 1),
 'ACTGAGAAAG': ('T', 1),
 'ACTGAGATCG': ('T', 1),
 'ACTGAGATGG': ('T', 1),
 'ACTGAGATNG': ('T', 1),
 'ACTGAGATTG': ('T', 1),
 'ACTGAGATAC': ('T', 1),
 'ACTGAGATAN': ('T', 1),
 'ACTGAGATAA': ('T', 1),
