In [2]:
def encode_dna_sequence(seq):
    """Pack a DNA string into a single integer, two bits per base.

    Bases are coded A=0, C=1, G=2, T=3. The first base ends up in the most
    significant bit pair and the last base in the least significant one.
    The sequence length is not stored in the result, so leading 'A's (code 0)
    are indistinguishable from a shorter sequence; the decoder must know the
    length.

    Args:
        seq: Iterable of the uppercase characters 'A', 'C', 'G', 'T'.

    Returns:
        A one-element tuple holding the packed integer.

    Raises:
        KeyError: If ``seq`` contains any character other than A, C, G, T.
    """
    base_codes = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    value = 0
    for code in map(base_codes.__getitem__, seq):
        # Every code is < 4, so multiply-by-4-and-add is the same as
        # shifting left by two bits and OR-ing the code in.
        value = value * 4 + code
    return (value,)

def decode_dna_sequence(encoded, length=30):
    """Unpack a 2-bit-per-base integer back into a DNA string.

    The packed integer does not record how many bases it holds (leading 'A's
    are zero bits), so the caller supplies the length. It defaults to 30 to
    stay compatible with the original fixed-length behaviour.

    Args:
        encoded: Sequence whose first element is the packed integer, with
            bases coded A=0, C=1, G=2, T=3 and the first base in the most
            significant bit pair.
        length: Number of bases to extract. Defaults to 30.

    Returns:
        A string of ``length`` characters drawn from 'A', 'C', 'G', 'T'.
        Bits above the lowest ``2 * length`` bits are ignored; a ``length``
        of zero or less yields an empty string.
    """
    rev_mapping = 'ACGT'
    packed = encoded[0]
    # Walk the bit pairs from most significant to least significant so the
    # bases come out in their original order without a final reversal.
    return ''.join(
        rev_mapping[(packed >> shift) & 0b11]
        for shift in range(2 * (length - 1), -2, -2)
    )

# Round-trip demo: pack a 30-base sequence into an integer, then unpack it
dna_seq = "ACGTACGTACGTACGTACGTACGTACGTAC"  # 30 bases
encoded = encode_dna_sequence(dna_seq)
print(f"Encoded: {encoded}")  # One-element tuple holding the packed integer
decoded = decode_dna_sequence(encoded)
print(f"Decoded: {decoded}")  # Expected to equal dna_seq

Encoded: (122074041664254385,)
Decoded: ACGTACGTACGTACGTACGTACGTACGTAC


In [3]:
decoded == dna_seq  # Round-trip check: the decoded string should equal the original sequence

True