In [6]:
# Reference sets: the nucleic acids and the two families of nitrogenous bases.
nucleic_acids = {'DNA', 'RNA'}
purines = {'Adenine', 'Guanine'}
pyrimidines = {'Cytosine', 'Thymine', 'Uracil'}

# Both nucleic acids contain the two purines; DNA adds Cytosine and Thymine,
# while RNA adds Cytosine and Uracil.
dna_bases = purines | {'Cytosine', 'Thymine'}
rna_bases = purines | {'Cytosine', 'Uracil'}

# Sets are unordered, so the element order shown may vary between runs.
for label, bases in (("DNA Bases:", dna_bases), ("RNA Bases:", rna_bases)):
    print(label, bases)

DNA Bases: {'Adenine', 'Thymine', 'Cytosine', 'Guanine'}
RNA Bases: {'Uracil', 'Adenine', 'Cytosine', 'Guanine'}


In [9]:
# Demonstration-sized codon table mapping an RNA codon to the one-letter code
# of the amino acid it encodes. Synonymous codons are grouped per amino acid;
# the resulting key order is the order in which the codons are listed here.
codon_map = {
    'AUG': 'M',  # start codon
    **dict.fromkeys(('UUU', 'UUC'), 'F'),  # phenylalanine
    **dict.fromkeys(('UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'), 'L'),  # leucine
    # extend with further codons as needed
}

# Example target protein: phenylalanine followed by leucine.
protein_sequence = "FL"

def find_sequences(protein, codon_map):
    """Return every codon sequence that encodes ``protein``.

    Args:
        protein: Iterable of amino-acid symbols, e.g. the string ``"FL"``.
        codon_map: Mapping from codon to the amino-acid symbol it encodes.

    Returns:
        A list of strings, each the concatenation of one codon per amino
        acid in ``protein``. Codons for a position are tried in the order
        they appear in ``codon_map``, with the last position varying
        fastest. An empty ``protein`` yields ``['']``; if any amino acid
        has no codon in ``codon_map`` the result is ``[]``.
    """
    # Imported locally so the function stays self-contained within its cell.
    from itertools import product

    # Invert the table: amino acid -> list of codons, in codon_map order.
    amino_to_codons = {}
    for codon, amino in codon_map.items():
        amino_to_codons.setdefault(amino, []).append(codon)

    # One collection of candidate codons per position. An unknown amino acid
    # contributes an empty collection, which makes the whole product empty.
    choices = [amino_to_codons.get(amino, ()) for amino in protein]

    # The Cartesian product enumerates every combination without building and
    # copying intermediate partial sequences.
    return [''.join(combo) for combo in product(*choices)]

# Example run: list every RNA coding sequence for the example protein.
sequences = find_sequences(protein_sequence, codon_map)
print(f"Possible RNA sequences encoding the protein sequence '{protein_sequence}':")
for seq in sequences:
    print(seq)

# The DNA form of each sequence is the same string with every 'U' swapped
# for 'T'; a translation table performs that single-character substitution.
u_to_t = str.maketrans('U', 'T')
dna_sequences = [rna.translate(u_to_t) for rna in sequences]

print(f"\nPossible DNA sequences encoding the protein sequence '{protein_sequence}':")
for seq in dna_sequences:
    print(seq)

Possible RNA sequences encoding the protein sequence 'FL':
UUUUUA
UUUUUG
UUUCUU
UUUCUC
UUUCUA
UUUCUG
UUCUUA
UUCUUG
UUCCUU
UUCCUC
UUCCUA
UUCCUG

Possible DNA sequences encoding the protein sequence 'FL':
TTTTTA
TTTTTG
TTTCTT
TTTCTC
TTTCTA
TTTCTG
TTCTTA
TTCTTG
TTCCTT
TTCCTC
TTCCTA
TTCCTG
