# Protein to DNA sequence (reverse translation)


In [None]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import random

# Reverse codon table for the standard genetic code: every one-letter amino
# acid code (and '*' for stop) maps to the list of DNA codons that encode it.
# Only the codon choices are stored here, not their usage frequencies.
codon_usage_table = {
    residue: codons.split()
    for residue, codons in (
        ('A', 'GCT GCC GCA GCG'),
        ('C', 'TGT TGC'),
        ('D', 'GAT GAC'),
        ('E', 'GAA GAG'),
        ('F', 'TTT TTC'),
        ('G', 'GGT GGC GGA GGG'),
        ('H', 'CAT CAC'),
        ('I', 'ATT ATC ATA'),
        ('K', 'AAA AAG'),
        ('L', 'TTA TTG CTT CTC CTA CTG'),
        ('M', 'ATG'),
        ('N', 'AAT AAC'),
        ('P', 'CCT CCC CCA CCG'),
        ('Q', 'CAA CAG'),
        ('R', 'CGT CGC CGA CGG AGA AGG'),
        ('S', 'TCT TCC TCA TCG AGT AGC'),
        ('T', 'ACT ACC ACA ACG'),
        ('V', 'GTT GTC GTA GTG'),
        ('W', 'TGG'),
        ('Y', 'TAT TAC'),
        ('*', 'TAA TAG TGA'),
    )
}

def reverse_translate(protein_seq):
    """
    Reverse translate a protein sequence into one possible DNA coding sequence.

    Each residue is replaced by a codon picked with ``random.choice`` from its
    entry in ``codon_usage_table``, so repeated calls on the same input can
    return different DNA strings.

    :param protein_seq: The protein sequence as a string of one-letter codes
        matching the keys of ``codon_usage_table`` (lookup is case-sensitive).
    :return: The reverse-translated DNA sequence as a string (three
        nucleotides per residue, empty string for empty input), or None if
        any residue has no entry in ``codon_usage_table``.
    """
    # Collect codons in a list and join once at the end instead of growing a
    # string with += on every residue.
    codons = []

    for aa in protein_seq:
        choices = codon_usage_table.get(aa)
        if choices is None:
            # Residue symbol not present in the table: the whole sequence is
            # rejected rather than partially translated.
            return None
        codons.append(random.choice(choices))

    return "".join(codons)

def process_fasta(input_file, output_file):
    """
    Reverse translate every protein record of a FASTA file into DNA and write
    the resulting records to a new FASTA file.

    Records for which ``reverse_translate`` returns None or an empty string
    are left out of the output; a single warning listing how many (and which)
    records were skipped is emitted after the output file has been written.

    :param input_file: Protein FASTA source, passed to ``SeqIO.parse``.
    :param output_file: Path of the FASTA file to write; it is opened in "w"
        mode, so an existing file is overwritten.
    :return: None
    """
    import warnings  # local import: only needed for the skipped-record report

    # DNA records to write, and ids of the input records that were dropped
    dna_records = []
    skipped_ids = []

    for protein_record in SeqIO.parse(input_file, "fasta"):
        dna_seq = reverse_translate(str(protein_record.seq))

        if not dna_seq:
            # None or empty result: keep the record out of the output, but
            # remember its id so the omission can be reported.
            skipped_ids.append(protein_record.id)
            continue

        dna_records.append(
            SeqRecord(
                Seq(dna_seq),
                id=protein_record.id,
                description="Reverse translated DNA sequence",
            )
        )

    # Write the DNA sequences to the output FASTA file
    with open(output_file, "w") as output_handle:
        SeqIO.write(dna_records, output_handle, "fasta")

    # Report after writing so the output exists even if warnings are escalated
    # to errors by the caller's warning filters.
    if skipped_ids:
        preview = ", ".join(skipped_ids[:10])
        if len(skipped_ids) > 10:
            preview += ", ..."
        warnings.warn(
            f"{len(skipped_ids)} record(s) could not be reverse translated "
            f"and were skipped: {preview}",
            stacklevel=2,
        )


In [None]:
# Build the input and output FASTA locations by prefixing the file names with `path`.
# NOTE(review): `path` is not defined in the visible part of this file — presumably
# a directory string ending with a path separator set in an earlier cell; confirm.
input_fasta = path+"uniprotkb_drosophila_2024_07_18.fasta"
output_fasta = path+"EnT3.fasta"
# Reverse translate the protein records in input_fasta and write the DNA records to output_fasta.
process_fasta(input_fasta, output_fasta)