A program designed to map genes using the genetic code. 
This program takes a DNA sequence as input, transcribes it into mRNA, and then translates the mRNA into a protein sequence based on the genetic code. Additionally, the example identifies specific breast cancer-related genes based on their protein sequences.

In [65]:
# Genetic code dictionary
# This dictionary maps RNA codons to their corresponding amino acids, essential for the translation process.

genetic_code = {
    # Start codon.
    'AUG': 'Methionine',

    # Codons whose first base is U (stop codons are listed at the end).
    'UUU': 'Phenylalanine', 'UUC': 'Phenylalanine',
    'UUA': 'Leucine', 'UUG': 'Leucine',
    'UCU': 'Serine', 'UCC': 'Serine', 'UCA': 'Serine', 'UCG': 'Serine',
    'UAU': 'Tyrosine', 'UAC': 'Tyrosine',
    'UGU': 'Cysteine', 'UGC': 'Cysteine',
    'UGG': 'Tryptophan',

    # Codons whose first base is C.
    'CUU': 'Leucine', 'CUC': 'Leucine', 'CUA': 'Leucine', 'CUG': 'Leucine',
    'CCU': 'Proline', 'CCC': 'Proline', 'CCA': 'Proline', 'CCG': 'Proline',
    'CAU': 'Histidine', 'CAC': 'Histidine',
    'CAA': 'Glutamine', 'CAG': 'Glutamine',
    'CGU': 'Arginine', 'CGC': 'Arginine', 'CGA': 'Arginine', 'CGG': 'Arginine',

    # Codons whose first base is A (AUG is listed above).
    'AUU': 'Isoleucine', 'AUC': 'Isoleucine', 'AUA': 'Isoleucine',
    'ACU': 'Threonine', 'ACC': 'Threonine', 'ACA': 'Threonine', 'ACG': 'Threonine',
    'AAU': 'Asparagine', 'AAC': 'Asparagine',
    'AAA': 'Lysine', 'AAG': 'Lysine',
    'AGU': 'Serine', 'AGC': 'Serine',
    'AGA': 'Arginine', 'AGG': 'Arginine',

    # Codons whose first base is G.
    'GUU': 'Valine', 'GUC': 'Valine', 'GUA': 'Valine', 'GUG': 'Valine',
    'GCU': 'Alanine', 'GCC': 'Alanine', 'GCA': 'Alanine', 'GCG': 'Alanine',
    'GAU': 'Aspartic Acid', 'GAC': 'Aspartic Acid',
    'GAA': 'Glutamic Acid', 'GAG': 'Glutamic Acid',
    'GGU': 'Glycine', 'GGC': 'Glycine', 'GGA': 'Glycine', 'GGG': 'Glycine',

    # Termination codons: translation halts when one of these is read.
    'UAA': 'Stop', 'UAG': 'Stop', 'UGA': 'Stop',
}

In [66]:
# Error handling for invalid DNA sequences
# This function ensures the validity of a DNA sequence by checking if it contains only the nucleotides A, T, C, and G.

def validate_dna_sequence(dna_sequence):
    """Return True if *dna_sequence* consists solely of A, T, C and G.

    The check is case-insensitive because the sequence is upper-cased
    before comparison. An empty sequence has no offending characters and
    is therefore reported as valid.
    """
    allowed_bases = set('ATCG')
    # The sequence is valid exactly when its distinct characters form a
    # subset of the allowed bases.
    return set(dna_sequence.upper()) <= allowed_bases
   

In [67]:
# Transcription (DNA to mRNA) - converts a DNA sequence into an mRNA sequence by replacing thymine (T) with uracil (U).

def transcribe(dna_sequence):
    """Transcribe a DNA sequence into its mRNA sequence.

    Args:
        dna_sequence: String of nucleotides; upper- and lower-case
            A, T, C and G are accepted.

    Returns:
        The upper-case mRNA string, with every thymine (T) replaced by
        uracil (U).

    Raises:
        ValueError: If the sequence contains any character other than
            A, T, C or G.
    """
    if not validate_dna_sequence(dna_sequence):
        raise ValueError("Invalid DNA sequence. Please enter a sequence containing only A, T, C, G.")
    # Validation is case-insensitive, so normalise to upper case before
    # substituting; otherwise a lower-case 't' would pass validation yet
    # be left untranscribed.
    return dna_sequence.upper().replace('T', 'U')

In [68]:
# Translation (mRNA to protein) - converting an mRNA sequence into a protein sequence.
# It reads the mRNA in groups of three nucleotides (codons) and translates each codon into the corresponding amino acid using the genetic_code dictionary. 
# The translation stops if a stop codon is encountered.

def translate(mrna_sequence):
    """Translate an mRNA string into a list of amino-acid names.

    The sequence is read from its first character in consecutive,
    non-overlapping triplets. Each triplet is looked up in
    ``genetic_code``; reading ends at the first stop codon. Triplets
    that are not in the table, including a trailing fragment shorter
    than three bases, are skipped.

    Returns:
        A list of amino-acid names in reading order, excluding the stop.
    """
    residues = []
    triplets = (
        mrna_sequence[start:start + 3]
        for start in range(0, len(mrna_sequence), 3)
    )
    for triplet in triplets:
        residue = genetic_code.get(triplet)
        if residue == 'Stop':
            # A stop codon terminates the protein; nothing after it is read.
            return residues
        if not residue:
            # Unknown or incomplete codon: ignore it and keep reading.
            continue
        residues.append(residue)
    return residues

In [69]:
# Genetic Mapping (DNA sequence to protein) - combines transcription and translation to map a DNA sequence to a protein sequence.

def map_genes(dna_sequence):
    """Map a DNA sequence to its protein sequence.

    The DNA is first transcribed to mRNA with ``transcribe`` and the
    result is then passed to ``translate``.

    Returns:
        Whatever ``translate`` returns for the transcribed sequence.
    """
    return translate(transcribe(dna_sequence))

In [70]:
# Genetic Mapping program
# Takes a DNA sequence as input from the user, maps it to a protein sequence, and prints the protein sequence.

if __name__ == "__main__":
    # Read the DNA sequence from the user; upper-casing lets lower-case
    # input be processed as well.
    dna_sequence = input("Enter DNA sequence: ").upper()

    try:
        # Transcribe and translate. transcribe() raises ValueError when
        # the input contains anything other than A, T, C, G.
        protein_sequence = map_genes(dna_sequence)
    except ValueError as error:
        # Report bad input as a message instead of an uncaught traceback.
        print(error)
    else:
        # Output the results
        print("Protein Sequence: ", ' - '.join(protein_sequence))

Enter DNA sequence: ATGGATTTTGGTCAGTCAACAAAGAAAGCTGAGAACTTGGACACTAGGGTCTGACTGGAAGAAATCTGGA
Protein Sequence:  Methionine - Aspartic Acid - Phenylalanine - Glycine - Glutamine - Serine - Threonine - Lysine - Lysine - Alanine - Glutamic Acid - Asparagine - Leucine - Aspartic Acid - Threonine - Arginine - Valine


In [73]:
# A more practical example using breast cancer-related genes.
# This cell adds a dictionary of breast cancer-related genes with example DNA sequences and builds a reverse mapping so that a gene can be identified from its protein sequence.
# The program is designed to map genes using the genetic code.
# It takes a DNA sequence as input, transcribes it into mRNA, and then translates the mRNA into a protein sequence based on the genetic code.
# Additionally, it can identify specific breast cancer-related genes based on their protein sequences.

# This dictionary contains example DNA sequences for genes commonly associated with breast cancer.
breast_cancer_genes = {
    # Gene name -> example DNA sequence used for the lookup demonstration.
    "BRCA1": (
        "ATGGATTTTGGTCAGTCAACAAAGAAAGCTGAGAACTTGGACACTAGGGTCTGACTGGAAGAAATCTGGA"
    ),
    "BRCA2": (
        "ATGGATTTGGAGGTTTTTGTTTGCTGCTGCTGCTGCTGAGCTTGCTGAAACTGGAAGGAAACAGG"
    ),
    "TP53": (
        "ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCTGCTCGGAACATCTCGAAGTGTTTGTGC"
    ),
}

# Reverse mapping from protein sequences to gene names.
# This dictionary maps each protein sequence (stored as a tuple of amino-acid names) back to its corresponding gene name.
# This allows a gene to be identified from its protein sequence.

# Keys are tuples because the lists returned by map_genes() are unhashable.
protein_to_gene = dict(
    (tuple(map_genes(sequence)), gene)
    for gene, sequence in breast_cancer_genes.items()
)
    
def main():
    """Prompt for a DNA sequence, print its protein, and name a matching gene.

    The input is upper-cased and validated first; invalid input produces
    a message and an early return. Otherwise the translated protein is
    printed, followed by the gene name when the protein matches an entry
    in ``protein_to_gene``.
    """
    dna_sequence = input("Enter DNA sequence: ").upper()

    # Guard clause: stop before translation if the input is not pure A/T/C/G.
    if not validate_dna_sequence(dna_sequence):
        print("Invalid DNA sequence. Please enter a sequence containing only A, T, C, G.")
        return

    protein_sequence = map_genes(dna_sequence)

    # Look the protein up by its tuple form; the sentinel marks "no match".
    gene_name = protein_to_gene.get(tuple(protein_sequence), "Unknown gene")

    joined_protein = ' - '.join(protein_sequence)
    print(f"Protein Sequence: {joined_protein}")

    if gene_name == "Unknown gene":
        print("Non Breast Cancer Gene Found.")
        return
    print(f"Identified Gene: {gene_name}")
        
# Gene identification and mapping program: script entry point.
if __name__ == "__main__":
    # Run the interactive prompt only when this code is executed directly,
    # not when it is imported as a module.
    main()
        


Enter DNA sequence: vvv
Invalid DNA sequence. Please enter a sequence containing only A, T, C, G.
