# Common variables

In [13]:
all_nucleotides = ["a", "t", "g", "c", "u"]
dna_bases = ["A", "C", "G", "T"]
# The amino-acid string below lists the genetic code with the codons enumerated
# in T, C, A, G order (TTT, TTC, TTA, TTG, TCT, ...). The codons therefore have
# to be generated in exactly that order, otherwise zip() pairs every codon with
# the wrong amino acid (e.g. AAA -> F instead of K).
codon_base_order = ["T", "C", "A", "G"]
codons = [a+b+c for a in codon_base_order for b in codon_base_order for c in codon_base_order]
amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG'
# Maps each DNA codon (upper-case, T not U) to its one-letter amino acid; "*" marks a stop codon.
codon_table = dict(zip(codons, amino_acids))

# Common functions linked to DNA

In [8]:
# Count codons
def count_codons(sequence:str, frame:int=0):
    '''
    Count how often each codon (nucleotide triplet) occurs in a sequence.

    sequence: the nucleotide string to read; it is used as given (no case
              conversion), so "atg" and "ATG" are counted separately.
    frame:    index at which reading starts; everything before it is ignored.

    Returns a dictionary mapping each complete codon to its number of
    occurrences, in order of first appearance. A trailing fragment of one or
    two nucleotides is not a codon and is therefore not counted.
    '''
    counts = {}
    for i in range(frame, len(sequence), 3):
        codon = sequence[i: i+3]
        # The last slice can be shorter than three letters; skip it.
        if len(codon) < 3:
            continue
        counts[codon] = counts.get(codon, 0) + 1
    return counts

In [17]:
# Translate sequence

def translate_sequence(sequence: str, frame: int=0, start_at: str = ""):
    '''
    Translate a DNA or RNA sequence into a string of one-letter amino acids.

    The sequence is upper-cased and every "U" is replaced by "T", so RNA and
    DNA input are handled the same way. This function needs the module-level
    codon_table variable!

    sequence: the nucleotide string to translate.
    frame:    index at which reading starts (0, 1 or 2 select the three
              reading frames).
    start_at: optional motif (e.g. "ATG" or "AUG") to start translating from.
              The first occurrence at or after `frame` is used. If the motif
              does not occur, an empty string is returned.

    Triplets that are not in codon_table (a trailing fragment, or triplets with
    letters other than A/C/G/T) are skipped rather than raising an error.
    '''

    sequence = sequence.upper().replace("U", "T")
    start = frame
    if start_at:
        # Normalise the motif exactly like the sequence so "aug" matches "ATG".
        motif = start_at.upper().replace("U", "T")
        start = sequence.find(motif, frame)
        if start == -1:
            # find() signals "not found" with -1; using that as an index would
            # silently start reading from the end of the sequence.
            return ""
    return "".join(
        codon_table.get(sequence[j:j+3], "")
        for j in range(start, len(sequence), 3)
    )
        
        
    

### Random DNA generator

In [11]:
def gen_random_dna(seq_length: int = 100, nucleotides:str="ATGC", use_as_start: str = ""):
    '''
    Build a random nucleotide string.

    seq_length:   total length of the returned sequence, prefix included.
    nucleotides:  the alphabet to draw from; pass e.g. "AUGC" to get RNA.
    use_as_start: optional prefix the sequence must begin with.

    If the prefix is already as long as (or longer than) seq_length, it is
    returned unchanged and no random nucleotides are added.
    '''
    import random as rd
    remaining = seq_length - len(use_as_start)
    # One rd.choice call per missing position; range() is empty when remaining <= 0.
    tail = [rd.choice(nucleotides) for _ in range(remaining)]
    return use_as_start + "".join(tail)


# Assignment Challenge: Loops within dictionaries (Day 2)

"For this assignment, we're going to write a program that translates DNA sequences into protein sequences.

Loop over the DNA sequence 'ACTGACTGACTGAATTCGACTG' in steps of 3 such that you get the codons.
Translate each of these codons to their amino acids as defined in the dictionary codon_table defined below. Print the entire translated protein sequence.
Did you take the reading frames into account? If not, get the codons for the other reading frames as well. Translate each reading frame into a protein sequence.
Now translate all of the valid DNA sequences in the list seqs. Invalid sequences should not be translated."


In [4]:
### Make sure you understand what happens here, then run this cell
# The order of the bases matters: the three nested loops below produce the
# codons as TTT, TTC, TTA, TTG, TCT, ... and the amino-acid string is written
# in exactly that order.
bases = ['T', 'C', 'A', 'G']
codons = [first + second + third for first in bases for second in bases for third in bases]
amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG'
### zip pairs the i-th codon with the i-th amino-acid letter; each pair becomes one key/value entry
codon_table = {codon: residue for codon, residue in zip(codons, amino_acids)}


In [None]:
# Translate the following sequence while taking the reading frames into account
### If you want to make the code even better, you can combine the search codon logic with that of the next cell
sequence = 'ACTGACTGACTGAATTCGACTG'
frames = {}
translated_sequences = {}

# Step 1: cut the sequence into triplets once for each of the three offsets.
for offset in range(3):
    triplets = [sequence[pos:pos + 3] for pos in range(offset, len(sequence), 3)]
    print(triplets)
    frames[f'Reading frame {offset}'] = triplets

print(frames)

# Step 2: translate every frame; pieces that are not in codon_table
# (such as a short leftover at the end) are left out.
for label, triplets in frames.items():
    translated_sequences[label] = "".join(
        codon_table[triplet] for triplet in triplets if triplet in codon_table
    )


print(translated_sequences)

In [None]:
# Loop over the seqs and translate the valid DNA sequences
seqs = ['actgactgactgaattcgactg','caucgaucgcgauacacgaucagcuacg','augcagacgacguacgu','atcgatcgatcgatcacgt','atcgtagctactagctagc','acgatcgtagctacgta','cgaucagucgaucgauccagcga','cguacguagcacaugcagucaguauacguacggacgacgac','catgactgactgatcgatgctgactgactg','atcggatctgaactgactg','actgactgactgactg','caucgaucgcgauacacgaucagcuacg','augcagacgacguacgu','atcgatcgaattcgatcgatcacgt','atcgtagctactagctagc','acgatcgaattcgtagctacgta','cgaucagucgaucgauccagcga','cguacguagcacaugcagucaguauacguacggacgacgac','catgactgactgatcgatgaattcgctgactgactg','aucggauccgaaccgacag']
translated_seqs = []
# A sequence counts as valid DNA only if it consists solely of these letters.
valid_dna_letters = set("ACGT")

for seq in seqs:
    seq = seq.upper()
    # Sequences containing anything else (e.g. the "U" of RNA) are invalid DNA
    # and, as the assignment asks, are not translated.
    if not seq or not set(seq) <= valid_dna_letters:
        continue
    # Read in steps of 3; a leftover of one or two letters is not in
    # codon_table and contributes nothing.
    translation = "".join(
        codon_table.get(seq[i:i+3], "") for i in range(0, len(seq), 3)
    )
    translated_seqs.append(translation)
    print(translation)


# File Handling Good practices:

You should close the file after doing everything