In [1]:
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

In [2]:
def find_start(dna, codons=('ATG',)):
    '''
    Input: Sequence of DNA (and, optionally, the codons to treat as starts)
    Return: Positions of start codon

    Function finds positions of start codons.

    dna    -- sequence to scan. It is compared as-is, so its case must
              match the case of the codons.
    codons -- one three-letter codon, or an iterable of three-letter codons,
              to report. Defaults to 'ATG' only.

    The result is a list of 0-based indexes in ascending order. Every offset
    is examined, so matches from all three reading frames are included.
    '''
    # A bare string would turn the membership test below into a substring
    # test, so wrap it into a one-element tuple first.
    if isinstance(codons, str):
        codons = (codons,)
    else:
        codons = tuple(codons)
    # The last two offsets cannot hold a full triplet, so they are skipped.
    return [i for i in range(len(dna) - 2) if dna[i:i + 3] in codons]

In [3]:
def find_stop(dna, codons=('TAA', 'TAG', 'TGA')):
    '''
    Input: Sequence of DNA (and, optionally, the codons to treat as stops)
    Return: Positions of stop codon

    Function finds positions of stop codons.

    dna    -- sequence to scan. It is compared as-is, so its case must
              match the case of the codons.
    codons -- one three-letter codon, or an iterable of three-letter codons,
              to report. Defaults to 'TAA', 'TAG' and 'TGA'.

    The result is a list of 0-based indexes in ascending order. Every offset
    is examined, so matches from all three reading frames are included.
    '''
    # A bare string would turn the membership test below into a substring
    # test, so wrap it into a one-element tuple first.
    if isinstance(codons, str):
        codons = (codons,)
    else:
        codons = tuple(codons)
    # The last two offsets cannot hold a full triplet, so they are skipped.
    return [i for i in range(len(dna) - 2) if dna[i:i + 3] in codons]

In [4]:
def find_compl_dna(dna):
    '''
    Input: Sequence of DNA
    Return: Genes which were found in the reverse-complement sequence of DNA

    Function builds the reverse complement of the sequence and collects
    every stretch that begins at a start codon and ends with a later stop
    codon in the same reading frame, provided the stretch is longer than
    4 codons (start and stop codons included).

    Each start is paired with every qualifying stop, not just the nearest
    one, so a returned stretch may contain other in-frame stop codons.
    '''
    # NOTE(review): Bio.Alphabet (and generic_dna) is no longer shipped by
    # recent Biopython releases - confirm the installed version provides it.
    revcomp = str(Seq(dna, generic_dna).reverse_complement())
    starts = find_start(revcomp)
    stops = find_stop(revcomp)
    found = []
    for begin in starts:
        for end in stops:
            if end <= begin:
                continue
            # Length in nucleotides, counting the stop codon itself.
            span = end + 3 - begin
            if span % 3 == 0 and span // 3 > 4:
                found.append(revcomp[begin:end + 3])
    return found

In [5]:
def find_gene(dna):
    '''
    Input: Sequence of DNA
    Return: Genes which were found in the sequence of DNA

    Function collects every stretch of the given strand that begins at a
    start codon and ends with a later stop codon in the same reading frame,
    provided the stretch is longer than 4 codons (start and stop codons
    included). The genes reported by find_compl_dna for the
    reverse-complement strand are appended after them.

    Each start is paired with every qualifying stop, not just the nearest
    one, so a returned stretch may contain other in-frame stop codons.
    '''
    starts = find_start(dna)
    stops = find_stop(dna)
    forward = [
        dna[begin:end + 3]
        for begin in starts
        for end in stops
        # Same frame, stop after start, and more than 4 codons in total.
        if end > begin
        and (end + 3 - begin) % 3 == 0
        and (end + 3 - begin) // 3 > 4
    ]
    return forward + find_compl_dna(dna)

In [8]:
# Read the sequence and upper-case it so that the codon search is not
# case-sensitive.
dna = str(input('Enter the DNA sequence ')).upper()
genes = find_gene(dna)
print('DNA, finded genes:', genes)

Enter the DNA sequence TTAAAAAAAAAAAAACAT
DNA, finded genes: ['ATGTTTTTTTTTTTTTAA']
