In [1]:
import math, random

In [2]:
## Alphabet from which a replacement base is drawn when a position mutates.
nucleotides = 'ATCG'

## Aligned fragments of the hemoglobin gene from human, mouse and puffer fish
## (all three fragments have the same length).
human_hb = 'AGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCC'
mouse_hb = 'AGCTCCTGAGCCACTGCCTGCTGGTGACCTTGGCTAGCCACCACCCTGCCGATTTCACCC'
puffer_hb = 'AGCTCCTGGCTCAGTGCATGTTGGTGGTCATCGCCACCATGTACCCTGCTGACTTCACCC'

## Simulated sequences start out empty and are filled in by the simulation below.
mouse_evolved, puffer_evolved = '', ''


def percent_identical(s1, s2):
    """Return the percentage of positions at which two sequences agree.

    The sequences are compared position by position and the share of
    matching positions is returned as a whole-number percentage, rounded
    up (so 2 matches out of 3 gives 67).

    Args:
        s1: First sequence (e.g. a nucleotide string).
        s2: Second sequence, the same length as ``s1``.

    Returns:
        int: Percent identity in the range 0..100.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """

    if len(s1) != len(s2):
        raise ValueError("Undefined for sequences of unequal length")
    if len(s1) == 0:
        # A percentage of zero positions has no meaning; fail with a clear
        # message instead of a bare ZeroDivisionError.
        raise ValueError("Undefined for empty sequences")

    matches = sum(ch1 == ch2 for ch1, ch2 in zip(s1, s2))

    # Ceiling division in pure integer arithmetic. Going through floats
    # (100 * (1 - diff / n)) can land a hair above an exact integer, e.g.
    # 30.000000000000004, which a float ceil would then round up to 31.
    return -(-100 * matches // len(s1))

## Average mutation rate: 1e-8 per nucleotide per generation.
mutation_rate = 1e-8

## Assuming one generation per year (an overestimate for mouse and fish,
## an underestimate for humans).
human_mouse_generations = 1.6e8
human_puffer_generations = 9e8

## Probability that a single nucleotide is still unchanged after all of the
## generations: the per-generation survival chance (1 - mutation_rate)
## raised to the number of generations.
human_mouse_probability = pow(1 - mutation_rate, human_mouse_generations)
human_puffer_probability = pow(1 - mutation_rate, human_puffer_generations)

## Evolve the mouse and puffer fish sequences randomly over the generations

def _evolve(sequence, unchanged_probability, alphabet):
    """Return a randomly mutated copy of ``sequence``.

    Each position is treated independently: it keeps its base with
    probability ``unchanged_probability``; otherwise it is replaced by a
    base drawn uniformly from ``alphabet``.

    Args:
        sequence: Starting nucleotide string.
        unchanged_probability: Chance, between 0 and 1, that a position is
            left as it is.
        alphabet: String of bases a mutated position may become.

    Returns:
        str: The evolved sequence, the same length as ``sequence``.
    """
    evolved = []
    for base in sequence:
        if random.uniform(0, 1) > unchanged_probability:
            # random.choice considers every base in the alphabet. An index
            # from randrange(0, 3) stops at 2 and could never select 'G'.
            # The drawn base may equal the current one, in which case the
            # position is unchanged.
            evolved.append(random.choice(alphabet))
        else:
            evolved.append(base)
    # Join once at the end rather than growing a string one base at a time.
    return ''.join(evolved)


mouse_evolved = _evolve(mouse_hb, human_mouse_probability, nucleotides)
puffer_evolved = _evolve(puffer_hb, human_puffer_probability, nucleotides)

def _report(label, other, *trailing):
    """Print one comparison against the human fragment.

    Emits the label followed by the percent identity, then the human
    sequence with ``other`` on the line beneath it. Any ``trailing`` values
    are passed to the second print as extra arguments.
    """
    print(label, percent_identical(human_hb, other))
    print(" "+human_hb, "\n", other, *trailing)


# Real sequences first; the extra "\n" leaves a blank line before the
# simulated results.
_report("Human and mouse actual percent identical: ", mouse_hb)
_report("Human and puffer actual percent identical: ", puffer_hb, "\n")

# Simulated sequences.
_report("Human and mouse simulated(forward evolution) percent identical: ", mouse_evolved)
_report("Human and puffer simulated(forward evolution) percent identical: ", puffer_evolved)


Human and mouse actual percent identical:  87
 AGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCC 
 AGCTCCTGAGCCACTGCCTGCTGGTGACCTTGGCTAGCCACCACCCTGCCGATTTCACCC
Human and puffer actual percent identical:  67
 AGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCC 
 AGCTCCTGGCTCAGTGCATGTTGGTGGTCATCGCCACCATGTACCCTGCTGACTTCACCC 

Human and mouse simulated(forward evolution) percent identical:  39
 AGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCC 
 AGTAATCGACTTACACTTTTCCTTTAACCTCTGATCAATTCACCCCATCCACCTTCCCAC
Human and puffer simulated(forward evolution) percent identical:  22
 AGCTCCTAAGCCACTGCCTGCTGGTGACCCTGGCCGCCCACCTCCCCGCCGAGTTCACCC 
 ATAAATCACAATTCTCTCATAATATCCTCACAACCTTTTCTCATTTAAACATTTCCCATA
