In [2]:
import random
import numpy as np
import matplotlib as plt
import Levenshtein
import itertools
import RNA

nucleotides = {'A':0,'G':1,'U':2,'C':3}
nucleotides_text = ['A','G','U','C']

**Parameters**

In [28]:
initial_rna = "ACCCAA"
target = "AGCCCG"
global_enviornmental_factors = []
generations = 3
base_mutation = np.array([[0.925, 0.025, 0.025, 0.025], [0.025, 0.925, 0.025, 0.025], [0.025, 0.025, 0.925, 0.025], [0.025, 0.025, 0.025, 0.925]])

**Update Parameters**

In [4]:
def calculate_environmental_factors (rna_population, enviornmental_factors):
    return enviornmental_factors

def calculate_mutation_rate (factors, mutation_rate = base_mutation):
    return mutation_rate

**Find Indel Probability**

In [5]:
def calculate_insertion_rate (sequence, factors, insertion_rate = 0.01):
    return insertion_rate

def calculate_deletion_rate (sequence, factors, deletion_rate = 0.01):
    return deletion_rate

**Transition Matrix Calculation**

In [27]:
def mutation_chance_list(mutation_list, mutation_rate):
    """Takes in matrix of mutations, outputs matrix of probabilities of each mutation"""
    probability_list = np.ones((len(mutation_list)))

    original = mutation_list[0]
    length_of_sequence = len(original)

    for i, column in enumerate(mutation_list):
        for k in range(length_of_sequence):
            original_index = nucleotides[original[k]]
            possibility_index = nucleotides[column[k]]
            probability_list[i] *= mutation_rate[original_index, possibility_index]

    probability_list[0] = np.maximum(0, 1 - np.sum(probability_list[1:]))

    return probability_list

def generate_mutation_matrix(n):
    sequences = [''.join(seq) for seq in itertools.product(nucleotides, repeat=n)]
    matrix = [[seq] + [s for s in sequences if s != seq] for seq in sequences]
    return np.array(matrix)


**Fitness**

In [7]:
def fitness (target_distance, target_mfe, sequence):
    sequence_distance = Levenshtein.distance(sequence, target)
    if (sequence_distance > target_distance):
        return False
    sequence_mfe = RNA.fold(sequence)[1]
    if (sequence_mfe > target_mfe):
        return False

    return True
    

**Simulation**

In [14]:
def replicate_rna (initial_rna, mutation_rate, generations, target):

    rna_population = [initial_rna]
    full_population = [initial_rna]
    
    target_mfe = abs(RNA.fold(target)[1]-RNA.fold(initial_rna)[1])
    target_distance = Levenshtein.distance(initial_rna, target)

    enviornmental_factors = calculate_environmental_factors(rna_population, global_enviornmental_factors)

    for _ in range(generations):

        enviornmental_factors = calculate_environmental_factors(rna_population, enviornmental_factors)
        mutation_rate = calculate_mutation_rate(enviornmental_factors, mutation_rate)
        
        new_population = []

        for rna in rna_population:
            old_rna = rna
            o_len = len(old_rna)
            new_rna = ""
            nucleotides_gain = 0
            
            insertion_rate = calculate_insertion_rate(rna, enviornmental_factors)
            deletion_rate = calculate_deletion_rate(rna, enviornmental_factors)

            while (insertion_rate > random.random()):
                nucleotides_gain += 1
            while (deletion_rate > random.random()):
                nucleotides_gain -= 1

            if (nucleotides_gain > 0):
                for _ in range(nucleotides_gain):
                    index = random.randint(0, o_len)
                    old_rna = old_rna[:index] + random.choice(nucleotides_text) + old_rna[index:]
                    o_len += 1
            elif (nucleotides_gain < 0):
                for _ in range(-nucleotides_gain):
                    index = random.randint(0, o_len)
                    old_rna = old_rna[:index] + old_rna[index+1:]
                    o_len -= 1

            mutation_matrix = generate_mutation_matrix(o_len)
            #mutation_chance = mutation_chance_matrix(mutation_matrix, mutation_rate)

            mutation_list = mutation_matrix[mutation_matrix[:, 0] == old_rna][0]
            #mutation_chance_list = mutation_chance[mutation_matrix[:, 0] == old_rna][0]
            chance_list = mutation_chance_list(mutation_list, mutation_rate)
            new_rna = mutation_list[np.random.choice(len(chance_list), p=chance_list/chance_list.sum())]
            
            new_population.append(str(new_rna))

        full_population += new_population.copy()
        rna_population += [rna for rna in new_population if fitness(target_distance, target_mfe, rna)]
    
    return rna_population, full_population

In [26]:
mutation_rate = calculate_mutation_rate(global_enviornmental_factors)

print(replicate_rna(initial_rna, mutation_rate, generations, target))

(['ACCCAAC', 'ACCCAAC', 'ACCCAAC', 'ACCCAAC'], ['ACCCAAC', 'CUCCAAC', 'ACCCAAC', 'ACCCAAC', 'ACCCAAC'])


In [30]:
import cProfile

cProfile.run('replicate_rna(initial_rna, mutation_rate, generations, target)')

         50093 function calls (50089 primitive calls) in 11.455 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.118    0.118    6.035    6.035 2958138522.py:1(replicate_rna)
        6    0.000    0.000    0.001    0.000 3520087228.py:1(fitness)
        6    0.000    0.000    0.000    0.000 3690251470.py:1(calculate_insertion_rate)
        6    0.000    0.000    0.000    0.000 3690251470.py:4(calculate_deletion_rate)
        4    0.000    0.000    0.000    0.000 3819459897.py:1(calculate_environmental_factors)
        3    0.000    0.000    0.000    0.000 3819459897.py:4(calculate_mutation_rate)
        6    0.038    0.006    0.040    0.007 3971689967.py:1(mutation_chance_list)
        6    2.379    0.397    9.718    1.620 3971689967.py:17(generate_mutation_matrix)
        1    0.014    0.014    6.049    6.049 <string>:1(<module>)
        6    0.000    0.000    0.001    0.000 RNA.py:8189(fold)
        7    0.0

**Data Analysis**