In [192]:
import random
import numpy as np
import matplotlib as plt
import Levenshtein
import itertools
import RNA

# The four RNA bases, in the order used to index the substitution matrix.
nucleotides_text = list("AGUC")
# Base letter -> its position in ``nucleotides_text``.
nucleotides = {base: position for position, base in enumerate(nucleotides_text)}

**Parameters**

In [341]:
# Founding sequence of the simulated population.
initial_rna = "ACCCC"
# Sequence that fitness is measured against.
target = "AGCCCGA"
# Starting environmental factors handed to the rate-calculation hooks.
global_enviornmental_factors = []
# Number of replication rounds to simulate.
generations = 5
# 4x4 substitution probabilities indexed by the values of ``nucleotides``:
# 0.925 on the diagonal (base unchanged), 0.025 for every other pair.
base_mutation = np.full((4, 4), 0.025)
np.fill_diagonal(base_mutation, 0.925)

**Update Parameters**

In [3]:
def calculate_environmental_factors(rna_population, enviornmental_factors):
    """Return the environmental factors to use for the coming generation.

    Placeholder hook: the factors are handed back unchanged and
    ``rna_population`` is not consulted.
    """
    updated_factors = enviornmental_factors
    return updated_factors

def calculate_mutation_rate(factors, mutation_rate=base_mutation):
    """Return the substitution matrix to use under the given factors.

    Placeholder hook: ``factors`` is ignored and ``mutation_rate`` (by
    default the module-level ``base_mutation``) is handed back unchanged.
    """
    adjusted_rate = mutation_rate
    return adjusted_rate

**Find Indel Probability**

In [4]:
def calculate_insertion_rate(sequence, factors, insertion_rate=0.01):
    """Return the per-draw insertion probability for ``sequence``.

    Placeholder hook: ``sequence`` and ``factors`` are ignored and
    ``insertion_rate`` (0.01 unless overridden) is handed back unchanged.
    """
    current_rate = insertion_rate
    return current_rate

def calculate_deletion_rate(sequence, factors, deletion_rate=0.01):
    """Return the per-draw deletion probability for ``sequence``.

    Placeholder hook: ``sequence`` and ``factors`` are ignored and
    ``deletion_rate`` (0.01 unless overridden) is handed back unchanged.
    """
    current_rate = deletion_rate
    return current_rate

**Transition Matrix Calculation**

In [5]:
def mutation_chance_matrix(mutation_matrix, mutation_rate):
    """Takes in matrix of mutations, outputs matrix of probabilities of each mutation.

    Every row of ``mutation_matrix`` holds an original sequence in column 0
    followed by the sequences it may turn into.  The probability of a
    candidate is the product, position by position, of
    ``mutation_rate[original base, candidate base]``.  Column 0 (the original
    itself) then receives the remaining probability mass,
    ``max(0, 1 - sum of the other columns)``.

    Args:
        mutation_matrix: 2-D array-like of sequences written with the keys of
            ``nucleotides``; candidates must be at least as long as the
            original of their row.
        mutation_rate: 2-D numpy array of substitution probabilities indexed
            by the values of ``nucleotides`` as ``[from, to]``.

    Returns:
        A float numpy array with one probability per entry of
        ``mutation_matrix``.
    """
    probability_matrix = np.ones((len(mutation_matrix), len(mutation_matrix[0])))

    for i, row in enumerate(mutation_matrix):
        # One substitution-matrix row index per position of the original.
        # (Using only the last base's index for every position would weight
        # all positions as if they held that last base.)
        original_indices = [nucleotides[base] for base in row[0]]

        for j, possibility in enumerate(row):
            if j == 0:
                # Column 0 is the original itself; it is assigned below.
                continue
            for k, original_index in enumerate(original_indices):
                possibility_index = nucleotides[possibility[k]]
                probability_matrix[i, j] *= mutation_rate[original_index, possibility_index]

    # Whatever mass the listed mutations do not use stays with the original;
    # clamp at zero in case the listed mutations already sum past 1.
    probability_matrix[:, 0] = np.maximum(0, 1 - np.sum(probability_matrix[:, 1:], axis=1))

    return probability_matrix

def generate_mutation_matrix(n):
    """Build the table of every length-``n`` sequence and its alternatives.

    Sequences are enumerated with ``itertools.product`` over the keys of
    ``nucleotides``.  Each row starts with one sequence and continues with
    all the other sequences in that same enumeration order, so the table has
    ``len(nucleotides) ** n`` rows and as many columns (it grows
    exponentially with ``n``).

    Returns:
        A 2-D numpy array of strings.
    """
    all_sequences = list(map(''.join, itertools.product(nucleotides, repeat=n)))

    rows = []
    for first in all_sequences:
        others = [candidate for candidate in all_sequences if candidate != first]
        rows.append([first, *others])

    return np.array(rows)


**Fitness**

In [6]:
def fitness(target_distance, target_mfe, sequence, target_sequence=None):
    """Decide whether ``sequence`` is fit enough to be kept.

    A sequence is fit when both of these hold:

    * its Levenshtein distance to the reference sequence is at most
      ``target_distance``;
    * the absolute difference between its minimum free energy (MFE) and the
      reference sequence's MFE is at most ``target_mfe``.

    ``target_mfe`` is treated as a maximum allowed energy *gap*, which is how
    ``replicate_rna`` computes it (``abs(mfe(target) - mfe(initial_rna))``).
    Comparing the raw MFE of ``sequence`` against that non-negative gap, as
    was done before, made the energy check ineffective.

    Args:
        target_distance: largest accepted edit distance to the reference.
        target_mfe: largest accepted absolute MFE difference to the reference.
        sequence: candidate RNA sequence.
        target_sequence: reference sequence; when omitted, the module-level
            ``target`` is used.

    Returns:
        ``True`` if the sequence passes both checks, ``False`` otherwise.
    """
    reference = target if target_sequence is None else target_sequence

    # Cheap check first: skip folding when the sequence is already too far.
    if Levenshtein.distance(sequence, reference) > target_distance:
        return False

    # RNA.fold(...)[1] is the second element of the fold result, used here as
    # the minimum free energy; compare the gap to the reference, not the raw value.
    mfe_gap = abs(RNA.fold(sequence)[1] - RNA.fold(reference)[1])
    if mfe_gap > target_mfe:
        return False

    return True
    

**Simulation**

In [303]:
def _apply_indels(sequence, insertion_rate, deletion_rate):
    """Return ``sequence`` after a random net number of insertions or deletions.

    The number of insertions and of deletions are each drawn by repeating a
    Bernoulli trial until it fails, so both rates must be below 1.
    """
    net_gain = 0
    while insertion_rate > random.random():
        net_gain += 1
    while deletion_rate > random.random():
        net_gain -= 1

    for _ in range(net_gain):
        # A new base may go before any existing base or after the last one,
        # hence len(sequence) + 1 slots (randint includes both bounds).
        slot = random.randint(0, len(sequence))
        sequence = sequence[:slot] + random.choice(nucleotides_text) + sequence[slot:]

    for _ in range(-net_gain):
        if not sequence:
            # Nothing left to delete.
            break
        # randrange excludes len(sequence), so a base is always removed and
        # the string length stays in step with the number of deletions.
        victim = random.randrange(len(sequence))
        sequence = sequence[:victim] + sequence[victim + 1:]

    return sequence


def _draw_substitution(sequence, mutation_rate):
    """Draw one same-length sequence according to ``mutation_chance_matrix``."""
    # Only the row whose original is ``sequence`` is needed, so build just
    # that row (same ordering as a row of generate_mutation_matrix) instead
    # of the full table of all sequence pairs.
    same_length = (''.join(bases) for bases in itertools.product(nucleotides, repeat=len(sequence)))
    candidates = [sequence] + [other for other in same_length if other != sequence]

    chances = mutation_chance_matrix(np.array([candidates]), mutation_rate)[0]
    picked = np.random.choice(len(chances), p=chances / chances.sum())
    return str(candidates[picked])


def replicate_rna(initial_rna, mutation_rate, generations, target):
    """Simulate ``generations`` rounds of error-prone RNA replication.

    In every generation each member of the breeding population produces one
    offspring: a random net number of insertions or deletions is applied
    first, then one substitution outcome is drawn from the probabilities
    given by ``mutation_chance_matrix``.  Every offspring is recorded; only
    those accepted by ``fitness`` join the breeding population, and parents
    are never removed.

    Args:
        initial_rna: founding sequence, written with the keys of ``nucleotides``.
        mutation_rate: substitution matrix; it is passed through
            ``calculate_mutation_rate`` at the start of every generation.
        generations: number of replication rounds.
        target: sequence used to derive the two fitness thresholds.

    Returns:
        ``(rna_population, full_population)``: the breeding population and
        the list of every sequence produced, both starting with ``initial_rna``.
    """
    rna_population = [initial_rna]
    full_population = [initial_rna]

    # Fitness thresholds: how far the founder is from the target, in folding
    # energy and in edit distance.
    target_mfe = abs(RNA.fold(target)[1] - RNA.fold(initial_rna)[1])
    target_distance = Levenshtein.distance(initial_rna, target)

    enviornmental_factors = calculate_environmental_factors(rna_population, global_enviornmental_factors)

    for _generation in range(generations):
        enviornmental_factors = calculate_environmental_factors(rna_population, enviornmental_factors)
        mutation_rate = calculate_mutation_rate(enviornmental_factors, mutation_rate)

        new_population = []
        for rna in rna_population:
            insertion_rate = calculate_insertion_rate(rna, enviornmental_factors)
            deletion_rate = calculate_deletion_rate(rna, enviornmental_factors)

            resized_rna = _apply_indels(rna, insertion_rate, deletion_rate)
            new_population.append(_draw_substitution(resized_rna, mutation_rate))

        full_population += new_population
        # NOTE: fitness() is called without a reference sequence, so it
        # measures against the module-level ``target``, not this function's
        # ``target`` argument.
        rna_population += [rna for rna in new_population if fitness(target_distance, target_mfe, rna)]

    return rna_population, full_population

In [343]:
# Starting substitution matrix for the configured environmental factors.
mutation_rate = calculate_mutation_rate(global_enviornmental_factors)

# Run the simulation and print (breeding population, every sequence produced).
print(
    replicate_rna(
        initial_rna=initial_rna,
        mutation_rate=mutation_rate,
        generations=generations,
        target=target,
    )
)

(['ACCCC', 'CGCCCC'], ['ACCCC', 'ACACC', 'CCCCC', 'CUCCC', 'CCCCC', 'CGCCCC'])


**Data Analysis**