In [41]:
import random
import numpy as np
import matplotlib as plt
import Levenshtein
import itertools

# RNA alphabet; the position of each base in this list is used as its
# row/column index into the mutation-rate matrix.
nucleotides = list("AGUC")

**Parameters**

In [267]:
# Founder sequence the simulation starts from.
initial_rna = "AUG"

# Environmental factors handed to the update hooks (none by default).
global_enviornmental_factors = []

# Number of replication rounds to simulate.
generations = 3

# Per-nucleotide substitution probabilities, indexed [from, to] in the order
# of `nucleotides`: 0.925 to stay unchanged, 0.025 for each other base.
base_mutation = np.full((4, 4), 0.025)
np.fill_diagonal(base_mutation, 0.925)
# Alternative (identity matrix, i.e. no substitutions):
#base_mutation = np.array([[1,0,0,0], [0,1,0,0], [0,0,1,0], [0,0,0,1]])

**Update Parameters**

In [270]:
def calculate_environmental_factors(rna_population, enviornmental_factors):
    """Return the environmental factors for the next generation.

    Placeholder hook: ``rna_population`` is currently ignored and the
    incoming ``enviornmental_factors`` object is handed back unchanged.
    """
    updated_factors = enviornmental_factors
    return updated_factors

def calculate_mutation_rate(factors, mutation_rate=base_mutation):
    """Return the substitution-rate matrix to use for a generation.

    Placeholder hook: ``factors`` is currently ignored and ``mutation_rate``
    (defaulting to the module-level ``base_mutation``) is returned as is.
    """
    current_rate = mutation_rate
    return current_rate

**Find Indel Probability**

In [44]:
def calculate_insertion_rate(sequence, factors, insertion_rate=0.01):
    """Return the insertion probability for ``sequence``.

    Placeholder hook: ``sequence`` and ``factors`` are currently ignored and
    the supplied ``insertion_rate`` (default 0.01) is returned unchanged.
    """
    rate = insertion_rate
    return rate

def calculate_deletion_rate(sequence, factors, deletion_rate=0.01):
    """Return the deletion probability for ``sequence``.

    Placeholder hook: ``sequence`` and ``factors`` are currently ignored and
    the supplied ``deletion_rate`` (default 0.01) is returned unchanged.
    """
    rate = deletion_rate
    return rate

**Transition Matrix Calculation**

In [266]:
def mutation_chance_matrix(mutation_matrix, mutation_rate):
    """Compute the probability of every entry of a mutation matrix.

    Args:
        mutation_matrix: 2-D collection of sequences. In each row, column 0
            is the original sequence and the remaining columns are the
            sequences it may turn into.
        mutation_rate: 2-D array indexed as ``[from, to]`` with the positions
            of the bases in ``nucleotides``.

    Returns:
        Float array with the same number of rows and columns as
        ``mutation_matrix``. Entry ``[i, j]`` for ``j >= 1`` is the product
        of the per-position substitution rates from the row's original to
        sequence ``j``. Column 0 is the remaining probability mass,
        ``1 - sum(other columns)``, clipped at 0.
    """
    n_rows = len(mutation_matrix)
    n_cols = len(mutation_matrix[0])
    probability_matrix = np.ones((n_rows, n_cols))

    for row_idx, candidates in enumerate(mutation_matrix):
        source = candidates[0]
        for col_idx, target in enumerate(candidates):
            chance = probability_matrix[row_idx, col_idx]
            # Multiply the per-position rates left to right.
            for pos, source_base in enumerate(source):
                from_idx = nucleotides.index(source_base)
                to_idx = nucleotides.index(target[pos])
                chance = chance * mutation_rate[from_idx, to_idx]
            probability_matrix[row_idx, col_idx] = chance

    # Column 0 (the unchanged sequence) takes whatever mass the other
    # columns leave over, so that each row sums to 1 where possible.
    mass_of_mutants = probability_matrix[:, 1:].sum(axis=1)
    probability_matrix[:, 0] = np.maximum(0, 1 - mass_of_mutants)

    return probability_matrix

def generate_mutation_matrix(n):
    """Build the matrix of all possible mutations between length-``n`` sequences.

    Every sequence of length ``n`` over ``nucleotides`` gets one row: the
    sequence itself in column 0, followed by every other length-``n``
    sequence in enumeration order. The result is returned as a NumPy array
    of strings; its size grows exponentially with ``n``.
    """
    all_sequences = list(map(''.join, itertools.product(nucleotides, repeat=n)))

    rows = []
    for origin in all_sequences:
        alternatives = [other for other in all_sequences if other != origin]
        rows.append([origin] + alternatives)

    return np.array(rows)


**Replication Verification**

In [283]:
def does_replicate(sequence, factors):
    """Tell whether ``sequence`` is able to replicate under ``factors``.

    Placeholder hook: both arguments are currently ignored and every
    sequence is reported as replicating.
    """
    can_replicate = True
    return can_replicate

**Simulation**

In [272]:
def replicate_rna(initial_rna, mutation_rate, generations):
    """Simulate RNA replication with indels and point mutations.

    Starting from a single founder, every generation each sequence in the
    replicating population produces one copy. The copy first gains or loses
    nucleotides (the net result of repeated insertion and deletion draws)
    and is then replaced by a sequence sampled from the point-mutation
    distribution of its length.

    Args:
        initial_rna: Founder sequence, a string over ``nucleotides``.
        mutation_rate: Substitution-rate matrix indexed ``[from, to]`` by
            position in ``nucleotides``; it is passed through
            ``calculate_mutation_rate`` at the start of every generation.
        generations: Number of replication rounds.

    Returns:
        Tuple ``(rna_population, full_population)``. ``rna_population`` holds
        the founder plus every copy accepted by ``does_replicate``;
        ``full_population`` holds the founder plus every copy produced.

    Note:
        Insertion and deletion rates must be below 1: ``random.random()``
        is always < 1, so a rate >= 1 would never leave the draw loops.
    """
    rna_population = [initial_rna]
    full_population = [initial_rna]

    enviornmental_factors = calculate_environmental_factors(rna_population, global_enviornmental_factors)

    for _generation in range(generations):

        enviornmental_factors = calculate_environmental_factors(rna_population, enviornmental_factors)
        mutation_rate = calculate_mutation_rate(enviornmental_factors, mutation_rate)

        # The mutation matrix depends only on sequence length, and the chance
        # matrix only on that matrix plus this generation's mutation_rate, so
        # build each pair once per length instead of once per sequence.
        matrices_by_length = {}
        new_population = []

        for rna in rna_population:
            template = rna
            nucleotides_gain = 0

            insertion_rate = calculate_insertion_rate(rna, enviornmental_factors)
            deletion_rate = calculate_deletion_rate(rna, enviornmental_factors)

            # Repeated draws: keep adding (removing) a nucleotide while the
            # draw succeeds; insertions and deletions cancel each other out.
            while insertion_rate > random.random():
                nucleotides_gain += 1
            while deletion_rate > random.random():
                nucleotides_gain -= 1

            if nucleotides_gain > 0:
                for _insertion in range(nucleotides_gain):
                    # randint is inclusive, so index == len(template) appends.
                    index = random.randint(0, len(template))
                    template = template[:index] + random.choice(nucleotides) + template[index:]
            elif nucleotides_gain < 0:
                for _deletion in range(-nucleotides_gain):
                    if not template:
                        # Nothing left to delete.
                        break
                    # randrange excludes len(template), so a nucleotide is
                    # always removed (randint could pick one past the end
                    # and silently delete nothing).
                    index = random.randrange(len(template))
                    template = template[:index] + template[index + 1:]

            length = len(template)
            if length not in matrices_by_length:
                length_matrix = generate_mutation_matrix(length)
                matrices_by_length[length] = (
                    length_matrix,
                    mutation_chance_matrix(length_matrix, mutation_rate),
                )
            mutation_matrix, mutation_chance = matrices_by_length[length]

            # Select the row whose original sequence (column 0) is the template.
            row_mask = mutation_matrix[:, 0] == template
            mutation_list = mutation_matrix[row_mask][0]
            mutation_chance_list = mutation_chance[row_mask][0]

            # Renormalise so the weights sum to exactly 1 for np.random.choice.
            weights = mutation_chance_list / mutation_chance_list.sum()
            new_rna = mutation_list[np.random.choice(len(mutation_chance_list), p=weights)]

            new_population.append(str(new_rna))

        full_population += new_population
        rna_population += [rna for rna in new_population if does_replicate(rna, enviornmental_factors)]

    return rna_population, full_population

In [282]:
# Derive the substitution-rate matrix from the global environmental factors.
mutation_rate = calculate_mutation_rate(global_enviornmental_factors)

# Run the simulation and print the (rna_population, full_population) tuple
# returned by replicate_rna.
print(replicate_rna(initial_rna, mutation_rate, generations))



['AUG', 'AUG', 'AUG', 'AUG', 'AGG', 'ACG', 'AUG', 'AUG']


**Data Analysis**