# P01: Replication Process

In [137]:
import random
import time
import threading

from polymerase import Polymerase

In [138]:
# Complement lookup for the four DNA bases (A <-> T, C <-> G).
nitro_bases = {
    'A': 'T',
    'T': 'A',
    'C': 'G',
    'G': 'C',
}

## Dataset

In [139]:
# Path of the input file; kept as a string (the file handle gets its own name
# so the path is not overwritten by a closed file object).
filename = "./data/Data1.txt"

# Only the first line of the file is used as the DNA sequence, with
# surrounding whitespace (including the newline) removed.
with open(filename, 'r') as dna_file:
    dna_seq = dna_file.readline().strip()

## Generating complementary DNA sequences

In [140]:
# Build the complementary strand base by base. Indexing nitro_bases (rather
# than using .get) keeps the KeyError for any character that is not A/T/C/G.
dna_seq_comp = "".join(nitro_bases[base] for base in dna_seq)

In [141]:
# Sanity check: both strands must have the same length (one value per line).
print(len(dna_seq_comp), len(dna_seq), sep="\n")

935
935


In [142]:
# Length of the loaded DNA sequence.
total_nucleotides = len(dna_seq)
# Fraction of the strand covered by a single helicase.
helicase_rate = 0.25
# Number of helicases: strand length divided by the (truncated) number of
# nucleotides each helicase covers.
total_helicase = total_nucleotides // int(total_nucleotides * helicase_rate)

## Simulation process

In [143]:
# Evenly spaced interior binding sites, each shifted by a random offset of up
# to 50 nucleotides in either direction.
spacing = total_nucleotides // total_helicase
helicase_positions = []
for site in range(1, total_helicase):
    offset = random.randint(0, 50)
    # A draw of 1 shifts the site to the left, a draw of 2 to the right.
    direction = -1 if random.randint(1, 2) == 1 else 1
    helicase_positions.append(spacing * site + direction * offset)

print(helicase_positions)

[224, 432, 700]


In [144]:
# Prepend position 0 (in place) so the first segment starts at the beginning
# of the strand.
helicase_positions[:0] = [0]

In [145]:
def replication_leading_strand(dna_seq, index, start, end):
    """Synthesize the complement of ``dna_seq[start:end]`` and store it.

    The complementary fragment is written to slot ``index`` of the
    module-level list ``replicated_fragments``; nothing is returned.

    Args:
        dna_seq: Template sequence made of the keys of ``nitro_bases``.
        index: Slot of ``replicated_fragments`` that receives the fragment.
        start: First position of the segment (inclusive).
        end: Last position of the segment (exclusive).

    Raises:
        KeyError: If the segment contains a character missing from
            ``nitro_bases``.
    """
    # Item assignment does not rebind the global name, so no ``global``
    # declaration is needed.
    replicated_fragments[index] = ''.join(
        nitro_bases[base] for base in dna_seq[start:end]
    )

In [146]:

print("The total number of nucleotides is: ", total_nucleotides)
print("The total number of helicase is: ", total_helicase)
print("----------------------------------------------------------------------------------------------------")
time.sleep(2)

print("Helicases bind to the DNA strand and unwind the double helix at the positions:", helicase_positions)
time.sleep(2)

print("Leading strand replication process...")
time.sleep(1)
# NOTE: this binds a second name to the same list (not a copy), so later
# in-place changes to helicase_positions are also visible via primer_positions.
primer_positions = helicase_positions
print("Helicase unwinds the DNA double helix...")
print("The unique primer of each the leading strand is placed at the 5' end of the DNA strand in positions: ", primer_positions)
print("----------------------------------------------------------------------------------------------------")
print("The ADN polymerase enzyme is added to each primer and begins to synthesize the new DNA strand...")
time.sleep(5)

# One result slot per segment; each worker thread fills exactly one slot.
replicated_fragments = ['' for _ in range(total_helicase)]
leading_threads = []
for i in range(total_helicase):
    # A segment ends where the next helicase starts; the last one runs to the
    # end of the strand.
    segment_end = helicase_positions[i + 1] if i < total_helicase - 1 else total_nucleotides
    worker = threading.Thread(
        target=replication_leading_strand,
        args=(dna_seq, i, primer_positions[i], segment_end),
    )
    worker.start()
    leading_threads.append(worker)

# Wait for every worker before reporting completion; otherwise the message
# below could be printed while some fragments are still unwritten.
for worker in leading_threads:
    worker.join()

print("The leading strand replication process is complete.")
print("----------------------------------------------------------------------------------------------------")

print("Lagging strand replication process...")
time.sleep(1)
print("The lagging strand is synthesized in fragments called Okazaki fragments.")

The total number of nucleotides is:  935
The total number of helicase is:  4
----------------------------------------------------------------------------------------------------
Helicases bind to the DNA strand and unwind the double helix at the positions: [0, 224, 432, 700]
Leading strand replication process...
Helicase unwinds the DNA double helix...
The unique primer of each the leading strand is placed at the 5' end of the DNA strand in positions:  [0, 224, 432, 700]
----------------------------------------------------------------------------------------------------
The ADN polymerase enzyme is added to each primer and begins to synthesize the new DNA strand...
The leading strand replication process is complete.
----------------------------------------------------------------------------------------------------
Lagging strand replication process...
The lagging strand is synthesized in fragments called Okazaki fragments.


In [147]:
# Divisor that caps the number of Okazaki fragments per segment at
# segment_length // scale.
scale = 20
# Close the last segment by adding the end of the strand as a final boundary
# (in place, so every name bound to this list sees it).
helicase_positions.extend([total_nucleotides])

def okazaki_fragment_positions(helicase_positions, total_helicase, scale):
    for i in range(total_helicase):
        lagging_primer_positions = []
        fragment_lenght = helicase_positions[i+1] - helicase_positions[i]
        num_of_fragments = random.randint(total_helicase, fragment_lenght // scale)
        okazaki_length = fragment_lenght // num_of_fragments

        for j in range(num_of_fragments):
            lagging_primer_positions.append(primer_positions[i] + okazaki_length * j)

        lagging_primer_positions.append(helicase_positions[i+1])
        yield lagging_primer_positions
    

In [212]:
def replication_lagging_strand(dna_seq_comp, index, positions):
    """Replicate one segment of the lagging strand fragment by fragment.

    Each pair of consecutive entries in ``positions`` delimits one fragment of
    ``dna_seq_comp``. A fragment is read backwards, complemented through
    ``nitro_bases``, and flipped back so that the pieces can be joined in
    strand order. The joined result is written to slot ``index`` of the
    module-level list ``replicated_fragments``; nothing is returned.

    Args:
        dna_seq_comp: Template strand made of the keys of ``nitro_bases``.
        index: Slot of ``replicated_fragments`` that receives the replica.
        positions: Fragment boundaries, e.g. one list produced by
            ``okazaki_fragment_positions``.

    Raises:
        KeyError: If a fragment contains a character missing from
            ``nitro_bases``.
    """
    pieces = []
    for start, end in zip(positions, positions[1:]):
        template = dna_seq_comp[start:end]
        # Synthesis runs against the template's direction ...
        synthesized = ''.join(nitro_bases[base] for base in reversed(template))
        # ... so flip the fragment back before joining it with the others.
        pieces.append(synthesized[::-1])

    # Item assignment does not rebind the global name, so no ``global``
    # declaration is needed.
    replicated_fragments[index] = ''.join(pieces)

        

In [213]:
# Fresh result slots (one independent empty list per segment) for the
# lagging-strand run.
replicated_fragments = [list() for _ in range(total_helicase)]

In [214]:
# Replicate the lagging strand one segment at a time, using freshly drawn
# Okazaki-fragment boundaries for each segment.
okazaki_segments = okazaki_fragment_positions(helicase_positions, total_helicase, scale)
for segment_index, fragment_bounds in enumerate(okazaki_segments):
    replication_lagging_strand(dna_seq_comp, segment_index, fragment_bounds)

In [215]:
# Display the replica stored for the first segment (index 0).
replicated_fragments[0]

'ACGCAACACGCAACTAGCAAGAACGTATTCCCGACGCAACTTCCCGACGCAACACGCAACGAACGTATAGCAATTCCCGACGCAACTAGCAAGAACGTATAGCAATTCCCGATTGTCGAACGTAGAACGTAACGCAACACGCAACACGCAACGAACGTATAGCAATTCCCGGAACGTAATTGTCATTGTCTTCCCGGAACGTAATTGTCTTCCCGATTGTCATT'

In [216]:
# Display the full complementary strand built from dna_seq.
dna_seq_comp

'TGCGTTGTGCGTTGATCGTTCTTGCATAAGGGCTGCGTTGAAGGGCTGCGTTGTGCGTTGCTTGCATATCGTTAAGGGCTGCGTTGATCGTTCTTGCATATCGTTAAGGGCTAACAGCTTGCATCTTGCATTGCGTTGTGCGTTGTGCGTTGCTTGCATATCGTTAAGGGCCTTGCATTAACAGTAACAGAAGGGCCTTGCATTAACAGAAGGGCTAACAGTAACAGCTTGCATAAGGGCCTTGCATTGCGTTGAAGGGCTGCGTTGCTTGCATTGCGTTGAAGGGCCTTGCATTAACAGATCGTTATCGTTTGCGTTGATCGTTTGCGTTGAAGGGCTGCGTTGCTTGCATCTTGCATATCGTTATCGTTCTTGCATTAACAGTAACAGTGCGTTGTGCGTTGATCGTTCTTGCATATCGTTCTTGCATCTTGCATATCGTTCTTGCATTGCGTTGCTTGCATATCGTTTGCGTTGAAGGGCTAACAGTGCGTTGTAACAGTAACAGATCGTTAAGGGCTAACAGCTTGCATATCGTTATCGTTTGCGTTGTAACAGTAACAGATCGTTTGCGTTGATCGTTTGCGTTGCTTGCATTAACAGATCGTTAAGGGCTAACAGAAGGGCAAGGGCCTTGCATTAACAGCTTGCATATCGTTATCGTTCTTGCATTAACAGTAACAGATCGTTATCGTTCTTGCATATCGTTCTTGCATAAGGGCTGCGTTGCTTGCATTGCGTTGAAGGGCAAGGGCTGCGTTGTAACAGCTTGCATTAACAGTAACAGCTTGCATATCGTTATCGTTATCGTTTAACAGCTTGCATCTTGCATCTTGCATCTTGCATAAGGGCAAGGGCTGCGTTGTGCGTTGATCGTTTGCGTTGAAGGGCAAGGGCATCGTTCTTGCATTGCGTTGATCGTTATCGTTTAACAG'

In [217]:
def test_replication(dna_original, dna_replica):
    """Return True when the replica is identical to the original sequence.

    Args:
        dna_original: Sequence that was replicated.
        dna_replica: Sequence produced by the replication.

    Returns:
        The result of comparing both arguments with ``==``.
    """
    return dna_original == dna_replica


# The ``test_`` prefix matches pytest's default discovery pattern; opt out so
# a test runner does not collect this helper and fail on its parameters.
test_replication.__test__ = False

In [226]:
# Join the per-segment replicas in order and compare the result with the
# original sequence.
test_replication(dna_seq, ''.join(replicated_fragments))

True

To do:
- Tidy up the presentation
- Dataset
