In [1]:
# Enable IPython's autoreload extension so that edits to imported local
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell is executed.
%autoreload 2

In [None]:
from __init__ import *

# Task 1 - Counting DNA bases

In [7]:
import collections

# Read the whole dataset and drop surrounding whitespace (e.g. the trailing newline).
with open("problems/rosalind_dna.txt", 'r') as handle:
    seq = handle.read().strip()

# Tally every symbol once; symbols that never occur report 0 when looked up.
counts = collections.Counter(seq)

# Print the four base counts on one line, space separated, in A C G T order.
print(*(counts[base] for base in "ACGT"))

219 211 216 190


# Task 2 - Transcribing into RNA

In [None]:
# Transcription: every thymine (T) in the DNA string becomes uracil (U).
# NOTE(review): readFile is not defined in this notebook (presumably supplied by
# the star import); assumed to return the file contents as a str - confirm.
seq = readFile("problems/rosalind_rna.txt").translate(str.maketrans('T', 'U'))

print(seq)

GGGCGUGCUGAUGCUAUUGGCAUCCACACCAAUUGAGUGCUCAUAUUAUGAACACCUCUGCUUAUGUGUGAGCCUCCCUCGCCAUCAGUGUUCAACCUGAAGAAGAUGGAGUAGCUAUUCGGUUGGCUAAACGCGCGAUUGGCAACGCGCCUGUAAUUGUCAAGUCAGAGUCCCAGAUAUGUCGCAGACCGUUGGAGUCGCCAUGGACAAGCUCGCUUGGUGGUAGUAUUUGCCGGCAGGAGAAACUAUACUGCAUGUGCGCGCCGCUAUACACGUGGCCGCCCCCACAAGCGCGUCAUUACAGUCCGGCCUGCGCCCACCUCGCCUCGUGCUUGCCAACAGUUCGAGUAGCUAUAAGUUGACGGAACCAAUGUUCACCAAAAGUCGCCACGUAGUAUACAAUUUCAGCAAGAAUUCAAUAUGGCUGUCGAAGCGGGAGUAUGGUUAGCGCACAUCGUACUUGCCUCCCGAUUGCCAGAUGUCUCGGCAUUGCUAGCUGACCCUGUAAUCAGCCGCAAAUGCACAUGGAGUCGAAAGGGAGACCGAGCUACGCCGAUCGGCGAUCGUCAAGUAAGGGCGUGGUUGACGAUUAGACACUGUAGCACAUUGGCCGUUAUGGUCAGCAGGGGAGUUCUGGAUUGUUCAGGCAGCGCGUGACUUCAAGCUCAGCGGUGCAAACAACCUGACGUGUUAAGAAUUGGUGUCCAUGCAACUUUGGGCCCAUCAAAAGCAGCUUAGUACAUGGUGGUCCCCUCAGACUCUUGGCGCCACCGAGUGAGAAUGAGCAAAUUCGGAAGAGCGAUUGAAACGAUCGCUCAUCAUGAAGCCUAAGAUUACUUGUGUGUGGCCGAAAGGACUAACGGUCAGACUCAACUCACAAACUACAAGGCGUUACCUGUGAACGUUCGAUCGAGCCAUGGUUUAUGUUUUAAGUG


# Task 3 - Reverse complementing DNA

DNA sequences have a direction: they run from 5' to 3', where 5' is the phosphate end and 3' is the hydroxyl end. String representations follow the same order. So to form the sequence complementary to another, not only must each base be swapped for its complement, but the order of the sequence must also be reversed.

```
5' ---> 3'
ATCGTAGCAG <- input sequence
TAGCATCGTC <- output sequence
3' ---> 5'

output sequence (5'-3'): CTGCTACGAT
```

In [None]:
# Translation table that maps each base to its complement (A<->T, C<->G).
complementTable = str.maketrans("ACGT", "TGCA")

# Reverse first, then complement each base. The mapping is per character, so
# this yields the same string as complementing first and reversing afterwards.
# NOTE(review): readFile is assumed to return a str - confirm.
forward = readFile("problems/rosalind_revc.txt")
seq = forward[::-1].translate(complementTable)

print(seq)

TGCAGGCAGGGTCTATTTACTGATCGTACTTGTACAACGAAGCCTCTAGGAGGAGTCACATACTGTTCCGTTAGCAACACCATTACCGATCGAACGAGTGACACCGCGTGGCCCATCGGGCTAGCGGGCTCCCTACTGGCCGTAGGTGATCGATACACGAAAGCCAGACAAGTCGCGGATGGGTATGAGCCTACTAGAGTGCACTGGGGTAAGTGACTAGTTCGATCCTTTTATCGATCGTGATCCAGCGCGGGCAAGACGACACCAATGCAAAGATCCTCCGTCGTGTGGATCGCCCTGCCAGTCGAGGCGCGGGGTCAAAAGATCAGGTGGTCAATCTACAGTGCCATCTGTATTGTGAGGGAGAATTTTAAAGTATAGCGTCGATCAAAGTCGGTTAATCAGTTAACTTAGCGGATGTCCACGGAGCGTCGTAGCAGGATTTTGTCCCGTAACGAAGTGGTCAACGGGTGTAGTTCAGTAAGGAAAGGCAATGCCAGGCCTCGGTTCAGCAGGCTGGGATTGACTGGATCGCGTCATAGGCTTCAAAGCACGTGTATATTCAAGTAGCCTCCTTCTTACGGAGTAGTAGACGGAAAACGGTGAGTAGCAACAAGTAATCAAAGACGGTATCTAACCACCAGAGCGTACAAGTGAAAAGGTATGGTTAACCGTTGCGAATAGGAAAGCTTGAGAGATAGTCGGGTGGCATCTGAACAATCGCTAACTTGCAGCCATATTATGTCCCTATTCGAGACAGTGCTTTGGTAGTGAGGTTACAGCTTCGAATGATAAGAAACTAAACGAATCACTCGTACGTCATGGAACGGCCCCTAATAGGTTAAACACAGGTGTGGTCCACAGTATTGTGAGCCGGTATGACCGGTTCTTGTCACTGTGCTCAGATCCTGGAGGTGCTCATTCGCTGTGACCTGGGGTCCAGTGGGCCTTGAATCTGAGGGCTCCATTATTACG


# Task 4 - GC Content

In [None]:
# GC content: the share of bases in the sequence that are G or C.
# NOTE(review): DNASeq and randomSeq are not defined in this notebook
# (presumably supplied by the star import); the code below only relies on
# DNASeq supporting .count() and len().
seq = DNASeq(randomSeq())
gcCount = seq.count('G') + seq.count('C')
content = gcCount / len(seq)  # GC content as a fraction of the sequence length

# Scale the integer count before dividing. Multiplying the already-rounded
# fraction by 100 can expose binary floating-point noise
# (e.g. 29 / 50 * 100 == 57.99999999999999), whereas 29 * 100 / 50 == 58.0.
percent = gcCount * 100 / len(seq)

print(f"{percent}%")


46.0%


## Rosalind Problem
using FASTA format as input

In [None]:
# NOTE(review): readFasta is not defined in this notebook; it is used here as a
# mapping from FASTA record label to sequence string - confirm.
records = readFasta("problems/rosalind_gc.txt")

# GC percentage for every record, keyed by record label.
seqs = dict(
    (label, (dna.count('G') + dna.count('C')) / len(dna) * 100)
    for label, dna in records.items()
)

# Pick the record with the highest GC percentage (first one wins on ties).
maxGC, topPercent = max(seqs.items(), key=lambda item: item[1])

print(maxGC)
print(round(topPercent, 3))

Rosalind_0469
55.056


# Task 5 - Codons

## Translating DNA to Protein

In [None]:
seq = randomSeq()

# Walk the sequence one complete codon (3 bases) at a time; a trailing
# partial codon is ignored.
residues = []
usable = len(seq) - len(seq) % 3
for start in range(0, usable, 3):
    codon = seq[start:start + 3]
    residue = dnaCondons.get(codon)
    # Every codon must be present in the lookup table.
    assert residue is not None
    residues.append(residue)
pseq = ''.join(residues)

print(seq)
# Pad each residue symbol to width 3 so it lines up under its codon.
print(''.join(f" {symbol} " for symbol in pseq))

CACATCGGGTGGATGTAGCTGGAAACCCAGCGAGGGCCGTTAACTGCCGG
 H  I  G  W  M  _  L  E  T  Q  R  G  P  L  T  A 


## Calculating codon frequency

In [22]:
from collections import Counter

targetProtein = 'M'
seq = utils.randomSeq()

# Split the sequence into complete, non-overlapping codons and keep only those
# that the codon table maps to the target residue. An unknown codon raises KeyError.
frames = (seq[start:start + 3] for start in range(0, len(seq) - len(seq) % 3, 3))
codons = [frame for frame in frames if dnaCondons[frame] == targetProtein]

# Relative frequency of each matching codon among all matches.
tally = Counter(codons)
total = len(codons)
freqs = {codon: count / total for codon, count in tally.items()}

for codon in freqs:
    print(f"{codon}: {freqs[codon] * 100}")

ATG: 100.0
