## 001 - Counting DNA Nucleotides

In [1]:
with open('Data/001_rosalind_dna.txt') as data:
    dna = str(data.readlines())

def count_bases(dna_string):
    A = dna_string.count('A')
    C = dna_string.count('C')
    G = dna_string.count('G')
    T = dna_string.count('T')
    return A, C, G, T

count_bases(dna)

(228, 258, 236, 247)

## 002 - Transcribing DNA into RNA

In [2]:
with open('Data/002_rosalind_rna.txt') as data:
    dna = str(data.readlines())

def dna_to_rna(dna_string):
    return dna_string.replace("T", "U")

dna_to_rna(dna)

"['AGUGCGUAUGCGAGGGGUGAAGAUAGCUAUUCCAGCCUGUCAAGUGAAGCGCGGAUCACGGAGGAGAGGAAUAAGUUUUAGGCGGUUCGUUUGAGGAUUUGGUGAUGAGAUUUCGCAUGCGGCGGUCUUCAUUAGUUUGACCAAGUAACUUCUGGUUUCGGCCAUUGGGCAUUGCAGGGCUAUCCAGCCGGAGUUUUGACAGGUUCACAACCGCCCUAGGAGCUCGGAUUAAGUCACGGGCCCUAGAGGGCUUGGGACGCUGGGAAAGUAUGGAUACUGCGUGGACAGCGAUCCUGAUUCUGUGGAACUAAACGCGGCCUUGACGCAGUGUCAUGUUUCCAUGAGUUAGGCCUUAUAGCAGGAAUGGCUUUUCGAGCGCCGUAUCUCGUGACAAACGAAGUCAGGGUCCGGAUGGUGCAUAUGCUAUUGUGGUACUCAACGGGCUAGUCCGGUGCUUUUAUGGGCGCUGUUCGCAACAGAUACGAGCUGCUAGGUGAGCGGUCUAAGCCAUGGUGCGUAGGUGACCCCCAUGAACGGCCUGAAGGCUCCACGAUGGGUGUAUUCAGUCAUCCGAGAAUCCCGAGUUACAUGGUCGACAUAAUGCUUAUGUAUCGGGGGACAUCUGCACGUGUGAGGGAUGACAGUUUUAGGUUGAGAAAAGACACCUGUCUCCGUAACUCUUUAAAUUUGCCAUGGCGCGUCUACUCCCAUUUAAUCAGAAGCGUGGAUAGUCGUUUCGACGGGGACACUGAAGCUCGCCCAAAAGCAGCAUUCGACCCGGAGGAGUAUGGCCUCUAUGUGGAGAACGCAUGAAGGGACGUACCGCGCGGUGGAACCGGAUCACAGCCUCACAUCCCCCGCCCGGCCCACCGCAUCCAGAUUUGCCAGGCAUGAAGGUAGUCCAACGACUC\\n']"

## 003 - Complementing a Strand of DNA

In [3]:
with open('Data/003_rosalind_revc.txt') as data:
    dna = str(data.readlines())

def find_reverse_complement(dna_string):
    s_c = dna_string.replace('A', '%temp%').replace('T', 'A').replace('%temp%', 'T')
    s_c = s_c.replace('G', '%temp%').replace('C', 'G').replace('%temp%', 'C')
    s_c = s_c[::-1]
    return s_c

find_reverse_complement(dna)

"]'n\\GATGAAGCCATTCACTACCCAAAACCTAAAACAGGCATTACATTCTCCGTGGGCAATTTTTTACGGAAAATGGTGCAGAAGGAAAGTCTCCATACATTATTATGGGGAGCTTACTCGCTCACAACGGGGACGACGACCGCCAAGGACGGAGTTGATAACGTATCTACGATATTGAGGTCTCGTCAAACAGTTAAAACGTGTTGCATGAAGCAGGGTTACTTTCACAGAATCAATATCCTTCTCGTCATGTATGATCAAGAGTCGCCCGAGATGGAATCCCCGTAACGGATTACGTAAAACTTACTAAGGTGAGCAGCGCTTAGACTATGATTCTACCGATGTATCGCAAAGCCTTTCGTAGCGCCTTGCGATAAGGGAGCCTAATGGTTGCTACTGCTCTCGGCGATTCTCGTGGAGTTTTTTGAATACCCAAACAGGACCTGCTCTGCATGCTCAATCTCCCATAGTGGGAACCCTAACTCAAATAATTTGATGCGAAACCACAATCCACGACAGGGCCAAAAGCCAAGGTAATGTTGGTATGGTATCTCCAGGTTAAAGCCGGTGTAATGTGTTGACGGCGAGCGATTGAGATCCTATGGGGTCTCCGCAGATCTGGGACGAATCAGTGCTATAGGGCTCTCACATGATACGGAACGCCCAAATCGATATGGTGATGACTCAAGGATAGCGTTATACCCCCCCTGCCACGGGGACTCATCAACCAGTCCTTTATTGCACCTCCTCCCGTTTCACATAGTCAAGCGTTCCGCGTGCGTGGTTACCACGTCGTTCTCTCATTGTTCAATCGTGTCTCGATATGTGAACCGGAATTAACAGACCCCTAAGTATTTAGGATAGGTAGTG'["

## 004 - Rabbits and Recurrence Relations

In [4]:
def find_fibonacci_rabbits(n, k, i):
    # find the number of rabbits at the nth month
    # starting with i newborn pairs
    # each mate producing a litter of k rabbit pairs
    rep = 0 # reproductive pairs
    new = 1 # newborn pairs
    for j in range(1, n):
        temp = rep
        rep = rep + new
        new = 3 * temp

    return rep + new

find_fibonacci_rabbits(30, 3, 1)

20444528200

## 005 - Computing GC Content

In [5]:
from Bio import SeqIO
import pandas as pd

In [6]:
def find_gc_content(dna_string):
    bases = count_bases(dna_string)
    return (bases[1] + bases[2]) / len(dna_string)

In [7]:
with open('Data/005_rosalind_gc.fasta') as fasta_file:
    identifiers = []
    gc_contents = []
    for seq_record in SeqIO.parse(fasta_file, 'fasta'):
        identifiers.append(seq_record.id)
        gc_contents.append(find_gc_content(seq_record.seq)*100)

In [8]:
df = pd.DataFrame(zip(identifiers, gc_contents), columns = ("id", "gc_content"))
df

Unnamed: 0,id,gc_content
0,Rosalind_9535,50.345622
1,Rosalind_5771,48.17898
2,Rosalind_4778,49.739854
3,Rosalind_0462,46.338798
4,Rosalind_0099,49.594438
5,Rosalind_6528,51.398964
6,Rosalind_6101,52.696629
7,Rosalind_6960,50.378378
8,Rosalind_0980,50.0


In [9]:
print(df[df.gc_content == df.gc_content.max()])

              id  gc_content
6  Rosalind_6101   52.696629


## 006 - Counting Point Mutations

In [10]:
data = open("Data/006_rosalind_hamm.txt", "r")
pair = data.read().splitlines()
count = 0
for i in range(0, len(pair[1])):
    if pair[0][i] != pair[1][i]:
        count += 1

count

497

## 007 - Mendel's First Law

In [11]:
k = 27  # AA
m = 26  # Aa
n = 17  # aa

T = k + m + n

pr =  (n / T) * ((n - 1) / (T - 1)) * 1     # aa & aa
pr += (m / T) * (n / (T - 1))       * 0.5   # Aa & aa
pr += (n / T) * (m / (T - 1))       * 0.5   # aa & Aa
pr += (m / T) * ((m - 1) / (T - 1)) * 0.25  # Aa & Aa

1 - pr

0.8185300207039338

## 008 - Translating RNA into Protein

In [12]:
from Bio.Seq import Seq

In [13]:
data = open("Data/008_rosalind_prot.txt", "r")
rna  = Seq(data.read())

In [14]:
print(rna.translate())

MVIRSLATPLHRDSRISEPRGRLFTMVAASIFSWNCIRAPSFYTCSVGTAGEPIRGAGTHFCWSPNALAMVCGQVRLNHHRSTRKPTRPTCALRGLTAGTLLQRGVFVRYNLEERLHGLRLKLCGRTSLTLYDLDIGIGHDFRYTRVLKANSGRSYSSLHPDAVPLGGCGIRQKSPPCVKRSRVKEPPAIHLRISREEFVLRYVELLSKDPSDVNLMARPNVGLAAIPSGVVCVNWLSYRKLNPLSVIQLGHTGWQHCSNGENLMHVPGTNGTHFSYQRLNCDNVIVAVAAIFLEIVAAHARRELARCPAGAKRAALEFHPSSCCLHQLRGLSVPSRVLPIKYDSLTTSEKRLTDEGVYNITYGIDDPTSSDRTRYSRYSIIASAILSRAKLPVLDLPLTRLSTPYPEIWESAPIPLQLWYISQLNRFYCRGLTISHQKLCSAIHPSGPWEFVLLGPACNGIATVCRRSRHTLAHWAYWVLTRATQLVYPTSKPTTCREIFNSAPGKYEENFNFPLNQPRFLPACDRSKLHNSQLNAHRQFTRRSATEMPYIRLSIRNRGDQGLRKMPTICDELINVISDELILFFHRHCLHTSVGPLRSSQPTRTGKQPGPVRRVSPTTSSPTFHLTEPSRYLNRRHNVHTIPVSPVGSRALHTFAVFPPRCLAVHGIEQHYTRCSGRNDALRPLYRAMEAHECQLGRQQHSSLSSIRRSHAICCDVYSLESTASRSPTHCLKIRSKNCEGKRRTKTAFCTAILRITAAVIASRGSQELARTGAARMRAQVQTTKPGRLYEPQRRAIIDILKSTLYDVKNQSILTYSVTNDSISQVSHPPCLWKFTDGNARNFDSVTRVSIGGKLHCENLIFAGLWSKWSEQRPGPRSKGLSVTSRPGILYAQCHSPEYRSGARLREHFKVDSKGSLYCRNNVPPRFVPFGGTGTSLSHRIFRSRSCPLEPGHPLANSLNCHLESEHPGTNYKGAFQRIFNNVSWVSSEASKCGPTGNV



## 009 - Finding a Motif in DNA

In [15]:
data = open("Data/009_rosalind_subs.txt", "r")
dna  = data.read().splitlines()

def find_instances(dna_string, subset):
    out = []
    for i in range(0, len(dna_string)):
        if (dna_string[i:i+len(subset)] == subset):
            out.append(i+1)
    return out

find_instances(dna[0], dna[1])

[5,
 12,
 28,
 46,
 53,
 104,
 187,
 194,
 274,
 389,
 396,
 411,
 475,
 518,
 558,
 573,
 601,
 659,
 666,
 683,
 771,
 828,
 853,
 869,
 876,
 901,
 916]

## 010 - Mortal Fibonacci Rabbits

In [29]:
def find_mortal_fibonacci_rabbits(n, m):
    # find the number of rabbits at the nth month
    # starting with 1 newborn pair
    # each mate producing one rabbit pair
    # and live for m months
    rep = 0             # reproductive pairs
    new = [0] * (m+1)   # newborn pairs
    new[-1] = 1

    for j in range(1, n):
        new.pop(0)
        new.append(rep)
        rep = rep + new[-2] - new[0]

    return rep + new[-1]

find_mortal_fibonacci_rabbits(86, 16)

414110157126129233