## [Counting DNA Nucleotides](https://rosalind.info/problems/dna/)

In [9]:
def count_dna_naive(dna_string):
    """Count nucleotides with an explicit character-by-character loop.

    The input is upper-cased first, so lower-case bases are counted too.
    Characters other than A, C, G and T are ignored.

    Args:
        dna_string: DNA sequence to tally, or None.

    Returns:
        The four counts as a space-separated string in the order A C G T,
        or "Invalid string." when dna_string is None.
    """
    if dna_string is None:
        return "Invalid string."

    adenine = cytosine = guanine = thymine = 0
    for base in dna_string.upper():
        if base == "A":
            adenine += 1
        elif base == "C":
            cytosine += 1
        elif base == "G":
            guanine += 1
        elif base == "T":
            thymine += 1
    return " ".join(str(total) for total in (adenine, cytosine, guanine, thymine))


In [10]:
def count_dna_inbuilt(dna_string):
    """Count nucleotides using the built-in ``str.count`` method.

    The input is upper-cased first, so lower-case bases are counted too.
    Characters other than A, C, G and T do not contribute to any count.

    Args:
        dna_string: DNA sequence to tally, or None.

    Returns:
        The four counts as a space-separated string in the order A C G T,
        or "Invalid string." when dna_string is None.
    """
    if dna_string is None:
        return "Invalid string."

    sequence = dna_string.upper()
    # One full scan of the sequence per base, reported in A C G T order.
    return " ".join(str(sequence.count(base)) for base in "ACGT")

In [11]:
from collections import Counter

In [12]:
def count_dna_counter(dna_string):
    """Count nucleotides using ``collections.Counter``.

    The input is upper-cased first, so lower-case bases are counted too.
    Characters other than A, C, G and T are tallied by the Counter but are
    not included in the result.

    Args:
        dna_string: DNA sequence to tally, or None.

    Returns:
        The four counts as a single-space-separated string in the order
        A C G T, or "Invalid string." when dna_string is None.
    """
    # Guard on the argument itself; the builtin type `str` is never None.
    if dna_string is None:
        return "Invalid string."
    counts = Counter(dna_string.upper())
    # Indexing a Counter with a missing key yields 0, so absent bases are safe.
    return f"{counts['A']} {counts['C']} {counts['G']} {counts['T']}"

In [38]:
from utils import dna_utils

In [39]:
# Load the Rosalind dataset, then print the benchmark report for each counting
# implementation, in the order naive, inbuilt, counter.
dna_string = dna_utils.read_dna_from_txt_file("../data/rosalind_dna.txt")
for counting_function in (count_dna_naive, count_dna_inbuilt, count_dna_counter):
    print(dna_utils.benchmark(counting_function, dna_string))

function count_dna_naive: time: 0.07390975952148438, dna_length: 853
function count_dna_inbuilt: time: 0.06103515625, dna_length: 853
function count_dna_counter: time: 0.06198883056640625, dna_length: 853


In [29]:
# Benchmark all three implementations on a generated 10000-base string,
# printing each report in the order naive, inbuilt, counter.
dna_string = dna_utils.create_dna_string(10000)
for counting_function in (count_dna_naive, count_dna_inbuilt, count_dna_counter):
    print(dna_utils.benchmark(counting_function, dna_string))

function count_dna_naive: time: 0.9570121765136719, dna_length: 10000
function count_dna_inbuilt: time: 0.11420249938964844, dna_length: 10000
function count_dna_counter: time: 0.720977783203125, dna_length: 10000


In [30]:
# Benchmark all three implementations on a generated 100000-base string,
# printing each report in the order naive, inbuilt, counter.
dna_string = dna_utils.create_dna_string(100000)
for counting_function in (count_dna_naive, count_dna_inbuilt, count_dna_counter):
    print(dna_utils.benchmark(counting_function, dna_string))

function count_dna_naive: time: 15.34581184387207, dna_length: 100000
function count_dna_inbuilt: time: 1.1157989501953125, dna_length: 100000
function count_dna_counter: time: 13.82303237915039, dna_length: 100000


# Benchmark Results:
## 1. Rosalind test DNA string (854 bases)
    function count_dna_naive: time: 0.09393692016601562, dna_length: 854
    function count_dna_inbuilt: time: 0.016689300537109375, dna_length: 854
    function count_dna_counter: time: 0.06031990051269531, dna_length: 854
## 2. Generated DNA string (10000 bases)
    function count_dna_naive: time: 0.9570121765136719, dna_length: 10000
    function count_dna_inbuilt: time: 0.11420249938964844, dna_length: 10000
    function count_dna_counter: time: 0.720977783203125, dna_length: 10000
## 3. Generated DNA string (100000 bases)
    function count_dna_naive: time: 15.34581184387207, dna_length: 100000
    function count_dna_inbuilt: time: 1.1157989501953125, dna_length: 100000
    function count_dna_counter: time: 13.82303237915039, dna_length: 100000
## 4. Generated DNA string (100000000 bases)
    function count_dna_naive: time: 13549.36408996582, dna_length: 100000000
    function count_dna_inbuilt: time: 1398.7159729003906, dna_length: 100000000
    function count_dna_counter: time: 6434.010028839111, dna_length: 100000000
## Result:
Speed ranking, fastest first: count_dna_inbuilt >> count_dna_counter > count_dna_naive
### Reasoning:
Python string methods such as `str.count` are implemented in C, and hence are much faster than a Python-level loop with if-elif comparisons, even though `count` scans the string once per nucleotide.
With just four known possible characters, the overhead of building a `Counter` is not worth it.