# Rosalind Bioinformatics Stronghold Solutions

## Prerequisites

In [None]:
# Show the value of every top-level expression in a cell, not only the last
# one, so that several bare calls in the same cell each display their result.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [62]:
def read_dataset_str(filename):
    '''Return the first line of ``filename`` with surrounding whitespace removed.

    Only the first line is read; any further lines in the file are ignored.
    An empty file yields the empty string.
    '''
    with open(filename) as inf:
        # readline() returns '' at end of file, so an empty file does not
        # raise and the rest of the file is never loaded into memory.
        return inf.readline().strip()

In [63]:
def read_dataset_list(filename):
    '''Return every line of ``filename`` as a list of stripped strings.

    Each line has its leading and trailing whitespace (including the
    newline) removed; blank lines are kept as empty strings.
    '''
    with open(filename) as handle:
        return list(map(str.strip, handle))

## DNA: Counting DNA Nucleotides

In [30]:
def dna(str_in):
    '''Print how often 'A', 'C', 'G' and 'T' occur in ``str_in``.

    The four counts are written to stdout on a single line, separated by
    spaces, in the order A, C, G, T. Nothing is returned.
    '''
    counts = map(str_in.count, 'ACGT')
    print(*counts)

# Run on an inline example string, then on the first line of 'rosalind_dna.txt'.
dna('AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC')
dna(read_dataset_str('rosalind_dna.txt'))

20 12 17 21
246 248 231 264


## RNA: Transcribing DNA into RNA

In [31]:
def rna(str_in):
    '''Return ``str_in`` with every 'T' replaced by 'U'.

    All other characters, including lowercase 't', are left unchanged.
    '''
    thymine_to_uracil = str.maketrans('T', 'U')
    return str_in.translate(thymine_to_uracil)

# Run on an inline example string, then on the first line of 'rosalind_rna.txt'.
# rna() returns its result instead of printing it, so these bare expressions
# are only shown when the notebook echoes expression values.
rna('GATGGAACTTGACTACGTAAATT')
rna(read_dataset_str('rosalind_rna.txt'))

'GAUGGAACUUGACUACGUAAAUU'

'GGUCACCCGACGAUCCUGGGGGGUCGGAACGGCUGGGACGCAGCCCAGUCCAUAAUACCUGGACGUGGCCCGCCGCUCAUGAGCCUGCGUAACGGGCAAUUCGAACUCAUUACUCAGCUCAGACGAAAAGUAAUUAGUGGUCCGGCUUAUAGCUUGAUAACCCGUAAGUGCUCUAGCCCCCAUACCUUCAAAGGCUAGCGUAGCAAGGGCAGAGAGUCCUGGGUUCAGGCCGUCGAUUGACGAUACAUACCUGAGCGGCUUACUAAAGCUUAGGUAUGUCGGGGGGCCAGACGUAUGCCCCCGCGGAUAAGGUCGUCUUAGGGGUAUCGUCAGUAACAGUUGCAUUAUGGCCCCACGCUGGCAUCGUUUGAAGAUCCGCUUAAAGGCGAAGAAUAAUAAGUUGGGAAUUCUAUCUUCAUCCGAUCCUAUAGUUCUAACCAUGUCAUAGUUCAUAGGUAUAUAGAGUACGCUCAGUGCGGCUAACCUAAAAGUAAAGUGCGUGAUAGGACUGCAGAUGUGAUUCAAAACAGGAGUGUCGAGCACCAUAAGUAUACAAGAACUUCGUGCGGGCCGGCACUAUGCUAUCAUUUAGGACCCGUGUGCUCGAGAAAGGUCUUUGCUGGCAGCUGUGUUGCUAACUACCGUUGAUAUUAAACUGAGCUCGUAAUAUAGAAAUAAGAUGACACAUCAUUCGCAAGCACGCUGUAUGGGCUAUGGUAAAGACCUUAAUGAAAAGGCCAGUAGGCCCGAGCGCAACGCUGCAAUGCGUGGACAACGACCGGCUACCAAUUUUUCCGAAAAGUCGUACUAAGGUAAGACAAGGGCCCCAGUUCGUCCUUCAUGAGGGUAGAAAAUACCCUCGUUGCGACAGUUGCGAUCCCAGAGGAAUUAGUAAUUGACGAAGUCCCCCUAGCCCUCGCCGCCCGUG'

## REVC: Complementing a Strand of DNA

In [39]:
def revc(str_in):
    '''Return the reverse complement of the DNA string ``str_in``.

    Each nucleotide is swapped with its complement (A<->T, C<->G) and the
    result is reversed. An empty string yields an empty string.

    Raises:
        ValueError: if ``str_in`` contains any character other than the
            uppercase letters 'A', 'C', 'G' and 'T'.
    '''
    # Reject unknown symbols up front: translate() would silently pass them
    # through and produce a wrong answer.
    unknown = set(str_in) - set('ACGT')
    if unknown:
        raise ValueError(
            'not a DNA nucleotide: {}'.format(
                ', '.join(sorted(repr(char) for char in unknown))
            )
        )
    complement = str.maketrans('ACGT', 'TGCA')
    return str_in.translate(complement)[::-1]

# Run on an inline example string, then on the first line of 'rosalind_revc.txt'.
revc('AAAACCCGGT')
revc(read_dataset_str('rosalind_revc.txt'))

'ACCGGGTTTT'

'ACAGAGACGGCGGGTGTGCGTGCCCACAGTTCGAACCCTGGACAACCGCGCAGATGGATGATTTAAGATGCCTCACCTCTTGCATGAGTTGCCTTACACCCGCACTCTATCCGGACCGATCCAACAGACTAGCCCAAAGCTGAGGGAAGACGGGCATAGCGGCACTGCATTAGAAAAACTCCCGCAACACCGGGTAGTCCGAGACCCCCACCCTTAAACATGGTCTAGCGTGGTACAAAGATAGCCCTCGTAATACAACTACCCCTGCAACGTCATCGCAACTTATCGTTGACATACTTGGATAATGCCCTTTTAAGGTTGTGAGTTGGTAATAGTGCCCGGATTGCCGTTGCGGCGGGCAAGTTCTTCCACCAATATCAGGGGTGTTCACCACCTTCGTAGAATTTGACAGATGACCGACCAACAACGCTGTAGGAATCAAGTGAAGTAAACCTGCGGTTCTGAATTTTATGCGTCCTGCCTTGATGGTGGAGTGTAAGTGGACCGTTACGGAATACTCGTACAAAACTGACCCGGGGATCACCGCTCGAACATTCGGCTGCGCATAAACCCGAGGTCCGAATGAGGCCATGGTAAGAGTTAGAGTGGTGACCCTGCGATCGGAGAGAGATAACGGCCGTATGATTGCCTGGCGTCAAGACCCAGGTCCCAGCGTTGGCTTACGATTCCTCAATATCGGTGACGGGAAAGGTGACAAGAGTACAGGTGATCCGACGTCCGGTGTTACGCATACGATCGACAAGCGAAGACTCTACAGGATGCTTTTGATTAGGTATAGGCTTAGGTCACGAGCAGACTAGCCTATGTAGGCCAGCAAATAAATGTCAAGGAAGTTCAGCTCAACGATAAATAGCACGATAATTTGTGGTTCGGTGTACACAAATCATGTCGCCTGGATTGGTGGCGTCCGGACTACTCGCTGTGGCGC'

## GC: Computing GC Content

In [61]:
def fasta_read(fastafile):
    '''Parse a FASTA text file into a dictionary mapping record name to sequence.

    A line starting with '>' opens a new record; the rest of that line
    (without the '>') is the record name. All following non-blank lines up
    to the next header are concatenated to form the record's sequence.
    Blank lines are ignored. If a name occurs more than once, the last
    occurrence replaces the earlier one.

    Raises:
        ValueError: if sequence data appears before the first header line.
    '''
    chunks_by_name = {}
    current_name = None
    with open(fastafile) as inf:
        for raw_line in inf:
            line = raw_line.strip()
            if not line:
                continue
            # Only a leading '>' marks a header; a '>' elsewhere is data.
            if line.startswith('>'):
                current_name = line[1:]
                chunks_by_name[current_name] = []
            elif current_name is None:
                raise ValueError(
                    'sequence data before the first FASTA header: {!r}'.format(line)
                )
            else:
                chunks_by_name[current_name].append(line)
    # Join once per record instead of growing a string line by line.
    return {name: ''.join(chunks) for name, chunks in chunks_by_name.items()}

def gc(fastafile):
    '''Print the record with the highest GC content in a FASTA file.

    The file is parsed with ``fasta_read``. GC content is the percentage of
    'G' and 'C' characters in a record's sequence. Two lines are printed:
    the record name, then its GC content rounded to 6 decimal places.
    When several records share the highest value, the one that comes first
    in the parsed dictionary is reported.

    Raises:
        ValueError: if the file contains no records.
    '''
    records_dict = fasta_read(fastafile)
    if not records_dict:
        raise ValueError('no FASTA records found in {!r}'.format(fastafile))

    gc_by_record = {}
    for record, sequence in records_dict.items():
        if sequence:
            gc_by_record[record] = (
                (sequence.count('G') + sequence.count('C')) / len(sequence) * 100
            )
        else:
            # A header without sequence lines has length 0; treat it as
            # 0% GC instead of dividing by zero.
            gc_by_record[record] = 0.0

    # max() also works when every record has 0% GC, a case in which a
    # "greater than the running maximum" scan never selects any record.
    record_max = max(gc_by_record, key=gc_by_record.get)
    print(record_max, round(gc_by_record[record_max], 6), sep='\n')

# Run on two FASTA files: 'input_gc.txt' and 'rosalind_gc.txt'.
gc('input_gc.txt')
gc('rosalind_gc.txt')

Rosalind_0808
60.91954
Rosalind_2124
53.125


## SUBS: Finding a Motif in DNA

In [84]:
def subs(lst_in):
    '''Print every 1-based position at which a motif occurs in a string.

    ``lst_in`` must hold exactly two items: the string to search and the
    motif to look for. Overlapping occurrences are all reported. The
    positions are written to stdout on one line, separated by spaces; an
    empty line is printed when the motif does not occur.
    '''
    string, sub_string = lst_in
    positions = []
    # Begin at index 0 so that a match at the very start of the string is
    # reported as well.
    start = 0
    while True:
        found = string.find(sub_string, start)
        if found == -1:
            break
        positions.append(str(found + 1))
        # Advance by one character only, so overlapping matches are kept.
        start = found + 1
    print(' '.join(positions))

# Run on an inline example pair, then on the lines of 'rosalind_subs.txt'.
subs(['GATATATGCATATACTT', 'ATAT'])
subs(read_dataset_list('rosalind_subs.txt'))

2 4 10
5 28 35 45 52 69 76 103 240 262 287 488 506 525 604 663 678 702 739
