## 1. Complementing a Strand of DNA

In [1]:
def reverse_complement(dna_seq):
    """Return the reverse complement of a DNA string.

    The strand is read from its last base to its first, and every base is
    replaced by its pairing partner (A<->T, C<->G).

    Raises:
        KeyError: if ``dna_seq`` contains a symbol other than uppercase
            A, C, G or T.
    """
    pairing = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    return ''.join(pairing[nucleotide] for nucleotide in reversed(dna_seq))

In [2]:
# Demo: reverse-complement a short sample strand.
dna_seq = 'ATCGATCGATCG'
reverse_complement(dna_seq)

'CGATCGATCGAT'

## 2. Computing GC Content

In [3]:
def process_input(fasta):
    """Split FASTA-formatted text into ``(id, sequence)`` pairs.

    Every record starts with ``>``. The rest of that line is the record ID,
    and all following lines up to the next ``>`` are concatenated into one
    sequence string.

    Args:
        fasta: FASTA text holding zero or more records. Both ``\\n`` and
            ``\\r\\n`` line endings are accepted.

    Returns:
        A list of ``(seq_id, seq_data)`` tuples in input order. A record
        that has an ID line but no sequence lines yields an empty sequence.
    """
    dna_strings = []
    # Text before the first '>' does not belong to any record, hence the [1:].
    for record in fasta.strip().split(">")[1:]:
        # splitlines() copes with CRLF input and with header-only records,
        # where indexing the result of split("\n", 1) would fail or leave
        # stray '\r' characters behind.
        lines = record.splitlines()
        if not lines:
            # A bare '>' with nothing after it carries no ID; skip it.
            continue
        seq_id = lines[0]
        seq_data = "".join(lines[1:])
        dna_strings.append((seq_id, seq_data))
    return dna_strings

def gc_content(dna_seq):
    """Return the fraction of symbols in ``dna_seq`` that are 'G' or 'C'.

    Args:
        dna_seq: DNA string; only uppercase 'G' and 'C' are counted.

    Returns:
        A float between 0.0 and 1.0. An empty sequence yields 0.0 instead of
        raising ZeroDivisionError.
    """
    if not dna_seq:
        # Nothing to measure; avoid dividing by a zero length.
        return 0.0
    gc_count = dna_seq.count('G') + dna_seq.count('C')
    return gc_count / len(dna_seq)

def highest_gc_content(dna_strings):
    """Print the ID and GC percentage of the most GC-rich sequence.

    Two lines are printed: the winning ID, then its GC content as a
    percentage with six decimal places. The first record wins ties. When
    ``dna_strings`` is empty, an empty ID and ``0.000000`` are printed.

    Args:
        dna_strings: sequence of ``(seq_id, seq_data)`` records, such as the
            list returned by ``process_input``.

    Returns:
        None; the result is only printed.
    """
    best_id = ""
    best_gc = 0.0
    for index, record in enumerate(dna_strings):
        gc = gc_content(record[1])
        # Always accept the first record, so that input containing no G or C
        # at all still reports a real ID rather than the empty placeholder.
        if index == 0 or gc > best_gc:
            best_id = record[0]
            best_gc = gc
    # ':.6f' already rounds to six decimals, so no separate round() is needed.
    print(f'{best_id}\n{best_gc * 100:.6f}')

In [4]:
# Sample dataset in FASTA format: three records, each spanning two lines.
fasta = '''>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT'''

# Parse the sample above and report its most GC-rich record. The variable
# passed here must be the one this cell defines, so that the cell runs on a
# fresh kernel.
highest_gc_content(process_input(fasta))

Rosalind_0808
60.919540


## 3. Translating RNA into Protein

In [13]:
# RNA codon table: maps each of the 64 codons to a one-letter amino-acid code.
# The stop codons (UAA, UAG, UGA) map to a single space.
# Codons are generated with the first, second and third base each running
# through U, C, A, G (third base varying fastest). The residue string lists
# the matching amino acids in that same order, four per leading base pair.
codon_table = dict(zip(
    (b1 + b2 + b3 for b1 in 'UCAG' for b2 in 'UCAG' for b3 in 'UCAG'),
    'FFLL' 'SSSS' 'YY  ' 'CC W'    # UU*, UC*, UA*, UG*
    'LLLL' 'PPPP' 'HHQQ' 'RRRR'    # CU*, CC*, CA*, CG*
    'IIIM' 'TTTT' 'NNKK' 'SSRR'    # AU*, AC*, AA*, AG*
    'VVVV' 'AAAA' 'DDEE' 'GGGG',   # GU*, GC*, GA*, GG*
))

def translate(rna_seq):
    """Translate an RNA string into a protein string, one codon at a time.

    Reading starts at index 0 and advances three bases per step; trailing
    bases that do not fill a whole codon are ignored. Translation ends at the
    first stop codon (stored as ' ' in ``codon_table``), and a codon that is
    missing from the table is written as 'X'.
    """
    residues = []
    whole_codons = len(rna_seq) // 3
    for index in range(whole_codons):
        triplet = rna_seq[3 * index:3 * index + 3]
        residue = codon_table.get(triplet)
        if not residue:
            # Codon not present in the table.
            residues.append('X')
        elif residue == ' ':
            # Stop codon: nothing after it is translated.
            break
        else:
            residues.append(residue)
    return ''.join(residues)


In [14]:
# Demo: sample mRNA whose final codon, UGA, is a stop codon, so translation
# ends there and the stop itself is not part of the returned protein.
rna_seq = 'AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA'
translate(rna_seq)


'MAMAPRTEINSTRING'

## 4. Inferring mRNA from Protein

In [20]:
# Tally, for every symbol used as a value in codon_table, how many codons map to it.
amino_acids = set(codon_table.values())  # distinct symbols, stop marker included
codon_counts = dict.fromkeys(amino_acids, 0)  # every tally starts at zero

for codon in codon_table:
    amino_acid = codon_table[codon]
    codon_counts[amino_acid] = codon_counts[amino_acid] + 1

print(codon_counts)

{'T': 4, 'I': 3, 'V': 4, 'D': 2, 'K': 2, 'R': 6, 'N': 2, 'C': 2, 'Y': 2, 'P': 4, ' ': 3, 'S': 6, 'F': 2, 'E': 2, 'Q': 2, 'W': 1, 'H': 2, 'L': 6, 'A': 4, 'M': 1, 'G': 4}


In [21]:
def count_rna_strings(protein_seq, modulus=1000000):
    """Count the mRNA strings that translate to ``protein_seq``, modulo ``modulus``.

    The count is the product of the number of codons encoding each residue
    (looked up in ``codon_counts``), multiplied by three for the terminating
    stop codon. The running product is reduced after every multiplication so
    intermediate values stay small.

    Args:
        protein_seq: protein string of one-letter amino-acid codes.
        modulus: value the count is reduced by; defaults to 1,000,000.

    Returns:
        The number of candidate mRNA strings modulo ``modulus``.

    Raises:
        KeyError: if a residue is not a key of ``codon_counts``.
    """
    stop_codon_count = 3  # factor applied once for the terminating stop codon
    num_possible_rnas = 1
    for amino_acid in protein_seq:
        num_possible_rnas = (num_possible_rnas * codon_counts[amino_acid]) % modulus
    return (num_possible_rnas * stop_codon_count) % modulus


In [22]:
# Demo: number of mRNA strings (mod 1,000,000) that could encode this sample protein.
count_rna_strings('MAMAPRTEINSTRING')

102976