## Part 1

In [5]:
def complement_base(base):
    """Return the Watson-Crick complement of a single DNA base.

    Parameters
    ----------
    base : str
        One of 'A', 'T', 'G', 'C', in either upper or lower case.

    Returns
    -------
    str
        The complementary base, always upper case.

    Raises
    ------
    ValueError
        If `base` is not one of the four DNA bases. Anything else (for
        example 'N', 'U' or an empty string) is rejected rather than being
        silently mapped to a base.
    """
    complements = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    try:
        return complements[base.upper()]
    except KeyError:
        raise ValueError(f"invalid DNA base: {base!r}") from None


def reverse_complement(seq):
    """Return the reverse complement of `seq`.

    The sequence is walked from its last base to its first and every base
    is replaced by the result of `complement_base`.
    """
    # Complement each base while reading the sequence backwards, then
    # assemble the pieces into a single string.
    return ''.join(complement_base(base) for base in reversed(seq))

In [6]:
reverse_complement('GCAGTTGCA')

'TGCAACTGC'

In [39]:
def reverse_complement2(seq):
    """Return the reverse complement of a DNA sequence, in upper case.

    Parameters
    ----------
    seq : str
        DNA sequence; 'A', 'T', 'G', 'C' are accepted in either case.

    Returns
    -------
    str
        The reversed sequence with every base complemented, upper-cased.
        Characters other than the four bases are kept (upper-cased) in
        their reversed position.
    """
    # A single translation table swaps all bases in one pass. Lower-case
    # bases are listed explicitly so that they are complemented as well,
    # instead of merely being upper-cased at the end.
    complement = str.maketrans('ATGCatgc', 'TACGtacg')
    return seq[::-1].translate(complement).upper()

In [40]:
reverse_complement2('GCAGTTGCA')

'TGCAACTGC'

## Part 2

In [33]:
def longest_common_substring(s1, s2):
    """Return the longest contiguous substring shared by `s1` and `s2`.

    Parameters
    ----------
    s1, s2 : str
        The two strings to compare.

    Returns
    -------
    str
        The longest common substring. If several substrings share the
        maximal length, the one starting earliest in `s1` is returned.
        An empty string is returned when nothing is shared.
    """
    best_len = 0   # length of the longest match found so far
    best_end = 0   # index in s1 just past the end of that match

    # prev_row[j] is the length of the common run ending at the previous
    # character of s1 and at s2[j - 1]; only one row of history is needed.
    prev_row = [0] * (len(s2) + 1)
    for i, a in enumerate(s1, start=1):
        row = [0] * (len(s2) + 1)
        for j, b in enumerate(s2, start=1):
            if a == b:
                # Extend the run that ended one character earlier in both.
                row[j] = prev_row[j - 1] + 1
                # Strict comparison keeps the earliest match on ties.
                if row[j] > best_len:
                    best_len = row[j]
                    best_end = i
        prev_row = row

    return s1[best_end - best_len:best_end]

In [35]:
longest_common_substring('ATGC', 'ATGCA')

'ATGC'

In [36]:
longest_common_substring('GATGCCATGCA', 'ATGCC')

'ATGCC'

In [34]:
longest_common_substring('ACGTGGAAAGCCA', 'GTACACACGTTTTGAGAGACAC')

'ACGT'

## Part 3

In [25]:
def equal_parentheses(p):
    """Return True when `p` has as many '(' as ')' characters.

    Only the counts are compared; the order of the parentheses is not
    examined.
    """
    n_open = p.count('(')
    n_close = p.count(')')
    return n_open == n_close

In [38]:
equal_parentheses('(((....)))')

True

In [39]:
equal_parentheses('(((....))')

False

In [27]:
def dotparen_to_bp(seq):
    """Convert a dot-parenthesis structure into a list of base pairs.

    Parameters
    ----------
    seq : str
        Secondary structure in dot-parenthesis notation.

    Returns
    -------
    list of tuple of int
        One `(open_index, close_index)` tuple per base pair, ordered by
        the index of the opening parenthesis.

    Raises
    ------
    ValueError
        If a ')' has no matching '(' before it, or a '(' is never closed.
    """
    unmatched = []   # indices of '(' still waiting for their partner
    bp = []
    for i, x in enumerate(seq):
        if x == '(':
            unmatched.append(i)
        elif x == ')':
            if not unmatched:
                raise ValueError(f"unmatched ')' at position {i}")
            # A ')' always closes the most recently opened '(' so that
            # structures with several hairpins are paired correctly.
            bp.append((unmatched.pop(), i))
    if unmatched:
        raise ValueError(f"unmatched '(' at position {unmatched[-1]}")

    # Pairs are discovered innermost-first; report them by opening index.
    bp.sort()
    return bp

In [28]:
dotparen_to_bp('(((..((...)))))')

[(0, 14), (1, 13), (2, 12), (5, 11), (6, 10)]

In [18]:
def hairpin_length(seq):
    """Return the loop length of the first hairpin in a structure.

    The first hairpin is the one closed by the first ')' in `seq`; its
    loop consists of the characters between that ')' and the nearest '('
    to its left.

    Parameters
    ----------
    seq : str
        Secondary structure in dot-parenthesis notation.

    Returns
    -------
    int
        Number of characters strictly between the two parentheses.

    Raises
    ------
    ValueError
        If `seq` contains no ')' or no '(' precedes the first ')'.
    """
    close = seq.find(')')
    if close == -1:
        raise ValueError("structure contains no ')'")
    # Restrict the search to the left of the ')' so that a missing '('
    # is reported instead of wrapping around to the end of the string.
    opening = seq.rfind('(', 0, close)
    if opening == -1:
        raise ValueError("no '(' before the first ')'")
    return close - opening - 1

In [19]:
hairpin_length("(((..((...)))))")

3

In [20]:
hairpin_length("(.(....).)")

4

In [21]:
hairpin_length("((((..)))))")

2

In [22]:
def is_valid_hairpin(seq):
    """Return True if every hairpin loop has at least three bases.

    Parameters
    ----------
    seq : str
        Secondary structure in dot-parenthesis notation.

    Returns
    -------
    bool
        False if any hairpin loop in the structure contains zero, one or
        two unpaired bases; True otherwise (including a structure that
        has no hairpin at all).
    """
    # A hairpin loop is a '(' followed only by dots up to the next ')'.
    # Looking for the three too-short forms anywhere in the string checks
    # every hairpin, not just the first one.
    too_short = ('()', '(.)', '(..)')
    return not any(loop in seq for loop in too_short)

In [23]:
is_valid_hairpin("(((..((...)))))")

True

In [24]:
is_valid_hairpin("((((..)))))")

False

In [29]:
def rna_ss_validator(seq, sec_struc, wobble=True):
    """Check whether a secondary structure is valid for an RNA sequence.

    Parameters
    ----------
    seq : str
        RNA sequence made of the upper-case bases 'A', 'U', 'G', 'C'.
    sec_struc : str
        Secondary structure in dot-parenthesis notation.
    wobble : bool, default True
        If True, G-U wobble pairs are accepted in either orientation in
        addition to the Watson-Crick pairs.

    Returns
    -------
    bool
        True if `sec_struc` has the same length as `seq`, its parentheses
        are balanced, its hairpin check passes and every base pair it
        describes is an allowed pairing; False otherwise.
    """
    # The structure must describe exactly one position per base.
    if len(seq) != len(sec_struc):
        return False
    if not equal_parentheses(sec_struc):
        return False

    # Equal counts still admit structures such as ')(' in which a ')'
    # appears before the '(' it would have to close.
    depth = 0
    for symbol in sec_struc:
        if symbol == '(':
            depth += 1
        elif symbol == ')':
            depth -= 1
            if depth < 0:
                return False

    if not is_valid_hairpin(sec_struc):
        return False

    allowed = {('A', 'U'), ('U', 'A'), ('G', 'C'), ('C', 'G')}
    if wobble:
        # Wobble pairing is symmetric: U-G is as acceptable as G-U.
        allowed |= {('G', 'U'), ('U', 'G')}

    return all((seq[i], seq[j]) in allowed
               for i, j in dotparen_to_bp(sec_struc))
            
    

In [30]:
rna_ss_validator('GCAUCUAUGC', '(((....)))')

True

In [31]:
rna_ss_validator('GCAUCUAUGU', '(((....)))')

True

In [32]:
rna_ss_validator('GCAUCUAUGU', '(.(....).)')

True

In [33]:
rna_ss_validator('GCAUCUACGC', '(((....)))')

False

In [38]:
rna_ss_validator('GCAUCUAUGU', '(((....)))', wobble=False)

False

In [35]:
rna_ss_validator('GCAUCUAUGU', '(.(....)).')

False

In [36]:
rna_ss_validator('GCCCUUGGCA', '(.((..))).')

False