Day 2
-

### Lesson 10: Dictionaries (hash tables)

In [2]:
# A small example dictionary, built with keyword-style construction
# (each keyword becomes a string key).
my_dict = dict(a=6, b=7, c=27.6)
my_dict

{'a': 6, 'b': 7, 'c': 27.6}

#### Useful dictionaries in bioinformatics

In [3]:
# One-letter amino acid code -> three-letter abbreviation,
# built from explicit (key, value) pairs.
aa_dict = dict([
    ('A', 'Ala'), ('R', 'Arg'), ('N', 'Asn'), ('D', 'Asp'),
    ('C', 'Cys'), ('Q', 'Gln'), ('E', 'Glu'), ('G', 'Gly'),
    ('H', 'His'), ('I', 'Ile'), ('L', 'Leu'), ('K', 'Lys'),
    ('M', 'Met'), ('F', 'Phe'), ('P', 'Pro'), ('S', 'Ser'),
    ('T', 'Thr'), ('W', 'Trp'), ('Y', 'Tyr'), ('V', 'Val'),
])

In [4]:
# The set of DNA bases
bases = ['T', 'C', 'A', 'G']

# Build the list of all three-base codons. The first base varies slowest and
# the third base fastest, exactly as the equivalent nested for loops would.
codon_list = [first_base + second_base + third_base
              for first_base in bases
              for second_base in bases
              for third_base in bases]

# The amino acids that are coded for (* = STOP codon), listed in the same
# order as codon_list
amino_acids = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG'

# zip() silently stops at the shorter input, so make a length mismatch loud
# instead of quietly producing an incomplete table.
assert len(codon_list) == len(amino_acids), \
    'codon_list and amino_acids must have the same length'

# Build the dictionary from an iterator of (codon, amino acid) pairs
codons = dict(zip(codon_list, amino_acids))

# Show that we did it
print(codons)

{'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*', 'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'}


In [5]:
# Note: the key and value produced by my_dict.items() are ordinary loop variables.
# Assigning a new object to `value` inside the loop only rebinds that local name;
# it does not replace the entry stored in the dictionary.

for key, value in my_dict.items():
    value = 'this string will not be in dictionary.'
    
# Display the dictionary to confirm that its entries are unchanged
my_dict

{'a': 6, 'b': 7, 'c': 27.6}

In [7]:
# To change an entry, assign through the dictionary itself using the key.
# Replacing the value of an existing key while iterating is fine because the
# set of keys does not change.
for key in my_dict:
    my_dict[key] = 'this will be in the dictionary.'
    # Read the entry back from the dictionary so the printed value is the
    # updated one, not the value that was fetched before the assignment.
    print(my_dict[key])
my_dict

this will be in the dictionary.
this will be in the dictionary.
this will be in the dictionary.


{'a': 'this will be in the dictionary.',
 'b': 'this will be in the dictionary.',
 'c': 'this will be in the dictionary.'}

#### Built-in functions for dictionaries

In [8]:
# Reference objects: a four-entry dictionary and a four-element list
my_dict = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
my_list = list(range(1, 5))

# Show the starting contents
print('my_dict:', my_dict)
print('my_list:', my_list)

# len() counts key-value pairs for a dictionary and elements for a list
print('length of my_dict:', len(my_dict))
print('length of my_list:', len(my_list))

# del removes by key for a dictionary and by index for a list
del my_dict['b']
del my_list[1]

# Show what remains after the deletions
print('post-deleted my_dict:', my_dict)
print('post-deleted my_list:', my_list)

my_dict: {'a': 1, 'b': 2, 'c': 3, 'd': 4}
my_list: [1, 2, 3, 4]
length of my_dict: 4
length of my_list: 4
post-deleted my_dict: {'a': 1, 'c': 3, 'd': 4}
post-deleted my_list: [1, 3, 4]


| method | effect |
| --- | --- |
| `d.keys()` | return the keys in `d` |
| `d.pop(key)` | return the value associated with `key` and delete `key` from `d` |
| `d.values()` | return the values in `d` |

In [10]:
"""Using dictionaries as kwargs

A nifty feature of dictionaries is that they can be passed into functions as keyword arguments.
We covered named keyword arguments in the Intro to functions lesson.
In addition to the named keyword arguments, a function can take in arbitrary keyword arguments (as opposed to arbitrary non-keyword, i.e. positional, arguments).
This is specified in the function definition by including a last argument preceded by a double asterisk, **.
The keyword arguments collected by the double-asterisk parameter get passed in as a dictionary."""

def concatenate_sequences(a, b, **kwargs):
    """Join two required sequences, then append any extra sequences.

    Extra sequences are supplied as keyword arguments; their values are
    appended after ``a + b`` in the order the keyword arguments were given.
    The keyword names themselves are not used.
    """
    combined = a + b
    for extra in kwargs.values():
        combined += extra
    return combined
# Let's try it! Print the result so it is shown even though this call is not
# the last expression in the cell.
print(concatenate_sequences('TGACAC', 'CAGGGA', c='GGGGGGGGG', d='AAAATTTTT'))

# Now, imagine we have a dictionary that contains our values.
my_dict = {'a': 'TGACAC',
           'b': 'CAGGGA',
           'c': 'GGGGGGGGG',
           'd': 'AAAATTTTT'}

# We can now pass this directly into the function by preceding it with a double asterisk.
# Beautiful! This example is kind of trivial, but you can imagine that it can come in handy,
# e.g. with large sets of sequence fragments that you read in from a file.
# We will use **kwargs later in the bootcamp.
#
# The call is kept as the final expression so that the cell's displayed output
# is the function's actual return value rather than a pasted string.
concatenate_sequences(**my_dict)

'TGACACCAGGGAGGGGGGGGGAAAATTTTT'

In [11]:
import numpy as np


In [13]:
# Print the mean and then the median of the same five numbers, one per line
print(np.mean([1, 2, 3, 4, 5]), np.median([1, 2, 3, 4, 5]), sep='\n')

3.0
3.0


In [15]:
# Evaluate the bare function object so the notebook displays its repr,
# which includes the call signature
np.mean

<function numpy.mean(a, axis=None, dtype=None, out=None, keepdims=<no value>)>

In [2]:
# NOTE(review): na_savenya is not defined in this notebook; presumably a local
# module that must be importable from the notebook's working directory — confirm
import na_savenya as na

# Sequence
seq = 'GACGATCTAGGCGACCGACTGGCATCG'

# Convert to RNA (in the displayed result every T appears as U)
na.dna_to_rna(seq)

'GACGAUCUAGGCGACCGACUGGCAUCG'