# Dictionaries

Dictionaries are what are known as mappings: a dictionary maps each `key` to a `value`.  The syntax of a dictionary is:

```python
dictionary_name = {key_1:value_1, ..., key_n:value_n}
```

A value is looked up through its key.  Keys can be numbers, strings, tuples, or in general any hashable object (in practice an immutable type; a tuple works as a key only if everything inside it is hashable too).
```python
dictionary_name[key_k]      # the value associated with key key_k
x = dictionary_name[key_k]  # assigns the value to x
dictionary_name[key_k] = y  # dictionaries are mutable  this changes
                            # the value referenced by key_k
```
Since dictionaries are mutable the value assigned to a key can be changed.

# Using a Dictionary

In [1]:
# make a blank dictionary
dictionary_data = {}
print(type(dictionary_data),'has data', dictionary_data)

<class 'dict'> has data {}


In [2]:
# make a dictionary with some `key:value` pairs
dictionary_data = {'first name': 'Kevin', 'height': 74}
print(type(dictionary_data), dictionary_data)

# look up values in the dictionary using their keys
print(type(dictionary_data['first name']), dictionary_data['first name'])
print(type(dictionary_data['height']), dictionary_data['height'])

<class 'dict'> {'first name': 'Kevin', 'height': 74}
<class 'str'> Kevin
<class 'int'> 74


We can add a key:value pair to an existing dictionary using the assignment statement.  But if the key already exists in the dictionary, the assignment changes the value associated with that key instead.

In [3]:
# add an item to the dictionary
dictionary_data['month born'] = 'May'
print(type(dictionary_data), dictionary_data)

# or change an item
dictionary_data['month born'] = 'March'
print(type(dictionary_data), dictionary_data)

<class 'dict'> {'first name': 'Kevin', 'height': 74, 'month born': 'May'}
<class 'dict'> {'first name': 'Kevin', 'height': 74, 'month born': 'March'}


In [4]:
# if you try to use a key that does not exist you will get a KeyError exception
dictionary_data['age']

KeyError: 'age'

In [5]:
# you can use the .get method instead
# if a key does not exist it returns the value None
print(dictionary_data.get('age'))
print(dictionary_data.get('height'))
# instead of None we can return a default
print(dictionary_data.get('age', 'indeterminate'))

None
74
indeterminate


In [6]:
# we can check to see if 'age' is a key
print(f"{'age' in dictionary_data}")

False


In [7]:
# we can iterate through a dictionary; the loop variable takes each key in turn
for key in dictionary_data:
    print(f"{key} -> {dictionary_data[key]}")

first name -> Kevin
height -> 74
month born -> March


In [8]:
# we can also delete a dictionary key:value pair
del dictionary_data['month born']
print(type(dictionary_data), dictionary_data)

# or we can clear the dictionary (the now-empty dictionary still exists)
dictionary_data.clear()
print(type(dictionary_data), dictionary_data)

# or we can delete the entire dictionary
del dictionary_data
print(type(dictionary_data), dictionary_data)  # This will cause a NameError since dictionary_data no longer exists

<class 'dict'> {'first name': 'Kevin', 'height': 74}
<class 'dict'> {}


NameError: name 'dictionary_data' is not defined

# Conversion Between Data Structures

In [9]:
ls = [1, 2, 3, 1]
print(ls)

tp = tuple(ls)  # convert list to tuple
print(tp)

ls_for_dictionary = [('one',1),('two', 2)]  # list of (key, value) tuples
dt = dict(ls_for_dictionary) # convert list of tuples to dictionary
print(type(dt), dt)

dts = str(dt)  # convert dictionary to string
print(type(dts),dts)

[1, 2, 3, 1]
(1, 2, 3, 1)
<class 'dict'> {'one': 1, 'two': 2}
<class 'str'> {'one': 1, 'two': 2}


# Dictionary Functions and Methods

First, let's build a function to generate all the primes up to some arbitrary integer `num_max`.


In [10]:
def primes_to(num_max):
    """Build a dictionary of the primes up to and including num_max.

    Uses trial division by the primes found so far.  A candidate is only
    tested against primes p with p*p <= candidate, because a composite
    number always has a prime factor no larger than its square root.

    Parameters:
        num_max: largest integer to test for primality.

    Returns:
        A dictionary mapping a 0-based index to the prime at that
        position, e.g. {0: 2, 1: 3, 2: 5, ...}.  The dictionary is empty
        when num_max is less than 2.
    """
    # There are no primes below 2, so do not seed the result with 2.
    if num_max < 2:
        return {}

    primes = {0: 2}
    key = 1
    for k in range(3, num_max + 1):
        not_divisible = True

        # Dictionaries iterate in insertion order, so the primes come out
        # in increasing order and we can stop once p*p exceeds k.
        for p in primes.values():
            if p * p > k:
                break
            if k % p == 0:
                not_divisible = False
                break

        if not_divisible:
            primes[key] = k
            key = key + 1
    return primes

num_max = 10000
primes = primes_to(num_max)
print(f"There are {len(primes)} prime numbers between 1 and {num_max}")


There are 1229 prime numbers between 1 and 10000


In [11]:
# let's build a reverse lookup dictionary (prime -> index)
prime_to_index = {}
for key in primes:
    prime_to_index[primes[key]] = key


# where is the prime 149 (indices start at 0, so add 1 for its position)
print(f"{149} is the {prime_to_index[149] +1}'th prime")

# is 511 prime
number = 511

if number in prime_to_index:
    print(f"{number} is a prime number")
else:
    print(f"{number} is not a prime number")
    

149 is the 35'th prime
511 is not a prime number


## The .keys() and .values() and .items() methods 

In [12]:
prime_numbers = list(prime_to_index.keys())
149 in prime_numbers

True

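In CPython 3.6 dictionaries preserve the order in which their keys were inserted, and from Python 3.7 on this
is a guaranteed part of the language; `.keys()`, `.values()` and `.items()` all return their contents in that order.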

In [13]:
some_integers = list(prime_to_index.values())
#print(some_integers)

In [14]:
def dictionary_printer(d, n):
    """Print the first n items of a dictionary, followed by a blank line.

    Parameters:
        d: dictionary whose items are printed in iteration order.
        n: maximum number of items to print; no items are printed
           when n <= 0 (the trailing blank line is still printed).
    """
    for m, (k, v) in enumerate(d.items()):
        # Stop as soon as n items are out instead of walking the rest of d.
        if m >= n:
            break
        print(f"dictionary[{k}] -> {v}")
    print()

dictionary_printer(primes, 10)

dictionary[0] -> 2
dictionary[1] -> 3
dictionary[2] -> 5
dictionary[3] -> 7
dictionary[4] -> 11
dictionary[5] -> 13
dictionary[6] -> 17
dictionary[7] -> 19
dictionary[8] -> 23
dictionary[9] -> 29



##  List and Dictionary Comprehensions

In list comprehensions we use square brackets, `[]`,  and a single target variable using the following general format.

```python
x = [f(y) for y in iterable if g(y)]  # x is a list
```

In dictionary comprehensions we use curly brackets, `{}`, and a key, value target using the following general format,

```python
x = {k:v for (k, v) in iterable if g(k, v)}
x = {k:v(k) for k in iterable if g(k)}
```


In [16]:
squares = [y**2 for y in range(1, 100)]
print(squares)

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209, 2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 4096, 4225, 4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776, 5929, 6084, 6241, 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569, 7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 9216, 9409, 9604, 9801]


In [17]:
dict_squares = {x: x**2 for x in range(4)}
print(dict_squares)

{0: 0, 1: 1, 2: 4, 3: 9}


In [15]:
alt_prime_to_index = {v:k for k,v in primes.items()}
dictionary_printer(alt_prime_to_index, 10)

dictionary[2] -> 0
dictionary[3] -> 1
dictionary[5] -> 2
dictionary[7] -> 3
dictionary[11] -> 4
dictionary[13] -> 5
dictionary[17] -> 6
dictionary[19] -> 7
dictionary[23] -> 8
dictionary[29] -> 9



## Example Uses of Dictionaries

In [18]:
# simple word count program
text = "life is like a box of chocolates you never know what you will get next you never know"
word_count = {}
for word in text.split():
    # .get(word, 0) supplies 0 the first time a word is seen, avoiding a KeyError
    word_count[word] = word_count.get(word, 0) + 1
    
for word in word_count:
    print(f"The word '{word}' occurs {word_count[word]} times in the string.")

The word 'life' occurs 1 times in the string.
The word 'is' occurs 1 times in the string.
The word 'like' occurs 1 times in the string.
The word 'a' occurs 1 times in the string.
The word 'box' occurs 1 times in the string.
The word 'of' occurs 1 times in the string.
The word 'chocolates' occurs 1 times in the string.
The word 'you' occurs 3 times in the string.
The word 'never' occurs 2 times in the string.
The word 'know' occurs 2 times in the string.
The word 'what' occurs 1 times in the string.
The word 'will' occurs 1 times in the string.
The word 'get' occurs 1 times in the string.
The word 'next' occurs 1 times in the string.


In [20]:
# build a sparse matrix using a dictionary with tuple keys

identity_matrix = {}
for k in range (10000):
    diag = (k, k)
    identity_matrix[diag] = 1.0

dictionary_printer(identity_matrix, 10)

dictionary[(0, 0)] -> 1.0
dictionary[(1, 1)] -> 1.0
dictionary[(2, 2)] -> 1.0
dictionary[(3, 3)] -> 1.0
dictionary[(4, 4)] -> 1.0
dictionary[(5, 5)] -> 1.0
dictionary[(6, 6)] -> 1.0
dictionary[(7, 7)] -> 1.0
dictionary[(8, 8)] -> 1.0
dictionary[(9, 9)] -> 1.0



In [21]:
# to use the whole matrix we can use code like this
def get_value(m, i, j):
    """Look up element (i, j) of the sparse matrix m.

    m is a dictionary keyed by (i, j) tuples; any position that has no
    entry in the dictionary is reported as 0.0.
    """
    return m.get((i, j), 0.0)

get_value(identity_matrix, 500, 500)

1.0

# Copying Dictionaries 

Copy and Deepcopy 


Notice below that using assignment `g = f` simply points to the same dictionary in memory.  
On the other hand, using `f.copy()` makes a new dictionary which is a copy of f.

In [None]:
f = {'a':'A', 'b':'B', 'c':'C'} 
g = f
print(f"dictionary {f} is at {id(f)} while \ndictionary {g} is at {id(g)}")
f['d'] = 'D'
print()
print(f)
print(g)
print()
h=f.copy()
print(f"dictionary {f} is at {id(f)} while \ndictionary {h} is at {id(h)}")

You can run into difficulties using `.copy` when the copied object contains other mutable objects, because `.copy` only makes a shallow copy, as shown in the following example.

In [None]:
# assignment works by pointing to the same data structure
f = [{'one':1, 'two':2}, {'a':'A', 'b':'B'}]
g = f
print(f"list {f} is at {id(f)} while \nlist {g} is at {id(g)}")
print()

# .copy() makes a new outer list, but it is a shallow copy:
# the dictionaries inside it are still shared between f and h
h = f.copy()
print(f"list {f} is at {id(f)} while \nlist {h} is at {id(h)}")
print()

f[0]['one'] = 4  # we can change this value to 4 in the first dictionary of list f
print(f"list {f} is at {id(f)} while \nlist {h} is at {id(h)}")  # but it also changed the value in h
print()


To fix this we have to use `deepcopy` from the standard-library `copy` module.

In [None]:
import copy
# deepcopy recursively copies the nested dictionaries as well as the outer list
f = [{'one':1, 'two':2}, {'a':'A', 'b':'B'}]

h = copy.deepcopy(f)
print(f"list {f} is at {id(f)} while \nlist {h} is at {id(h)}")
print()

f[0]['one'] = 4  # we can change this value to 4 in the first dictionary of list f
print(f"list {f} is at {id(f)} while \nlist {h} is at {id(h)}")  # and now it does not change in h
print()

# A Few Interesting Things

## Pretty Print Function pprint

In [26]:
import pprint
dict_squares = {x: x**2 for x in range(15)}
print(dict_squares)
print()
pprint.pprint(dict_squares)

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100, 11: 121, 12: 144, 13: 169, 14: 196}

{0: 0,
 1: 1,
 2: 4,
 3: 9,
 4: 16,
 5: 25,
 6: 36,
 7: 49,
 8: 64,
 9: 81,
 10: 100,
 11: 121,
 12: 144,
 13: 169,
 14: 196}
[1, 3, 5, 7, 9, 11]


## Timing Code

In [30]:
import timeit

def test_concat():
    """Grow a 1000-element list by concatenating a one-item list each pass."""
    numbers = []
    for n in range(1000):
        # builds a brand-new list every pass; this is the cost being timed
        numbers = numbers + [n]

def test_append():
    """Grow a 1000-element list with repeated in-place append calls."""
    numbers = []
    for n in range(1000):
        numbers.append(n)

def test_comp():
    """Build a 1000-element list with a single list comprehension."""
    numbers = [n for n in range(1000)]

In [32]:
# timing various ways to build a list
# Timer.timeit(number=1000) returns the total time in seconds for all 1000
# calls, which is numerically the average time per call in milliseconds

t1 = timeit.Timer("test_concat()", "from __main__ import test_concat")
print(f"concat {t1.timeit(number=1000)}, milleseconds.")
t2 = timeit.Timer("test_append()", "from __main__ import test_append")
print(f"append {t2.timeit(number=1000)}, milleseconds.")
t3 = timeit.Timer("test_comp()", "from __main__ import test_comp")
print(f"comprehension {t3.timeit(number=1000)}, milleseconds.")

concat 1.025269699999967, milleseconds.
append 0.054182100000048194, milleseconds.
comprehension 0.02637640000000374, milleseconds.


## unicode

In [34]:
# unicode with chr()
# code points 33-126: the printable ASCII characters (space excluded)
for k in range(33, 127):
    if k % 6 == 0:  # start a new output row at every multiple of 6
        print()

    print(f"{k:6} = {chr(k)} ", end ='')
print()
print()

# code points 945-969: the lowercase Greek letters alpha through omega
for k in range(945, 970):
    if k % 6 == 0:  # start a new output row at every multiple of 6
        print()

    print(f"{k:6} = {chr(k)} ", end ='')

    33 = !     34 = "     35 = # 
    36 = $     37 = %     38 = &     39 = '     40 = (     41 = ) 
    42 = *     43 = +     44 = ,     45 = -     46 = .     47 = / 
    48 = 0     49 = 1     50 = 2     51 = 3     52 = 4     53 = 5 
    54 = 6     55 = 7     56 = 8     57 = 9     58 = :     59 = ; 
    60 = <     61 = =     62 = >     63 = ?     64 = @     65 = A 
    66 = B     67 = C     68 = D     69 = E     70 = F     71 = G 
    72 = H     73 = I     74 = J     75 = K     76 = L     77 = M 
    78 = N     79 = O     80 = P     81 = Q     82 = R     83 = S 
    84 = T     85 = U     86 = V     87 = W     88 = X     89 = Y 
    90 = Z     91 = [     92 = \     93 = ]     94 = ^     95 = _ 
    96 = `     97 = a     98 = b     99 = c    100 = d    101 = e 
   102 = f    103 = g    104 = h    105 = i    106 = j    107 = k 
   108 = l    109 = m    110 = n    111 = o    112 = p    113 = q 
   114 = r    115 = s    116 = t    117 = u    118 = v    119 = w 
   120 = x    121 = y    122