Python for Everybody
## Chapter 09:  Dictionaries

In [76]:
# A dictionary resembles a list, except that its index positions can be almost any type.
# Those indices are known as 'keys', and every key maps to one value.

d = dict()     # build an empty dictionary.  'dict' is a built-in name, so never reuse it for a variable
print(d)

d = {}         # empty curly braces also produce a dictionary with no items
print(d)

# ----- Adding key-value pairs -----
d['John'] = 30   # square brackets store a pair: the key is 'John', the value is 30
print(d)
d['Jane'] = 28   # a second pair is added the same way
print(d)

# ----- Retrieving a value from a specific key -----
# Indexing with a key hands back the value stored under that key.
print("---", d['Jane'], d['John'], sep="\n")

# ----- Finding how many key-value pairs are in the dictionary -----
print("---", len(d), sep="\n")

# ----- Finding out if a particular key is in the dictionary -----
# The 'in' operator tests the keys of a dictionary, not its values.
print("---", 'Jane' in d, 'Mike' in d, sep="\n")

# ----- Creating a list of keys/values -----
# values() and keys() return view objects; list() turns a view into an ordinary list.
print("---")
print(d.values(), list(d.values()), d.keys(), list(d.keys()), sep="\n")

# ----- Finding out if a particular value is in the dictionary -----
# Because 'in' only checks keys, gather the values into a list and search that instead.
vals = [value for value in d.values()]
print("---", 30 in vals, 300 in vals, sep="\n")


{}
{}
{'John': 30}
{'John': 30, 'Jane': 28}
---
28
30
---
2
---
True
False
---
dict_values([30, 28])
[30, 28]
dict_keys(['John', 'Jane'])
['John', 'Jane']
---
True
False


In [77]:
# Since Python 3.7 a dictionary remembers the order in which its keys were inserted,
# so printing it or looping over it lists the items in insertion order.
# Interpreters older than 3.7 made no such guarantee, which is why output saved from
# an old kernel can show the items in a different, seemingly arbitrary order.

a = {'dog' : 4, "kangaroo" : 3, "snake": 0, "penguin" : 2, "spider" : 8, "octopus" : 2}

# On a current interpreter the items are printed in the same order they were written above.

print(a)


{'spider': 8, 'penguin': 2, 'dog': 4, 'octopus': 2, 'kangaroo': 3, 'snake': 0}


In [1]:
# 9.01 Dictionary as a set of counters

# A minimal example: a dictionary used to tally how often each letter occurs in a word.

word = "banana"
d = {}                                  # no counters yet
for char in word:                       # visit the characters one at a time
    # get() gives the count so far, or 0 the first time a letter is seen;
    # adding 1 therefore both creates new counters and increments existing ones
    d[char] = d.get(char, 0) + 1

print("d = {} ".format(d))
print("---")
for x in d:                             # looping over a dictionary yields its keys
    print("There are {} instance(s) of \'{}\'.".format(d[x], x))

print("---")
a = {
    'dog': 4,
    'kangaroo': 3,
    'snake': 0,
    'penguin': 2,
    'spider': 8,
    'octopus': 2,
}
default = -1
# get(key, fallback) returns the stored value when the key exists and the fallback otherwise
print(a.get('dog', default), a.get('cat', default), sep="\n")


d = {'b': 1, 'a': 3, 'n': 2} 
---
There are 1 instance(s) of 'b'.
There are 3 instance(s) of 'a'.
There are 2 instance(s) of 'n'.
---
4
-1


In [4]:
# 9.02 Dictionaries and files

# Dictionaries are commonly used to count how many times words appear in a text.

fname = "text/moby_dick.txt"

# Create a dictionary with all the words in the file as keys, and the number of occurrences as values.
# 'counts' is created before the file is opened, so it exists (empty) even if the file cannot be read.
counts = dict()
try:
    with open(fname) as fhand:                     # 'with' closes the file when the block ends
        for line in fhand:                         # outer loop reads the lines of the file
            words = line.split()
            for word in words:                     # inner loop iterates through the words of the line
                if word not in counts:
                    counts[word] = 1
                else:
                    counts[word] += 1
except OSError:                                    # e.g. the file is missing or unreadable
    print('File cannot be opened:', fname)

print(counts)

# Words with punctuation attached (like "me." and "me,") are stored under their own keys,
# so they are not counted accurately.  We'll solve this problem in 9.04 below.
# get() is used for the look-ups so that a key that is absent prints 0 instead of raising a KeyError.

print("---")
print(counts.get('me', 0))
print("---")
print(counts.get('me.', 0))
print("---")
print(counts.get('me,', 0))


{'Call': 1, 'me': 3, 'Ishmael.': 1, 'Some': 1, 'years': 1, 'ago—never': 1, 'mind': 1, 'how': 1, 'long': 1, 'precisely—having': 1, 'little': 2, 'or': 2, 'no': 1, 'money': 1, 'in': 4, 'my': 4, 'purse,': 1, 'and': 7, 'nothing': 2, 'particular': 1, 'to': 5, 'interest': 1, 'on': 1, 'shore,': 1, 'I': 9, 'thought': 1, 'would': 1, 'sail': 1, 'about': 2, 'a': 5, 'see': 1, 'the': 10, 'watery': 1, 'part': 1, 'of': 4, 'world.': 1, 'It': 1, 'is': 4, 'way': 1, 'have': 1, 'driving': 1, 'off': 1, 'spleen': 1, 'regulating': 1, 'circulation.': 1, 'Whenever': 1, 'find': 2, 'myself': 2, 'growing': 1, 'grim': 1, 'mouth;': 1, 'whenever': 3, 'it': 3, 'damp,': 1, 'drizzly': 1, 'November': 1, 'soul;': 1, 'involuntarily': 1, 'pausing': 1, 'before': 1, 'coffin': 1, 'warehouses,': 1, 'bringing': 1, 'up': 1, 'rear': 1, 'every': 1, 'funeral': 1, 'meet;': 1, 'especially': 1, 'hypos': 1, 'get': 2, 'such': 1, 'an': 1, 'upper': 1, 'hand': 1, 'me,': 1, 'that': 1, 'requires': 1, 'strong': 1, 'moral': 1, 'principle': 1, '

In [5]:
# 9.03 Looping and dictionaries

# ----- Copied from 9.02 -----
fname = "text/moby_dick.txt"
# Create a dictionary with all the words in the file as keys, and the number of occurrences as values
counts = dict()
try:
    with open(fname) as fhand:          # 'with' closes the file when the block ends
        for line in fhand:
            words = line.split()
            for word in words:
                if word not in counts:
                    counts[word] = 1
                else:
                    counts[word] += 1
except OSError:                         # e.g. the file is missing or unreadable
    print('File cannot be opened:', fname)
# ----- ----- ----- ----- ----- ----- -----

# Find how many times "nothing" appears in the text
for key in counts:          # traverses the keys of the dictionary
    if key == "nothing":
        print("There are {} instances of \'nothing\'".format(counts[key]))

print("---")

# Create a new dictionary with only the words that appear twice or more.
d = dict()
for key, value in counts.items():       # items() yields (key, value) pairs
    if value > 1:
        d[key] = value
print(d)


# Delete all the words that only appear once from 'counts'
# This first attempt fails: you cannot change the size of a dictionary while you are iterating through it.
# The error is caught and printed so that the demonstration does not stop the rest of the notebook.
try:
    for key, value in counts.items():
        if value == 1:
            del counts[key]
except RuntimeError as err:
    print("RuntimeError: {}".format(err))

# The safe way: list() takes a snapshot of the items first, so the loop walks the snapshot
# while the deletions are applied to the real dictionary.
for key, value in list(counts.items()):
    if value == 1:
        del counts[key]


There are 2 instances of 'nothing'
---
{'me': 3, 'little': 2, 'or': 2, 'in': 4, 'my': 4, 'and': 7, 'nothing': 2, 'to': 5, 'I': 9, 'about': 2, 'a': 5, 'the': 10, 'of': 4, 'is': 4, 'find': 2, 'myself': 2, 'whenever': 3, 'it': 3, 'get': 2, 'time': 2, 'as': 2}


RuntimeError: dictionary changed size during iteration

In [80]:
# 9.04 Advanced Text Parsing

# Here's the code from the text book.
# You will be learning a better method for cleaning up text data later using Natural Language Processing.

# Because python treats words as tokens separated by spaces, "soft" and "soft!" would be considered different words.

import string
import pprint                                  # pretty printing for debugging
pp = pprint.PrettyPrinter(indent=4)            # create a pretty printing object used for debugging

fname = "text/moby_dick.txt"

# Translation table that deletes every character listed in string.punctuation.
# For now, you don't need to know how exactly this works.
# The table never changes, so it is built once here rather than once per line.
# Note: string.punctuation holds ASCII punctuation only, so words joined by a
# non-ASCII character such as an em dash ("ago—never") still end up as a single key.
remove_punctuation = str.maketrans('', '', string.punctuation)

counts = dict()
try:
    with open(fname) as fhand:                 # 'with' closes the file when the block ends
        for line in fhand:
            line = line.rstrip()                        # copy of 'line' without trailing whitespace (including the newline)
            line = line.translate(remove_punctuation)   # copy of 'line' with the punctuation characters removed
            line = line.lower()                         # copy of 'line' in all lower case
            words = line.split()                        # list of the words in 'line'

            for word in words:
                if word not in counts:
                    counts[word] = 1
                else:
                    counts[word] += 1
except OSError:                                # e.g. the file is missing or unreadable
    print('File cannot be opened:', fname)
pp.pprint(counts)
# counts['me,'] would now raise a KeyError: the comma was removed before counting,
# so that text is counted under the key 'me'.


{   'a': 5,
    'about': 2,
    'account': 1,
    'ago—never': 1,
    'all': 1,
    'almost': 1,
    'an': 1,
    'and': 7,
    'as': 2,
    'ball': 1,
    'before': 1,
    'bringing': 1,
    'but': 1,
    'call': 1,
    'can': 1,
    'cato': 1,
    'cherish': 1,
    'circulation': 1,
    'coffin': 1,
    'damp': 1,
    'degree': 1,
    'deliberately': 1,
    'driving': 1,
    'drizzly': 1,
    'especially': 1,
    'every': 1,
    'feelings': 1,
    'find': 2,
    'flourish': 1,
    'for': 1,
    'from': 1,
    'funeral': 1,
    'get': 2,
    'grim': 1,
    'growing': 1,
    'hand': 1,
    'hats': 1,
    'have': 1,
    'high': 1,
    'himself': 1,
    'his': 1,
    'how': 1,
    'hypos': 1,
    'i': 9,
    'if': 1,
    'in': 4,
    'interest': 1,
    'into': 1,
    'involuntarily': 1,
    'is': 4,
    'ishmael': 1,
    'it': 5,
    'knew': 1,
    'knocking': 1,
    'little': 2,
    'long': 1,
    'me': 5,
    'meet': 1,
    'men': 1,
    'methodically': 1,
    'mind': 1,
    'money': 1