# Dictionaries and Sets

## Modern dict Syntax

### dict Comprehensions

In [2]:
# (dial code, country name) pairs. Any iterable of 2-item pairs like this one
# can feed a dict comprehension.
dial_codes = [
    (880, 'Bangladesh'),
    (55, 'Brazil'),
    (86, 'China'),
    (91, 'India'),
    (62, 'Indonesia'),
    (81, 'Japan'),
    (234, 'Nigeria'),
    (92, 'Pakistan'),
    (7, 'Russia'),
    (1, 'United States'),
]

# Swap each pair: the country name becomes the key, the dial code the value.
country_dial = {name: dial for dial, name in dial_codes}
print(country_dial)

# Swap back to code -> name, visiting the items sorted by country name,
# upper-casing each name and keeping only the codes below 70.
{dial: name.upper() for name, dial in sorted(country_dial.items()) if dial < 70}

{'Bangladesh': 880, 'Brazil': 55, 'China': 86, 'India': 91, 'Indonesia': 62, 'Japan': 81, 'Nigeria': 234, 'Pakistan': 92, 'Russia': 7, 'United States': 1}


{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}

### Unpacking Mappings

In [6]:
# ** can be applied to several arguments in a single call, provided every key
# is a string and no key repeats across the arguments (duplicate keyword
# arguments are forbidden).
def dump(**kwargs):
    """Return the keyword arguments received, collected in a dict."""
    return kwargs

print(dump(**{'x': 1}, y=2, **{'z': 3}))

# ** also works inside a dict literal, any number of times. Duplicate keys are
# allowed here: the later 'x' overrides the earlier one.
d = {
    'a': 0,
    **{'x': 1},
    'y': 2,
    **{'z': 3, 'x': 4},
}
print(d)

{'x': 1, 'y': 2, 'z': 3}
{'a': 0, 'x': 4, 'y': 2, 'z': 3}


### Merging Mappings with |

In [12]:
# Since Python 3.9, | and |= merge mappings, mirroring their role as the set
# union operators. | builds a new mapping and leaves both operands untouched.
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)
print(d1 | d2, d1, d2, sep='\n')

# |= updates the left-hand mapping in place instead.
print('--------', d1, sep='\n')
d1 |= d2
print(d1, d2, sep='\n')

{'a': 2, 'b': 4, 'c': 6}
{'a': 1, 'b': 3}
{'a': 2, 'b': 4, 'c': 6}
--------
{'a': 1, 'b': 3}
{'a': 2, 'b': 4, 'c': 6}
{'a': 2, 'b': 4, 'c': 6}


## Pattern Matching with Mappings

In [1]:
def get_creators(record: dict) -> list:
    match record:
        case {'type': 'book', 'api': 2, 'authors': [names]}: #1
            return names
        case {'type': 'book', 'api': 1, 'author': name}: #2
            return [name]
        case {'type': 'book'}: #3
            raise ValueError(f"Invalid 'book' record: {record!r}")
        case {'type': 'movie', 'director': name}: #4
            return [name]
        case _:
            raise ValueError(f"Invalid record: {record!r}")

In [4]:
# A version-1 book record: a single name stored under 'author'.
b1 = {
    'api': 1,
    'author': 'Douglas Hofstadter',
    'type': 'book',
    'title': 'Godel, Escher, Bach',
}
get_creators(b1)

['Douglas Hofstadter']

In [10]:
from collections import OrderedDict
# A version-2 book record held in an OrderedDict rather than a plain dict;
# its 'authors' entry is a list of three names.
b2 = OrderedDict(
    api=2,
    type='book',
    title='Python in a Nutshell',
    authors='Martelli Ravenscroft Holden'.split(),
)
get_creators(b2)

ValueError: Invalid 'book' record: {'api': 2, 'type': 'book', 'title': 'Python in a Nutshell', 'authors': ['Martelli', 'Ravenscroft', 'Holden']}

In [13]:
# A book record without author data is rejected with ValueError. The error is
# caught and displayed so that running every cell in sequence is not
# interrupted.
try:
    get_creators({'type': 'book', 'pages': 770})
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: Invalid 'book' record: {'type': 'book', 'pages': 770}

In [14]:
# A str is not a mapping, so it is rejected with ValueError. The error is
# caught and displayed so that running every cell in sequence is not
# interrupted.
try:
    get_creators('Spam, spam, spam')
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: Invalid record: 'Spam, spam, spam'

In [15]:
food = dict(category='ice cream', flavor='vanilla', cost=199)
match food:
    case {'category': 'ice cream', **details}:
        print(f"Ice cream details: {details}")

Ice cream details: {'flavor': 'vanilla', 'cost': 199}


## Standard API of Mapping Types

### What is Hashable

In [1]:
# Every item is hashable (ints and a tuple of ints), so the tuple is too.
tt = 1, 2, (30, 40)
hash(tt)

-3907003130834322577

In [2]:
# A tuple holding a list is not hashable: hashing it raises TypeError. The
# error is the point of this cell, so it is caught and displayed instead of
# being left to interrupt a run of the whole notebook.
tl = (1, 2, [30, 40])
try:
    hash(tl)
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: unhashable type: 'list'

In [4]:
# A frozenset is hashable, so a tuple that holds one is hashable as well.
tf = 1, 2, frozenset({30, 40})
hash(tf)

5149391500123939311

In [2]:
d = dict(a=[1])
# "b" is absent, so setdefault stores the new empty list under it and returns
# that same list; append then mutates the stored value.
d.setdefault("b", []).append(2)
d

{'a': [1], 'b': [2]}

### Automatic Handling of Missing Keys

In [5]:
# defaultdict: Another Take on Missing Keys
from collections import defaultdict

# list is the default factory: it builds the value for any missing key.
d = defaultdict(list)
print(d)  # no keys yet
# Looking up a missing key inserts a new empty list under it and returns it...
print(d["a"])
print(d)  # ...so the key "a" is now present

defaultdict(<class 'list'>, {})
[]
defaultdict(<class 'list'>, {'a': []})


In [6]:
# The __missing__ Method
class StrKeyDict0(dict):
    """dict that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        """Handle a key that ``d[key]`` did not find.

        A non-str key is looked up again as ``str(key)``. A str key raises
        KeyError immediately; without that check the retry would call
        ``__missing__`` again with the same key and recurse without end.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` or ``default`` when the lookup raises KeyError.

        Going through ``self[key]`` gives ``__missing__`` a chance to run.
        """
        try:
            value = self[key]
        except KeyError:
            value = default
        return value

    def __contains__(self, key):
        """Report whether ``key`` or ``str(key)`` is among the stored keys."""
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored

In [12]:
# Test for item retrieval using 'd[key]' notation
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
print(d['2'])
print(d[4])  # int key: the lookup is retried with '4'
# Neither 1 nor '1' is stored, so this lookup raises KeyError. The error is
# caught and displayed so that running every cell in sequence is not
# interrupted.
try:
    print(d[1])
except KeyError as exc:
    print(f"{type(exc).__name__}: {exc}")

two
four

KeyError: '1'

In [10]:
# Test for item retrieval using 'd.get(key)' notation
# One result per line: a str key, an int key, and a missing key with a default.
print(d.get('2'), d.get(4), d.get(1, 'N/A'), sep='\n')

two
four
N/A


In [11]:
# Test for the 'in' operator
# One result per line: membership of the int keys 2 and 1.
print(2 in d, 1 in d, sep='\n')

True
False


### Variations of dict

In [2]:
# collections.ChainMap

from collections import ChainMap

d1 = {'a': 1, 'b': 3}
d2 = {'a': 2, 'b': 4, 'c': 6}

# Lookups search the mappings in order: 'a' is found in d1 first, while 'c'
# is only found in d2.
chain = ChainMap(d1, d2)
print(chain['a'], chain['c'], sep='\n')

# Writes only affect the first mapping: 'c' is added to d1, d2 is unchanged.
chain['c'] = -1
print(d1, d2, sep='\n')

1
6
{'a': 1, 'b': 3, 'c': -1}
{'a': 2, 'b': 4, 'c': 6}


In [None]:
import builtins

# Search order: the local namespace, then the globals, then the built-ins.
pylookup = ChainMap(locals(), globals(), builtins.__dict__)
print(pylookup)

In [7]:
# collections.Counter
from collections import Counter

# Tally every character of the string.
ct = Counter('abracadabra')
print(ct)

# update() adds to the existing counts instead of replacing them.
ct.update('aaaaazzz')

# Show the updated tally, then its three highest counts.
print(ct, ct.most_common(3), sep='\n')

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
[('a', 10), ('z', 3), ('b', 2)]


In [None]:
# shelve.Shelf - avoid it :)
# https://docs.python.org/3/library/shelve.html

In [None]:
# Subclassing UserDict instead of dict
# The main reason why it's better to subclass UserDict rather than dict is that the built-in has
# some implementation shortcuts that end up forcing us to override methods that we can just
# inherit from UserDict with no problems.
import collections

class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as str and accepts non-str keys on lookup."""

    def __missing__(self, key):
        """Retry a failed lookup with ``str(key)``.

        A str key that is missing raises KeyError right away, which also
        stops the retry from recursing.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Report whether the str form of ``key`` is stored."""
        # __setitem__ stores keys as str, so the check goes straight to
        # self.data with the converted key.
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        text_key = str(key)
        self.data[text_key] = item