#### Modern dict Syntax

In [None]:
# dict comprehensions
# A dictcomp (dict comprehension) builds a dict instance by taking key:value pairs from any iterable
dial_codes = [
    (800, 'Bangladesh'),
    (55, 'Brazil'),
    (86, 'China'),
    (91, 'India'),
    (62, 'Indonesia'),
    (81, 'Japan'),
    (234, 'Nigeria'),
    (92, 'Pakistan'),
    (7, 'Russia'),
    (1, 'United States')
]
# An iterable of key-value pairs like dial_codes can be passed directly to the dict constructor, but ...

In [2]:
# .... here we swap the pairs: country is the key, and code is the value
country_dial = {country: code for code, country in dial_codes}

In [3]:
country_dial

{'Bangladesh': 800,
 'Brazil': 55,
 'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Japan': 81,
 'Nigeria': 234,
 'Pakistan': 92,
 'Russia': 7,
 'United States': 1}

In [4]:
# Sorting country_dial by name, reversing the pairs again, uppercasing values, and filtering items with code < 70
{code: country.upper()
    for country, code in sorted(country_dial.items())
    if code < 70}

{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}

#### Unpacking Mappings

In [5]:
# Can apply ** to more than one argument in a function call.
# This works when keys are all strings and unique across all arguments
def dump(**kwargs):
    """Return the keyword arguments this call received, collected into a dict.

    Handy for seeing how several ``**mapping`` unpackings and plain keyword
    arguments combine into a single set of keyword arguments.
    """
    return kwargs

dump(**{'x':1}, y=2, **{'z':3})

{'x': 1, 'y': 2, 'z': 3}

In [7]:
# ** can also be used inside a dict literal - also multiple times.
{'a':0, **{'x':1}, 'y':2, **{'z':3, 'x':4}} 
# In this case, duplicate keys are allowed.
# Later occurrences overwrite previous ones. This syntax can also be used to merge mappings.

{'a': 0, 'x': 4, 'y': 2, 'z': 3}

##### Merge Mappings

In [10]:
# The | operator creates a new mapping:
d1 = {'a':1, 'b':3}
d2 = {'a':2, 'b':4, 'c':6}
d1|d2 # Usually mapping type will be the same as on the left operand

{'a': 2, 'b': 4, 'c': 6}

In [11]:
# To update an existing mapping in place, use |=
d1

{'a': 1, 'b': 3}

In [12]:
d1 |= d2
d1

{'a': 2, 'b': 4, 'c': 6}

##### Pattern Matching with Mappings

In [13]:
def get_creators(record: dict) -> list:
    match record:
        # Match any mapping with 'type' book, api 2 and an author's key mapped to a sequence.
        # Return the items in the sequence, as a new list
        case {'type':'book', 'api':2, 'authors':[*names]}:
            return names
        # Return the object inside a list
        case {'type':'book','api':1,'author':name}:
            return [name]
        case {'type':'book'}:
            raise ValueError(f"Invalid 'book' record: {record!r}")
        case {'type':'movie','director':name}:
            return [name]
        case _:
            raise ValueError(f'Invalid record: {record!r}')

In [14]:
# Some useful practices for handling semi-structured data such as JSON records:
# Include a field describing the kind of record
# Include a field identifying the schema version to allow for future evolution of public APIs
# Have case clauses to handle invalid records of a specific type, as well as a catch-all

b1 = dict(api=1, author='Douglas Hofstadter', type='book', title='Gödel, Escher, Bach')
b1

{'api': 1,
 'author': 'Douglas Hofstadter',
 'type': 'book',
 'title': 'Gödel, Escher, Bach'}

In [15]:
get_creators(b1)

['Douglas Hofstadter']

In [17]:
from collections import OrderedDict
b2 = OrderedDict(api=2, type='book', title='Python in a Nutshell', authors='Martelli Revenscroft Holden'.split())
b2

OrderedDict([('api', 2),
             ('type', 'book'),
             ('title', 'Python in a Nutshell'),
             ('authors', ['Martelli', 'Revenscroft', 'Holden'])])

In [18]:
get_creators(b2)

['Martelli', 'Revenscroft', 'Holden']

In [19]:
get_creators({'type':'book','pages':770})

ValueError: Invalid 'book' record: {'type': 'book', 'pages': 770}

In [20]:
get_creators('Spam, spam, spam')

ValueError: Invalid record: 'Spam, spam, spam'

In [21]:
# There is no need to use **extra to match extra key-value pairs, but if you want to capture them as a dict, you can prefix one variable with **
food = dict(category='ice cream', flavor='vanilla', cost=199)
match food:
    case {'category': 'ice cream', **details}:
        print(f'ice cream details: {details}')

ice cream details: {'flavor': 'vanilla', 'cost': 199}


##### Standard API of Mapping Types

In [23]:
# The main value of the ABCs is documenting and formalising the standard interfaces for mappings, and serving as criteria for isinstance tests
# in code that needs to support mappings in a broad sense:
from collections import abc
my_dict = {}
isinstance(my_dict, abc.Mapping)

True

In [24]:
isinstance(my_dict, abc.MutableMapping)

True

In [25]:
# A tuple is hashable only if all its items are hashable
tt = (1,2,(30,40))
hash(tt)

-3907003130834322577

In [26]:
tl = (1,2,[30,40])
hash(tl)

TypeError: unhashable type: 'list'

In [27]:
# a frozenset is always hashable, because every element it contains must be hashable
tf = (1,2,frozenset([30,40]))
hash(tf)

5149391500123939311

In [8]:
""" re.finditer() method in Python is used to search for all matches of a pattern in a string and return them as an iterator."""
import re

text = "GFG, O, B, GFG, O"
pattern = "GFG"

# Find all occurrences of "GFG" in the string
matches = re.finditer(pattern, text)

for match in matches:
    print(match.start(), match.group())

0 GFG
11 GFG


In [9]:
"""The re.compile() method in Python is used to compile as a regular expression pattern into a regex object.
Compiling a pattern makes it more efficient when we need to use the same pattern several times, as it avoids re.compiling the pattern each time."""
import re

# Compile the pattern to match a word 'geek'
pattern = re.compile(r"geek")

# Use the compiled pattern to search in a string
result = pattern.search("I am a geek")

if result:
    print(result.group())

geek


In [11]:
"""When we need to use the same pattern multiple times, compiling the pattern beforehand makes our code more efficient"""
import re

# Compile the pattern for matching words
pattern = re.compile(r"\b\w+\b")

# Use the compiled pattern to find all words in the string
result = pattern.findall("This is a simple sentence.")

print(result)
"""
Literal meaning:
\w+ → one or more “word characters”: letters, digits, underscore.
" → a double quote character (literal)."""

['This', 'is', 'a', 'simple', 'sentence']


'\nLiteral meaning:\n\\w+ → one or more “word characters”: letters, digits, underscore.\n" → a double quote character (literal).'

##### Inserting or Updating Mutable Values

In [7]:
"""Build an index mapping word -> list of occurrences"""
import re
import sys
# Build `index`, a dict mapping each word found in zen.txt to the list of
# (line_no, column_no) positions where it occurs, then print it sorted by word.
# This version uses dict.get plus a re-assignment for every match.
WORD_RE = re.compile(r'\w+')
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    # enumerate(..., 1) makes line numbers start at 1
    for line_no, line in enumerate(fp,1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; add 1 to get a 1-based column number
            column_no = match.start() + 1
            location = (line_no, column_no)
            # Get the list of occurrences for word, or [] if not found
            occurrences = index.get(word, [])
            # Append the new location to occurrences
            occurrences.append(location)
            # Put changed occurrences into index dict; this entails a second search through the index
            index[word] = occurrences

# Display in alphabetical order.
# In the key= argument of sorted, str.upper is not called; only a reference to that method is passed,
# so the sorted function can use it to normalize the words for sorting.
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(19, 48), (20, 53)]
Although [(11, 1), (16, 1), (18, 1)]
ambiguity [(14, 16)]
and [(15, 23)]
are [(21, 12)]
aren [(10, 15)]
at [(16, 38)]
bad [(19, 50)]
be [(15, 14), (16, 27), (20, 50)]
beats [(11, 23)]
Beautiful [(3, 1)]
better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
break [(10, 40)]
by [(1, 20)]
cases [(10, 9)]
complex [(5, 23)]
Complex [(6, 1)]
complicated [(6, 24)]
counts [(9, 13)]
dense [(8, 23)]
do [(15, 64), (21, 48)]
Dutch [(16, 61)]
easy [(20, 26)]
enough [(10, 30)]
Errors [(12, 1)]
explain [(19, 34), (20, 34)]
Explicit [(4, 1)]
explicitly [(13, 8)]
face [(14, 8)]
first [(16, 41)]
Flat [(7, 1)]
good [(20, 55)]
great [(21, 28)]
guess [(14, 52)]
hard [(19, 26)]
honking [(21, 20)]
idea [(19, 54), (20, 60), (21, 34)]
If [(19, 1), (20, 1)]
implementation [(19, 8), (20, 8)]
implicit [(4, 25)]
In [(14, 1)]
is [(3, 11), (4, 10), (5, 8), (6, 9), (7, 6), (8, 8), (17, 5), (18, 16), (19, 23), (20, 23)]
it [(15, 67), (19, 43), (20, 43)]
let [(21, 42)]
m

In [13]:
# The three lines that handle occurrences can be replaced by a single line using dict.setdefault
"""Build an index mapping word -> list of occurrences"""

import re
import sys

# Build `index`, a dict mapping each word found in zen.txt to the list of
# (line_no, column_no) positions where it occurs, then print it sorted by word.
# This version uses dict.setdefault to fetch-or-create the list in one step.
WORD_RE =re.compile(r'\w+')
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    # enumerate(..., 1) makes line numbers start at 1
    for line_no, line in enumerate(fp,1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; add 1 to get a 1-based column number
            column_no = match.start() +1
            location = (line_no, column_no)
            # Get the list of occurrences for word, or set it to [] if not found;
            # setdefault returns the value, so it can be updated without requiring a second search.
            index.setdefault(word, []).append(location)

# Display in alphabetical order; str.upper is passed as the sort key (not called here),
# so words are compared by their uppercased form.
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(19, 48), (20, 53)]
Although [(11, 1), (16, 1), (18, 1)]
ambiguity [(14, 16)]
and [(15, 23)]
are [(21, 12)]
aren [(10, 15)]
at [(16, 38)]
bad [(19, 50)]
be [(15, 14), (16, 27), (20, 50)]
beats [(11, 23)]
Beautiful [(3, 1)]
better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
break [(10, 40)]
by [(1, 20)]
cases [(10, 9)]
complex [(5, 23)]
Complex [(6, 1)]
complicated [(6, 24)]
counts [(9, 13)]
dense [(8, 23)]
do [(15, 64), (21, 48)]
Dutch [(16, 61)]
easy [(20, 26)]
enough [(10, 30)]
Errors [(12, 1)]
explain [(19, 34), (20, 34)]
Explicit [(4, 1)]
explicitly [(13, 8)]
face [(14, 8)]
first [(16, 41)]
Flat [(7, 1)]
good [(20, 55)]
great [(21, 28)]
guess [(14, 52)]
hard [(19, 26)]
honking [(21, 20)]
idea [(19, 54), (20, 60), (21, 34)]
If [(19, 1), (20, 1)]
implementation [(19, 8), (20, 8)]
implicit [(4, 25)]
In [(14, 1)]
is [(3, 11), (4, 10), (5, 8), (6, 9), (7, 6), (8, 8), (17, 5), (18, 16), (19, 23), (20, 23)]
it [(15, 67), (19, 43), (20, 43)]
let [(21, 42)]
m

##### Automatic Handling of Missing Keys

In [None]:
""" 
defaultdict()
Does the following:
1. Calls list() to create a new list.
2. Inserts the list into dd using 'new-key' as key.
3. Returns a reference to that list. 
The callable that produces the default values is held in an instance attribute named default_factory.
"""

" \ndefaultdict()\nDoes the following:\n1. Calls list() to create a new list.\n2. Inserts the list into dd using 'new-key' as key.\n3. Returns a reference to that list. \n"

In [None]:
import collections
import re
import sys 

# Build `index`, mapping each word found in zen.txt to the list of
# (line_no, column_no) positions where it occurs, using a defaultdict
# so that no explicit handling of missing words is needed.
WORD_RE = re.compile(r'\w+')
index = collections.defaultdict(list) # Create a defaultdict with the list constructor as default_factory
with open('zen.txt', encoding='utf-8') as fp:
    # enumerate(..., 1) makes line numbers start at 1
    for line_no, line in enumerate(fp,1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            # match.start() is 0-based; add 1 to get a 1-based column number
            column_no = match.start() + 1
            location = (line_no, column_no)
            # If the word is not initially in the index, the default_factory is called to produce the missing value,
            # which in this case is an empty list that is then assigned to index[word] and returned,
            # so the .append(location) operation always succeeds.
            index[word].append(location)

In [16]:
# The __missing__ Method
# When searching for a nonstring key, strkeydict0 converts it to str when it is not found.
# Tests for item retrieval using the d[key] notation

class StrKeyDict0(dict):
    """dict subclass that retries a failed lookup with the key converted to str.

    Lookups via ``d[key]``, ``d.get(key)`` and ``key in d`` all fall back to
    ``str(key)`` when the key itself is not present.
    """

    def __missing__(self, key):
        """Handle a key that ``d[key]`` did not find.

        A non-str key is looked up again as ``str(key)``. A str key that is
        missing has nothing left to fall back to, so KeyError is raised.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when that raises KeyError.

        Going through ``self[key]`` gives ``__missing__`` the chance to act.
        """
        try:
            value = self[key]
        except KeyError:
            # __missing__ already tried str(key) and failed.
            return default
        return value

    def __contains__(self, key):
        """Report whether ``key`` or ``str(key)`` is one of the stored keys.

        The check goes through ``self.keys()`` rather than ``key in self``,
        which would call this method again.
        """
        stored_keys = self.keys()
        # Unmodified key first (the instance may hold non-str keys), then its str form.
        return key in stored_keys or str(key) in stored_keys
        

In [17]:
# collections.ChainMap
# A ChainMap instance holds a list of mappings that can be searched as one.
from collections import ChainMap
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)
chain = ChainMap(d1,d2)
chain['a']

1

In [18]:
chain['c']

6

In [19]:
# Updates or insertions to a ChainMap only affect the first input mapping.
chain['c'] = -1
d1

{'a': 1, 'b': 3, 'c': -1}

In [20]:
d2

{'a': 2, 'b': 4, 'c': 6}

In [23]:
from collections import Counter
ct = Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [24]:
ct.update('aaaaazuuiui')

In [25]:
ct

Counter({'a': 10, 'u': 3, 'b': 2, 'r': 2, 'i': 2, 'c': 1, 'd': 1, 'z': 1})

In [26]:
ct.most_common(3)

[('a', 10), ('u', 3), ('b', 2)]

In [None]:
# Subclassing UserDict instead of dict
import collections
class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as a str and converts non-str keys on lookup.

    Built on UserDict, whose contents live in the ``self.data`` dict, so the
    overrides below can work on that dict directly.
    """

    def __missing__(self, key):
        """Handle a key that was not found by ``d[key]``.

        A non-str key is looked up again as ``str(key)``.

        Raises:
            KeyError: if ``key`` is already a str (nothing left to try).
        """
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Report whether ``str(key)`` is a stored key.

        Every key is stored as str (see __setitem__), so one lookup in
        ``self.data`` is enough; there is no need to go through self.keys().
        """
        return str(key) in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``.

        Writes to ``self.data``, the dict that UserDict wraps. The previous
        code wrote to a misspelled attribute (``self.ddata``), which raised
        AttributeError on every assignment.
        """
        self.data[str(key)] = item

In [27]:
# MappingProxyType builds a read-only mappingproxy instance from a dict
from types import MappingProxyType 
d = {1:'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [28]:
d_proxy[1] # Items in d can be seen through d_proxy

'A'

In [29]:
d_proxy[2]='x' # changes cannot be made through d_proxy

TypeError: 'mappingproxy' object does not support item assignment

In [32]:
d[2]='B'
d_proxy # d_proxy is dynamic and any change in d is reflected 

mappingproxy({1: 'A', 2: 'B'})

In [33]:
d_proxy[2]

'B'

In [35]:
# Dictionary views
# The .values() method returns a view of the values in a dict
d = dict(a=11, b=20, c=30)
# the repr of a view object shows its contents
values = d.values()
values

dict_values([11, 20, 30])

In [37]:
# we can query the len of a view
len(values)

3

In [38]:
# Views are iterable, so it's easy to create lists from them
list(values)

[11, 20, 30]

In [39]:
# Views implement __reversed__, returning a custom iterator.
reversed(values)

<dict_reversevalueiterator at 0x112cb6840>

In [40]:
# We can't use [] to get individual items from a view
values[0]

TypeError: 'dict_values' object is not subscriptable

In [41]:
# A view object is a dynamic proxy. If the source dict is updated, you can immediately see the changes through an existing view
d['z'] = 99
d

{'a': 11, 'b': 20, 'c': 30, 'z': 99}

In [42]:
values

dict_values([11, 20, 30, 99])

#### Sets

In [43]:
# Set is a collection of unique objects. A basic use case is removing duplication:
l = ['spam','spam','spam','eggs','ham','ham']
set(l)

{'eggs', 'ham', 'spam'}

In [44]:
# removing duplicates, but also preserving the order of the first occurrence of each item
dict.fromkeys(l).keys()

dict_keys(['spam', 'eggs', 'ham'])

In [45]:
list(dict.fromkeys(l).keys())

['spam', 'eggs', 'ham']

In [46]:
# The standard string representation of sets always uses the {...} notation, except for empty sets
s = {1}
type(s)


set

In [47]:
s

{1}

In [49]:
s.pop()

1

In [50]:
s

set()

In [52]:
# There is no special syntax to represent frozenset literals - they must be created by calling the constructor.
frozenset(range(10))

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

In [53]:
# Set comprehensions
# Import the name function from unicodedata to obtain character names
from unicodedata import name 
# Build a set of characters with codes from 32 to 255 that have the word 'SIGN' in their names
{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i),'')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

In [54]:
# Intersection operator (&) applied to dict key views
d1 = dict(a=1, b=2, c=3, d=4)
d2 = dict(b=20, d=40, e=50)
d1.keys() & d2.keys()

{'b', 'd'}

In [55]:
# The return value of & is a set.
# The set operators in dictionary views are compatible with set instances 
s = {'a', 'e', 'i'}
d1.keys() & s

{'a'}

In [56]:
d1.keys() | s

{'a', 'b', 'c', 'd', 'e', 'i'}