dict comprehension

In [4]:
DIAL_CODES = [
        (86, 'China'),
        (91, 'India'),
        (1, 'United States'),
        (62, 'Indonesia'),
        (55, 'Brazil'),
        (92, 'Pakistan'),
        (880, 'Bangladesh'),
        (234, 'Nigeria'),
        (7, 'Russia'),
        (81, 'Japan'),
    ]
dict_dial_codes = {country: code for code, country in DIAL_CODES}

dict_dial_codes

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japan': 81}

In [5]:
{country: code for code, country in DIAL_CODES if code > 50}

{'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Japan': 81}

unpacking mappings

In [6]:
def dump(**kwargs):
    print(kwargs)
dump(a=1, b=2, c=3, **{'x':10}, **dict_dial_codes)

{'a': 1, 'b': 2, 'c': 3, 'x': 10, 'China': 86, 'India': 91, 'United States': 1, 'Indonesia': 62, 'Brazil': 55, 'Pakistan': 92, 'Bangladesh': 880, 'Nigeria': 234, 'Russia': 7, 'Japan': 81}


In [None]:
# dump(**{1: 20})  # raises TypeError: keywords must be strings

TypeError: keywords must be strings

merge dict

In [8]:
d1 = {'a': 1, 'b': 2}
d2 = {'a': 2,'c': 3, 'd': 4}
d3 = {**d1, **d2}
d3

{'a': 2, 'b': 2, 'c': 3, 'd': 4}

In [9]:
d1 | d2

{'a': 2, 'b': 2, 'c': 3, 'd': 4}

pattern matching

In [12]:
def get_creators(record: dict) -> list:
    match record:
        case {'type': 'book', 'api': 2, 'authors': [*authors]}:
            return authors
        case {'type': 'book', 'api': 1, 'author': author}:
            return [author]
        case {'type': 'book', 'api': 1}:
            raise ValueError('Invalid API version')
        case {'type': 'movie', 'director': name}:
            return [name]
        case _:
            raise ValueError('Invalid record type')
        

In [13]:
b1 = dict(api=1, author='Douglas Hofstadter', type='book', title='Gödel, Escher, Bach')
get_creators(b1)

['Douglas Hofstadter']

In [14]:
from collections import OrderedDict
b2 = OrderedDict(api=2, type='book',
         title='Python in a Nutshell',
         authors='Martelli Ravenscroft Holden'.split())
get_creators(b2)

['Martelli', 'Ravenscroft', 'Holden']

In [15]:
food = dict(category='ice cream', flavor='vanilla', cost=199)
match food:
    case {'category': 'ice cream', **details}:
        print(f'Ice cream details: {details}')

Ice cream details: {'flavor': 'vanilla', 'cost': 199}


In [16]:
food = dict(category='ice cream', flavor='vanilla')
match food:
    case {'category': 'ice cream', **details}:
        print(f'Ice cream details: {details}')

Ice cream details: {'flavor': 'vanilla'}


In [1]:
import sys
print(sys.version)

3.8.0 (v3.8.0:fa919fdf25, Oct 14 2019, 10:23:27) 
[Clang 6.0 (clang-600.0.57)]


In [2]:
from collections import abc
my_dict = {}
isinstance(my_dict, abc.Mapping)

True

In [3]:
isinstance(my_dict, abc.MutableMapping)

True

What Is Hashable


str, bytes, frozenset

tuple is hashable only if all its items are hashable.

In [17]:
tt = (1, 2, (30, 40))
hash(tt)

-3907003130834322577

In [18]:
tl = (1, 2, [30, 40])
try:
    hash(tl)
except TypeError as e:
    print(e)

unhashable type: 'list'


In [19]:
tf = (1, 2, frozenset([30, 40]))
hash(tf)

5149391500123939311

Relationship Between id(), __eq__(), and __hash__()
In Python, these three functions/methods play crucial roles in object identity, equality, and hashing:

id()
Built-in function that returns a unique integer identifier for an object
Represents the object's memory address (in CPython)
Never changes during an object's lifetime
Used for identity comparison with is operator
__eq__()
Special method that implements the equality operator (==)
Determines if two objects are considered equal in value
Can be customized for user-defined classes
Default implementation often compares by identity (is)
__hash__()
Special method that returns an integer hash value for an object
Used by dictionaries and sets for efficient lookups
Key requirement: Objects that compare equal must have identical hash values
Their Relationship
Hash-Equality Contract: If a.__eq__(b) returns True, then hash(a) == hash(b) must be True (but not necessarily vice versa)

Immutability Connection: Hashable objects typically have unchangeable values, as changing a value after insertion in a dictionary/set would break the hash-equality contract

Default Implementation: For user-defined classes:

Default __eq__() compares by identity (id())
Default __hash__() uses id() to generate hash values
Customization Requirements: If you override __eq__(), you should typically also override __hash__() to maintain the hash-equality contract (or set __hash__ = None to make the object unhashable)

# Mastering Python Comparisons and Conditions

## Identity vs Equality: `is` vs `==`

### `is` Operator
- Tests for **identity** (same object in memory)
- Equivalent to comparing `id(a) == id(b)`
- Perfect for singleton objects like `None`



In [None]:
a = [1, 2, 3]
b = a        # b references the same list as a
a is b       # True - they are the same object

c = [1, 2, 3]
a is c       # False - different objects with same values



### `==` Operator
- Tests for **equality** (same value)
- Uses the object's `__eq__()` method
- Compares contents rather than identity



In [None]:
a = [1, 2, 3]
c = [1, 2, 3]
a == c       # True - they have equal values



## Working with `None`

- Always use `is` with `None`, not `==`
- `None` is a singleton object (only one instance exists)



In [None]:
value = None  # example value so the cell runs on its own

# Good practice: identity test against the None singleton
if value is None:
    pass  # do something

# Bad practice: `==` goes through __eq__, which a class may override
if value == None:
    pass  # do something



## Truth Value Testing

Objects are considered "truthy" or "falsy" in conditional contexts:

### Falsy Values
- `None`
- `False`
- `0` (or `0.0`, `0j`)
- Empty sequences and collections: `""`, `[]`, `()`, `{}`, `set()`, `range(0)`
- Objects where `__bool__()` returns `False` or `__len__()` returns `0`



In [None]:
# Every value below is falsy, so `not value` is True for each of them
falsy_examples = (None, False, 0, "", [])
for value in falsy_examples:
    if not value:
        print(f"{value!r} is falsy")



### Truthy Values
- Everything else is considered True

## Comparison Chaining

Python allows chaining multiple comparisons:



In [None]:
# This:
if a < b < c:
    print("b is between a and c")

# Is equivalent to:
if a < b and b < c:
    print("b is between a and c")



## Common Pitfalls

### Mutable Default Arguments


In [None]:
# Problematic: the default list is built once, when `def` runs, and is
# then shared by every call that omits `lst`
def add_item(item, lst=[]):
    lst.append(item)
    return lst

# Better: use None as the "not given" marker and build the list per call
def add_item(item, lst=None):
    """Append ``item`` to ``lst`` and return the list.

    When ``lst`` is omitted (or None) a fresh list is created for this call.
    """
    target = [] if lst is None else lst
    target.append(item)
    return target



### Boolean Trap


In [None]:
# Unclear:
process_data(False)

# Better:
process_data(verbose=False)



### Variable Truth Testing


In [None]:
x = True  # example value so the cell runs on its own

# Verbose:
if x == True:
    pass  # do something

# Better:
if x:
    pass  # do something



Use `is not None` when you specifically want to check if a value exists (but could be falsy like `0` or `""`).

dict.update

# Duck Typing in Python's Dictionary Update Method

"Duck typing" refers to the Python philosophy of "if it walks like a duck and quacks like a duck, then it's a duck" - meaning we care about an object's behavior rather than its type.

Here's an example demonstrating how `dict.update()` uses duck typing:



In [None]:
# Example of dict.update() with duck typing

# 1. Update from another dictionary (has keys() method)
d = {'a': 1, 'b': 2}
other_dict = {'b': 3, 'c': 4}
d.update(other_dict)
print("After updating with another dict:", d)  # {'a': 1, 'b': 3, 'c': 4}

# 2. Update from a list of tuples (no keys() method, but iterable of pairs)
d = {'a': 1, 'b': 2}
pairs_list = [('b', 30), ('d', 40)]
d.update(pairs_list)
print("After updating with tuple list:", d)  # {'a': 1, 'b': 30, 'd': 40}

# 3. Update from a generator expression (no keys() method, but iterable of pairs)
d = {'a': 1, 'b': 2}
# Snapshot the items with list() first: update() consumes the generator
# lazily, so iterating d.items() directly while update() inserts new keys
# raises "RuntimeError: dictionary changed size during iteration".
d.update((k.upper(), v * 10) for k, v in list(d.items()))
print("After updating with generator:", d)  # {'a': 1, 'b': 2, 'A': 10, 'B': 20}

# 4. Custom class that acts like a mapping (has keys() method)
class CustomMapping:
    """Minimal mapping look-alike: only keys() and __getitem__ are provided."""

    def keys(self):
        """Return the fixed list of keys this object exposes."""
        return ['x', 'y']

    def __getitem__(self, key):
        """Return a value derived from the key: its length times 100."""
        return len(key) * 100

d = {'a': 1, 'b': 2}
d.update(CustomMapping())
print("After updating with custom mapping:", d)  # {'a': 1, 'b': 2, 'x': 100, 'y': 100}



The key insight here is that `d.update(m)` doesn't require `m` to be a specific type. It only requires that either:

1. `m` has a `keys()` method, and supports `m[key]` (like a mapping)
2. OR `m` can be iterated over, producing key-value pairs

This flexibility makes it possible to initialize dictionaries from various sources without needing explicit type conversion.

is -> id()
== -> __eq__()

In [20]:
a = dict(one=1, two=2, three=3)
b = {'three': 3, 'two': 2, 'one': 1}
c = dict([('two', 2), ('one', 1), ('three', 3)])
d = dict(zip(['one', 'two', 'three'], [1, 2, 3]))
e = dict({'three': 3, 'one': 1, 'two': 2})
a == b == c == d == e

True

In [21]:
a

{'one': 1, 'two': 2, 'three': 3}

In [22]:
list(a.keys())

['one', 'two', 'three']

In [23]:
c

{'two': 2, 'one': 1, 'three': 3}

In [24]:
c.popitem()

('three', 3)

In [25]:
c

{'two': 2, 'one': 1}

In [26]:
a == c

False

In [27]:
a is b

False

In [15]:
dial_codes = [                                                  # <1>
    (880, 'Bangladesh'),
    (55,  'Brazil'),
    (86,  'China'),
    (91,  'India'),
    (62,  'Indonesia'),
    (81,  'Japan'),
    (234, 'Nigeria'),
    (92,  'Pakistan'),
    (7,   'Russia'),
    (1,   'United States'),
]

In [17]:
country_dial = {country: code for code, country in dial_codes}
country_dial

{'Bangladesh': 880,
 'Brazil': 55,
 'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Japan': 81,
 'Nigeria': 234,
 'Pakistan': 92,
 'Russia': 7,
 'United States': 1}

In [18]:
{code: country.upper() 
    for country, code in sorted(country_dial.items())
    if code < 70}

{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}

In [20]:
from random import shuffle
shuffle(dial_codes)
country_dial = {country: code for code, country in dial_codes}
country_dial

{'Pakistan': 92,
 'Indonesia': 62,
 'Russia': 7,
 'Japan': 81,
 'United States': 1,
 'China': 86,
 'Brazil': 55,
 'Bangladesh': 880,
 'Nigeria': 234,
 'India': 91}

dict.setdefault

In [28]:
a = {'one': 1, 'two': 2, 'three': 3}
occur = a.get('four', [])  # search first time
occur.append(4)  
a['four'] = occur  # search second time
a

{'one': 1, 'two': 2, 'three': 3, 'four': [4]}

In [30]:
a = {'one': 1, 'two': 2, 'three': 3}
a.setdefault('four', []).append(4)  # search first time
a

{'one': 1, 'two': 2, 'three': 3, 'four': [4]}

In [31]:
a = {'one': 1, 'two': 2, 'three': 3}
if 'four' not in a: # search first time
    a['four'] = [] # search second time
a['four'].append(4)  # search third time
a

{'one': 1, 'two': 2, 'three': 3, 'four': [4]}

Automatic Handling of Missing Keys

In [None]:
import collections
import re
import sys

WORD_RE = re.compile(r'\w+')

# Maps each word to the list of (line, column) positions where it occurs.
# defaultdict(list) calls list() to create the value the first time a word
# is looked up, so append() below never hits a missing key.
index = collections.defaultdict(list)
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):
        for found in WORD_RE.finditer(line):
            # Columns are reported 1-based, like the line numbers
            index[found.group()].append((line_no, found.start() + 1))

# Display the words in case-insensitive alphabetical order
for word, locations in sorted(index.items(), key=lambda entry: entry[0].upper()):
    print(word, locations)

__missing__

Underlying the way mappings deal with missing keys is the aptly named __missing__ method. This method is not defined in the base dict class, but dict is aware of it: if you subclass dict and provide a __missing__ method, the standard dict.__getitem__ will call it whenever a key is not found, instead of raising KeyError.

In [None]:
class StrKeyDict0(dict):
    """dict variant whose lookups retry a missing non-string key as ``str(key)``."""

    def __missing__(self, key):
        # Called by dict.__getitem__ when ``key`` is not found.
        if not isinstance(key, str):
            return self[str(key)]
        # A missing string key has no further fallback.
        raise KeyError(key)

    def __contains__(self, key):
        # Search the keys view explicitly: ``key in self`` would call this
        # method again and recurse forever.
        known = self.keys()
        if key in known:
            return True
        return str(key) in known

    def get(self, key, default=None):
        # Delegate to __getitem__ so the __missing__ fallback is applied.
        try:
            value = self[key]
        except KeyError:
            return default
        return value
        
        
d = StrKeyDict0([('2', 'two'), ('4', 'four')])

In [40]:
d[2]

'two'

In [41]:
d['2']

'two'

In [42]:
d[1]

KeyError: '1'

In [43]:
1 in d

False

collections.OrderedDict

In [45]:
a=OrderedDict([('one', 1), ('two', 2), ('three', 3)])
b=OrderedDict([('three', 3), ('two', 2), ('one', 1)])


In [46]:
print(a)
print(b)
print(a == b)  # False, order matters
print(a is b)  # False, different objects

OrderedDict({'one': 1, 'two': 2, 'three': 3})
OrderedDict({'three': 3, 'two': 2, 'one': 1})
False
False


# Mastering OrderedDict in Python

`OrderedDict` is a dictionary subclass that remembers the insertion order of keys. Here are some comprehensive examples to help you master it:

## 1. Basic Usage and Comparison



In [47]:
from collections import OrderedDict

# Creating OrderedDicts
od1 = OrderedDict([('apple', 1), ('banana', 2), ('cherry', 3)])
od2 = OrderedDict([('banana', 2), ('apple', 1), ('cherry', 3)])

# Regular dictionaries (in Python 3.7+, regular dicts also preserve order, but comparison is different)
d1 = {'apple': 1, 'banana': 2, 'cherry': 3}
d2 = {'banana': 2, 'apple': 1, 'cherry': 3}

# Demonstrating order matters for equality in OrderedDict but not regular dict
print("Regular dict comparison:", d1 == d2)  # True, order doesn't matter
print("OrderedDict comparison:", od1 == od2)  # False, order matters!

Regular dict comparison: True
OrderedDict comparison: False




## 2. Maintaining Order in Operations



In [48]:
from collections import OrderedDict

# Build the menu one course at a time; insertion order is remembered
menu = OrderedDict()
menu['appetizer'] = 'salad'
menu['main'] = 'steak'
menu['dessert'] = 'ice cream'
menu['drink'] = 'wine'


def _show_menu(heading):
    """Print ``heading``, then one "course: item" line per entry, in menu order."""
    print(heading)
    for course, item in menu.items():
        print(f"{course}: {item}")


# Items come back in insertion order
_show_menu("Menu items in order:")

# update() keeps the position of keys that already exist; new keys go last
menu.update(appetizer='soup', side='fries')
_show_menu("\nAfter update, still in order:")

# move_to_end() sends a key to the back by default...
menu.move_to_end('appetizer')
_show_menu("\nAfter moving appetizer to end:")

# ...and to the front when last=False
menu.move_to_end('dessert', last=False)
_show_menu("\nAfter moving dessert to beginning:")

Menu items in order:
appetizer: salad
main: steak
dessert: ice cream
drink: wine

After update, still in order:
appetizer: soup
main: steak
dessert: ice cream
drink: wine
side: fries

After moving appetizer to end:
main: steak
dessert: ice cream
drink: wine
side: fries
appetizer: soup

After moving dessert to beginning:
dessert: ice cream
main: steak
drink: wine
side: fries
appetizer: soup




## 3. LRU (Least Recently Used) Cache Implementation



In [49]:
from collections import OrderedDict

class LRUCache:
    """Least-recently-used cache backed by an ``OrderedDict``.

    Entries are ordered from least recently used (front) to most recently
    used (back).
    """

    def __init__(self, capacity):
        """Create an empty cache that holds at most ``capacity`` entries."""
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key):
        """Return the value cached for ``key`` and mark it most recently used.

        Returns ``-1`` when ``key`` is not in the cache.
        """
        if key not in self.cache:
            return -1
        # Move the accessed item to the end (most recently used)
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key, value):
        """Store ``value`` under ``key`` as the most recently used entry.

        An existing key is updated in place. For a new key, the least
        recently used entry is evicted first when the cache is full. A cache
        whose capacity is zero or negative stores nothing.
        """
        # If key exists, update and move to the end
        if key in self.cache:
            self.cache[key] = value
            self.cache.move_to_end(key)
            return

        # No room at all: without this guard the eviction below would call
        # popitem() on an empty OrderedDict and raise KeyError.
        if self.capacity <= 0:
            return

        # If at capacity, remove least recently used item (first item)
        if len(self.cache) >= self.capacity:
            self.cache.popitem(last=False)

        # Add new item at the end (most recently used)
        self.cache[key] = value

# Example usage
cache = LRUCache(3)
cache.put('a', 1)
cache.put('b', 2)
cache.put('c', 3)
print("Cache after initial puts:", list(cache.cache.items()))

# Access 'a', moving it to the end
cache.get('a')
print("Cache after getting 'a':", list(cache.cache.items()))

# Add 'd', which will evict the least recently used (now 'b')
cache.put('d', 4)
print("Cache after adding 'd':", list(cache.cache.items()))

Cache after initial puts: [('a', 1), ('b', 2), ('c', 3)]
Cache after getting 'a': [('b', 2), ('c', 3), ('a', 1)]
Cache after adding 'd': [('c', 3), ('a', 1), ('d', 4)]




## 4. Preserving Order in Dictionary Operations



In [50]:
from collections import OrderedDict

# Track the order of winners in a race
race_results = OrderedDict([
    ('Alice', 'Gold'), 
    ('Bob', 'Silver'), 
    ('Charlie', 'Bronze')
])

print("Original results:")
for position, (name, medal) in enumerate(race_results.items(), 1):
    print(f"{position}. {name}: {medal}")

# Remove and re-add a participant: the re-inserted key goes to the end.
# (A regular dict on Python 3.7+ behaves the same way; move_to_end() is the
# OrderedDict shortcut for this.)
# NOTE: `medal` was last bound by the loop above; it is rebound here to
# Bob's popped value before being stored again.
medal = race_results.pop('Bob')
race_results['Bob'] = medal

print("\nAfter moving Bob to the end:")
for position, (name, medal) in enumerate(race_results.items(), 1):
    print(f"{position}. {name}: {medal}")

# Building a new OrderedDict from a generator expression of (key, value)
# pairs keeps the pairs in the order they are produced
filtered_results = OrderedDict((name, medal) for name, medal in race_results.items() 
                               if name != 'Charlie')
print("\nWithout Charlie:")
for position, (name, medal) in enumerate(filtered_results.items(), 1):
    print(f"{position}. {name}: {medal}")

Original results:
1. Alice: Gold
2. Bob: Silver
3. Charlie: Bronze

After moving Bob to the end:
1. Alice: Gold
2. Charlie: Bronze
3. Bob: Silver

Without Charlie:
1. Alice: Gold
2. Bob: Silver




## 5. Custom Sort with OrderedDict



In [51]:
from collections import OrderedDict

# Initial data
scores = {'Alice': 87, 'Bob': 92, 'Charlie': 75, 'David': 92, 'Eve': 88}


def _ranking_key(entry):
    """Order by score, highest first; equal scores fall back to name order."""
    name, score = entry
    return (-score, name)


# An OrderedDict built from the sorted pairs keeps them in ranking order
sorted_scores = OrderedDict(sorted(scores.items(), key=_ranking_key))

print("Scores sorted by value (desc) then key (asc):")
for rank, entry in enumerate(sorted_scores.items(), start=1):
    name, score = entry
    print(f"{rank}. {name}: {score}")

Scores sorted by value (desc) then key (asc):
1. Bob: 92
2. David: 92
3. Eve: 88
4. Alice: 87
5. Charlie: 75




## Key Benefits and Use Cases

1. **When order matters**: For configurations, CSV headers, or any data where order is important
2. **Predictable iteration**: Guaranteed order of items when iterating
3. **LRU Caches**: Perfect for implementing efficient caching mechanisms
4. **Preserving ordering semantics**: When merging multiple data sources where order is significant
5. **Backward compatibility**: For code that needs to run across Python versions

Note that since Python 3.7, regular dictionaries also maintain insertion order, but `OrderedDict` still has specific advantages like the `move_to_end()` method and order-sensitive equality operations.

collections.ChainMap

In [52]:
from collections import ChainMap


d1= dict(a=1, b=2)
d2= dict(a=2, b=2, c=3)
chain = ChainMap(d1, d2)  # lookups search d1 first, then d2
chain['a']  # 1, from d1 (the first mapping that has 'a' wins)
chain['c']  # 3, from d2 (d1 has no 'c')

3

In [53]:
chain['c'] = -1
print(d1)
print(d2)  # d2 is not affected

{'a': 1, 'b': 2, 'c': -1}
{'a': 2, 'b': 2, 'c': 3}


# Mastering ChainMap in Python

`ChainMap` from the `collections` module is a powerful tool for managing multiple dictionaries as a single mapping. Here's a comprehensive example demonstrating its capabilities:



In [None]:
from collections import ChainMap
import os

# Practical Example: Command-line Arguments with Defaults and Environment Variables
# Let's simulate a configuration system with multiple layers of precedence:
# 1. Command-line arguments (highest priority)
# 2. Environment variables (middle priority)
# 3. Default settings (lowest priority)

# Default configurations
defaults = {
    'debug': False,
    'port': 8000,
    'host': 'localhost',
    'database': 'test_db',
    'timeout': 30
}

# Environment variables (with ENV_ prefix), read from os.environ.
# A variable that is unset (or a numeric one that parses to 0) becomes None
# and is dropped below, so it cannot shadow the default.
env_vars = {
    'port': int(os.environ.get('ENV_PORT', '0')) or None,
    'host': os.environ.get('ENV_HOST'),
    'database': os.environ.get('ENV_DATABASE'),
    'timeout': int(os.environ.get('ENV_TIMEOUT', '0')) or None,
    'log_level': 'INFO'  # An env-specific setting not in defaults
}
# Remove None values
env_vars = {k: v for k, v in env_vars.items() if v is not None}

# Command line arguments (highest priority)
# In a real app, you'd parse these from sys.argv
cmd_args = {
    'port': 9000,
    'debug': True
}

# Create the ChainMap with proper precedence
config = ChainMap(cmd_args, env_vars, defaults)

# Using the configuration
print("Server Configuration:")
print(f"Debug mode: {config['debug']}")          # From cmd_args
print(f"Port: {config['port']}")                 # From cmd_args
print(f"Host: {config['host']}")                 # From defaults (unless ENV_HOST is set)
print(f"Database: {config['database']}")         # From defaults (unless ENV_DATABASE is set)
print(f"Timeout: {config['timeout']}")           # From defaults (unless ENV_TIMEOUT is set)
print(f"Log level: {config['log_level']}")       # From env_vars

# Adding a new configuration source (e.g., config file)
config_file = {
    'host': 'config-host',
    'worker_processes': 4
}

# Insert the new mapping at position 1 (between cmd_args and env_vars).
# new_child() is deliberately not used here: it would place config_file in
# front of cmd_args, giving it top priority and making it the target of
# every write through the ChainMap.
config.maps.insert(1, config_file)

print("\nAfter adding config file:")
print(f"Host: {config['host']}")                 # From config_file (cmd_args has no 'host')
print(f"Worker processes: {config['worker_processes']}") # From config_file

# Updating configuration at runtime
config['timeout'] = 60  # Writes always go to the first map (cmd_args)
print("\nAfter updating timeout:")
print(f"Timeout: {config['timeout']}")           # Now 60
print(f"Original defaults unchanged: {defaults['timeout']}") # Still 30

# Access the parents (all maps except the first one)
parent_configs = config.parents
print("\nParent configurations (without cmd_args):")
print(f"Host from parents: {parent_configs['host']}")  # From config_file



This example shows how `ChainMap` excels at managing layered configurations where values in the earlier mappings take precedence over those in later mappings. It's especially useful for combining default settings with user configurations and environment variables.

collections.Counter

In [55]:
import collections

ct = collections.Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [56]:
ct.update('aaaaazzz')
ct

Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [57]:
ct.most_common(3)

[('a', 10), ('z', 3), ('b', 2)]

In [58]:
ct2 = {'one': 1, 'two': 2, 'three': 3, 'one': 12}

ct2

{'one': 12, 'two': 2, 'three': 3}

In [59]:
collections.Counter(ct2)

Counter({'one': 12, 'three': 3, 'two': 2})

# Advanced Counter Example with Complex Data

Here's a practical example using `Counter` with more complex data than simple strings:



In [60]:
from collections import Counter
from datetime import date

# Sample data: website visits by user and day.
# Each row is (user, page, day of April 2025); rows are expanded into the
# dict records used by the rest of the cell.
_visit_rows = [
    ('user1', '/home', 25),
    ('user2', '/products', 25),
    ('user1', '/products', 25),
    ('user3', '/home', 25),
    ('user1', '/checkout', 25),
    ('user2', '/home', 26),
    ('user1', '/home', 26),
    ('user3', '/products', 26),
    ('user2', '/checkout', 26),
    ('user1', '/products', 26),
    ('user3', '/checkout', 26),
]
page_visits = [
    {'user': who, 'page': where, 'date': date(2025, 4, day)}
    for who, where, day in _visit_rows
]


def _count_by(field, visits):
    """Return a Counter of the ``field`` value of every record in ``visits``."""
    return Counter(record[field] for record in visits)


# Count page visits
page_counter = _count_by('page', page_visits)
print("Page visit counts:")
for page, count in page_counter.most_common():
    print(f"  {page}: {count} visits")

# Count visits by user
user_counter = _count_by('user', page_visits)
print("\nVisits by user:")
for user, count in user_counter.most_common():
    print(f"  {user}: {count} visits")

# Count visits by date
date_counter = _count_by('date', page_visits)
print("\nVisits by date:")
for visit_date, count in date_counter.most_common():
    print(f"  {visit_date}: {count} visits")

# Count user-page combinations (tuples are hashable, so they work as keys)
user_page_counter = Counter()
for visit in page_visits:
    user_page_counter[(visit['user'], visit['page'])] += 1
print("\nUser-page combinations:")
for (user, page), count in user_page_counter.most_common(3):  # Top 3 combinations
    print(f"  {user} visited {page}: {count} times")

# Counter arithmetic: compare page popularity between the two days
day1 = date(2025, 4, 25)
day2 = date(2025, 4, 26)

day1_pages = _count_by('page', [v for v in page_visits if v['date'] == day1])
day2_pages = _count_by('page', [v for v in page_visits if v['date'] == day2])

print("\nChanges in page popularity from day 1 to day 2:")
# Subtraction keeps the pages whose count is higher on day 2
difference = day2_pages - day1_pages

for page, count_diff in difference.most_common():
    if count_diff > 0:
        print(f"  {page}: +{count_diff} more visits on day 2")

# Intersection keeps, per page, the smaller of the two daily counts
common_pages = day1_pages & day2_pages
print("\nPages visited on both days (with minimum counts):")
for page, count in common_pages.most_common():
    print(f"  {page}: {count} visits (minimum across both days)")

Page visit counts:
  /home: 4 visits
  /products: 4 visits
  /checkout: 3 visits

Visits by user:
  user1: 5 visits
  user2: 3 visits
  user3: 3 visits

Visits by date:
  2025-04-26: 6 visits
  2025-04-25: 5 visits

User-page combinations:
  user1 visited /home: 2 times
  user1 visited /products: 2 times
  user2 visited /products: 1 times

Changes in page popularity from day 1 to day 2:
  /checkout: +1 more visits on day 2

Pages visited on both days (with minimum counts):
  /home: 2 visits (minimum across both days)
  /products: 2 visits (minimum across both days)
  /checkout: 1 visits (minimum across both days)




This example demonstrates:

1. Using Counter with dictionary values and complex keys (including tuples)
2. Extracting insights from structured data using Counter
3. Analyzing trends over time
4. Comparing different data segments through Counter arithmetic
5. Finding common patterns across data subsets

Counter is particularly useful for these types of data analysis tasks where you need to quickly aggregate and compare distributions across different dimensions of your data.

shelve.Shelf

Subclassing UserDict Instead of dict

It’s better to create a new mapping type by extending collections.UserDict rather than dict. 

In [61]:
import collections


class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as ``str`` and looks keys up by their string form."""

    def __missing__(self, key):
        # Reached when a lookup does not find ``key``: retry a non-string
        # key in its string form.
        if not isinstance(key, str):
            return self[str(key)]
        # A missing string key has no further fallback.
        raise KeyError(key)

    def __contains__(self, key):
        # All stored keys are strings, so one check against the underlying
        # dict (self.data) is enough.
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        # Normalise the key to str before storing it.
        text_key = str(key)
        self.data[text_key] = item

Immutable Mappings