## Dictionaries
Notes from section 3. Topics covered:
* Creating dictionaries
* Common operations
* Dictionary views
* Updating, merging and copying
* Custom classes and hashing

#### Dict comprehension example

In [1]:
import math

x_coords = [-2, -1, 0, 1, 2] 
y_coords = [-2, -1, 0, 1, 2] 

In [2]:
grid = [(x, y) for x in x_coords for y in y_coords]
grid

[(-2, -2),
 (-2, -1),
 (-2, 0),
 (-2, 1),
 (-2, 2),
 (-1, -2),
 (-1, -1),
 (-1, 0),
 (-1, 1),
 (-1, 2),
 (0, -2),
 (0, -1),
 (0, 0),
 (0, 1),
 (0, 2),
 (1, -2),
 (1, -1),
 (1, 0),
 (1, 1),
 (1, 2),
 (2, -2),
 (2, -1),
 (2, 0),
 (2, 1),
 (2, 2)]

In [3]:
# Generate a dict with coord tuple as key and calculated distance from origin as value
grid_extended = {(x, y): math.hypot(x, y) for x, y in grid}
grid_extended

{(-2, -2): 2.8284271247461903,
 (-2, -1): 2.23606797749979,
 (-2, 0): 2.0,
 (-2, 1): 2.23606797749979,
 (-2, 2): 2.8284271247461903,
 (-1, -2): 2.23606797749979,
 (-1, -1): 1.4142135623730951,
 (-1, 0): 1.0,
 (-1, 1): 1.4142135623730951,
 (-1, 2): 2.23606797749979,
 (0, -2): 2.0,
 (0, -1): 1.0,
 (0, 0): 0.0,
 (0, 1): 1.0,
 (0, 2): 2.0,
 (1, -2): 2.23606797749979,
 (1, -1): 1.4142135623730951,
 (1, 0): 1.0,
 (1, 1): 1.4142135623730951,
 (1, 2): 2.23606797749979,
 (2, -2): 2.8284271247461903,
 (2, -1): 2.23606797749979,
 (2, 0): 2.0,
 (2, 1): 2.23606797749979,
 (2, 2): 2.8284271247461903}

#### zip() as an example of generating a dict

In [4]:
d = dict(zip("abc", range(1, 4)))
d

{'a': 1, 'b': 2, 'c': 3}

#### .get() to look for key in dictionary
* returns None instead of raising KeyError if the key is not in the dictionary
* optionally returns a specified default value if the key is not in the dict

In [5]:
d['a']

1

In [6]:
# Looking up a non-existent key raises KeyError
d['python']

KeyError: 'python'

In [7]:
# Looking up a non-existent key with .get() returns None
result = d.get('python')
type(result)

NoneType

In [8]:
# Looking up a non-existent key with .get() returns the specified default value
result = d.get('z', 'N/A')
result

'N/A'

In [9]:
result = d.get('a', 'N/A')
result

1

#### Example of using the .get() method on a dictionary

In [10]:
text = "Dictionaries are ubiquitous in Python. Classes are essentially dictionaries, modules are dictionaries, namespaces are dictionaries, sets are dictionaries and many more."

In [11]:
counts = {}
for c in text:
    # If c doesn't exist get returns 0 and then adds 1
    counts[c] = counts.get(c, 0)+1

print(counts)

{'D': 1, 'i': 19, 'c': 6, 't': 9, 'o': 9, 'n': 11, 'a': 16, 'r': 11, 'e': 18, 's': 16, ' ': 20, 'u': 4, 'b': 1, 'q': 1, 'P': 1, 'y': 3, 'h': 1, '.': 2, 'C': 1, 'l': 4, 'd': 6, ',': 3, 'm': 4, 'p': 1}


In [12]:
# Count characters case-insensitively, ignoring whitespace
counts = {}
for c in text:
    
    # lower() folds case; strip() turns a whitespace character into ''
    key = c.lower().strip()
    if key:  # skip whitespace, which strip() reduced to an empty string
        # get() returns 0 for a key not seen yet, so its count starts at 1
        counts[key] = counts.get(key, 0) +1

print(counts)

{'d': 7, 'i': 19, 'c': 7, 't': 9, 'o': 9, 'n': 11, 'a': 16, 'r': 11, 'e': 18, 's': 16, 'u': 4, 'b': 1, 'q': 1, 'p': 2, 'y': 3, 'h': 1, '.': 2, 'l': 4, ',': 3, 'm': 4}


#### .pop() to remove the element with the specified key from a dictionary
Returns the value of the removed element, or the specified default value if the key is not in the dict (without a default, a missing key raises KeyError)

In [13]:
d = dict.fromkeys('abcd', 0)
d

{'a': 0, 'b': 0, 'c': 0, 'd': 0}

In [14]:
result = d.pop('a', 100)
result

0

In [15]:
result = d.pop('z', 100)
result

100

In [16]:
d

{'b': 0, 'c': 0, 'd': 0}

#### .setdefault() to add a new key to a dictionary 
Returns the existing value if the key is in the dict; otherwise inserts the key with the given default value and returns that value

In [17]:
d.setdefault('x', 100)

100

In [18]:
d.setdefault('b', 100)

0

In [19]:
d

{'b': 0, 'c': 0, 'd': 0, 'x': 100}

#### Example of using .setdefault()

In [20]:
import string

print(string.ascii_lowercase)
print(string.ascii_uppercase)

abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ


In [21]:
def cat_key(c):
    """Categorize a single character.

    Returns 'lower' for an ASCII lowercase letter, 'upper' for an ASCII
    uppercase letter, None for a space and 'other' for anything else,
    including the empty string and strings longer than one character.
    """
    # The lookup below is a substring test, which would otherwise accept
    # '' (a substring of every string) and runs such as 'abc'.
    if len(c) != 1:
        return 'other'

    categories = {' ': None,
                  string.ascii_lowercase: 'lower',
                  string.ascii_uppercase: 'upper'}
    # Each key is the string of all characters belonging to one category
    for chars, label in categories.items():
        if c in chars:
            return label
    return 'other'

In [22]:
cat_key('A'), cat_key('g'), cat_key('@'), cat_key(' ')

('upper', 'lower', 'other', None)

In [23]:
def cat_key2(c):
    """Look up the category of ``c`` in a per-character table.

    Returns 'lower' or 'upper' for ASCII letters, None for a space and
    'other' for any key that is not in the table.
    """
    # Start with the space entry, then add one entry per letter
    lookup = {' ': None}
    lookup.update(dict.fromkeys(string.ascii_lowercase, 'lower'))
    lookup.update(dict.fromkeys(string.ascii_uppercase, 'upper'))
    if c in lookup:
        return lookup[c]
    return 'other'

In [24]:
cat_key2('A'), cat_key2('g'), cat_key2('@'), cat_key2(' ')

('upper', 'lower', 'other', None)

In [25]:
categories = {}
for c in text:
    
    key = cat_key(c)
    if key:
        categories.setdefault(key, set()).add(c)
    
for cat in categories:
    print(f'{cat}: ', ''.join(categories[cat]))

upper:  PCD
lower:  qeybtrdpcslinohamu
other:  ,.


#### Views and manipulating views
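* keys views are always set-like (keys are unique and hashable)
* values views are not set-like (values may be duplicated or unhashable)
* items views of (key, value) pairs are set-like if all values are hashable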

Sets are not ordered objects!

In [26]:
d1= {'a': 1, 'b': 2, 'c':3}
d2= {'b': 2, 'c': 30, 'd': 4}

In [27]:
k1 = d1.keys()
k2 = d2.keys()
k1 & k2

{'b', 'c'}

In [28]:
# If key in both dicts, copy key to new dictionary and assign values from both dicts
new_dict = {key: (d1[key], d2[key]) for key in d1.keys() & d2.keys()}
print(new_dict)

{'c': (3, 30), 'b': (2, 2)}


In [29]:
# Identify items whose keys are not common to both dictionaries (here 'd' and 'e')
d1= {'a': 1, 'b': 2, 'c':3, 'd': 4}
d2= {'a': 10, 'b': 20, 'c': 30, 'e': 5}

In [30]:
# Define unique keys: union - intersection
k = (d1.keys() | d2.keys()) - (d1.keys() & d2.keys())
k

{'d', 'e'}

In [31]:
# Alternatively use symmetric difference which is equal to union - intersection
k = d1.keys() ^ d2.keys()
k

{'d', 'e'}

In [32]:
# Prefer d1's value and fall back to d2 only when 'd' is missing from d1.
# An `or` chain would also skip a falsy value (0, '', None) stored in d1.
d1.get('d', d2.get('d'))

4

In [33]:
# Collect the value of every key that lives in exactly one of the dicts
results = {}
for key in k:
    # Nested .get() falls back to d2 only when the key is missing from d1;
    # `d1.get(key) or d2.get(key)` would also skip falsy values such as 0.
    results[key] = d1.get(key, d2.get(key))
print(results)

{'e': 5, 'd': 4}


In [34]:
# Map every key found in exactly one dict to its value. The nested .get()
# keeps falsy values (0, '', None) from d1 instead of skipping them as `or` would.
results = {key: d1.get(key, d2.get(key)) for key in d1.keys() ^ d2.keys()}
print(results)

{'e': 5, 'd': 4}


### Updating, Merging and Copying

#### .update 
Updates the dict in place with the keys and values from the specified dict. Values of existing keys are overwritten

In [35]:
# Use .update() to merge d2 into d1 in place
d1 = {'a': 1, 'b': 2}
d2 = {'b': 20, 'c': 3}
d1.update(d2)
print(d1)

{'a': 1, 'b': 20, 'c': 3}


#### Unpack dicts to new dictionary

In [36]:
conf_defaults = dict.fromkeys(('host', 'port', 'user', 'pwd', 'database'), None)
conf_defaults

{'host': None, 'port': None, 'user': None, 'pwd': None, 'database': None}

In [37]:
conf_global ={'port': 5432, 'database': 'deepdive'}
conf_global

{'port': 5432, 'database': 'deepdive'}

In [38]:
conf_dev = {'host': 'localhost', 'user': 'test', 'pwd': 'test'}
conf_dev

{'host': 'localhost', 'user': 'test', 'pwd': 'test'}

In [39]:
conf_prod = {'host': 'prodpg.deepdive.com', 'user': '$prod_user', 'pwd': '$prod_pwd', 'database': 'deepdive_prod'}
conf_prod

{'host': 'prodpg.deepdive.com',
 'user': '$prod_user',
 'pwd': '$prod_pwd',
 'database': 'deepdive_prod'}

Precedence: conf_defaults --> conf_global --> conf_dev / conf_prod (later dicts override earlier ones)

In [40]:
conf = {**conf_defaults, **conf_global, **conf_dev}
conf

{'host': 'localhost',
 'port': 5432,
 'user': 'test',
 'pwd': 'test',
 'database': 'deepdive'}

In [41]:
conf = {**conf_defaults, **conf_global, **conf_prod}
conf

{'host': 'prodpg.deepdive.com',
 'port': 5432,
 'user': '$prod_user',
 'pwd': '$prod_pwd',
 'database': 'deepdive_prod'}

#### passing keyword arguments to a function

In [42]:
def my_func(*, kw1, kw2, kw3):
    """Print the three keyword-only arguments on one line, separated by spaces."""
    values = (kw1, kw2, kw3)
    print(*values)

In [43]:
d = {'kw2': 20, 'kw1': 10, 'kw3': 30}

In [44]:
my_func(**d)

10 20 30


### Custom Classes and Hashing


#### Hash overview

In [45]:
t1 = (1, 2, 3)
t2 = (1, 2, 3)

In [46]:
# Separate id proves t1 and t2 are two different objects
id(t1), id(t2)

(84101464, 84101384)

In [47]:
# t1 and t2 are equal
t1==t2

True

In [48]:
# t1 and t2 are not the same object
t1 is t2

False

In [49]:
# If two objects are equal then they should have the same hash
hash(t1), hash(t2)

(2528502973977326415, 2528502973977326415)

In [50]:
# Hence t1 and t2 could be used to recover the same value from a dict
d = {t1: 100}

In [51]:
d[t1]

100

In [52]:
d[t2]

100

t2 is a different object than t1, but since it is equal to t1 and has the same hash, it finds the same key

In [53]:
d[(1, 2, 3)]

100

#### Classes
To get the same result with class instances as with tuples, i.e. to use different but equal objects as dict keys, the objects must compare equal and have the same hash value.\
The class must define the \_\_eq\_\_ method so that equality between instances can be checked.\
Once \_\_eq\_\_ is defined, Python makes instances of the class unhashable by implicitly setting \_\_hash\_\_ to None.\
Therefore the \_\_hash\_\_ method must also be defined manually.

In [54]:
class Point:
    """A 2D point usable as a dict key interchangeably with an equal (x, y) tuple.

    Equality and hash are both derived from ``(x, y)``, so a Point and the
    tuple with the same coordinates compare equal and hash alike.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return f'({self.x}, {self.y})'

    def __eq__(self, other):
        """Compare with another Point or with a 2-tuple of coordinates."""
        if isinstance(other, Point):
            return self.x == other.x and self.y == other.y
        if isinstance(other, tuple):
            # Compare coordinates directly; a tuple of any other length
            # can never equal a point
            return len(other) == 2 and self.x == other[0] and self.y == other[1]
        # Unknown type: let Python try the reflected comparison on `other`
        # before it falls back to "not equal"
        return NotImplemented

    def __hash__(self):
        # Same hash as the (x, y) tuple so both find the same dict entry
        return hash((self.x, self.y))

In [55]:
pt1 = Point(0,0)
pt2 = Point(1,1)
points = {pt1: 'origin', pt2: 'point at (1,1)'}

In [56]:
# Now we can get values out of points dictionary in various ways
points[pt1], points[Point(0, 0)], points[(0,0)]

('origin', 'origin', 'origin')

This will no longer work if we mutate x or y of pt1 or pt2, because the new hash value of the mutated key differs from the hash under which the key was stored

In [57]:
class Person:
    """A person identified by ``_id``.

    Equality and hash depend only on ``_id``, so ``name`` and ``age`` can
    change without affecting dict lookups that use a Person as the key.
    """

    def __init__(self, id, name, age):
        self._id = id
        self.name = name
        self.age = age

    def __repr__(self):
        return f'Person(id={self._id}, name={self.name}, age={self.age})'

    def __eq__(self, other):
        """Two persons are equal when their ids are equal."""
        if not isinstance(other, Person):
            # Unknown type: let Python try the reflected comparison on
            # `other` before it falls back to "not equal"
            return NotImplemented
        return self._id == other._id

    def __hash__(self):
        # Hash only the id, the same attribute that equality is based on
        return hash(self._id)

In [58]:
p1 = Person ('john', 'John', 28)
persons = {p1: 'john object'}

In [59]:
persons[p1]

'john object'

In [60]:
# object comparison is based on id attribute
persons[Person('john', 'qwerty', 30)]

'john object'

Dictionary lookups are not affected by changes to name or age, because equality and hash depend only on the id

In [61]:
p1.name = 'Eric'
p1.age = 70
p1

Person(id=john, name=Eric, age=70)

In [62]:
persons[Person('john', None, None)]

'john object'