# Python sets and dictionaries

## Lists/tuples (key points of the previous lesson)

In [7]:
list("English")

['E', 'n', 'g', 'l', 'i', 's', 'h']

In [8]:

digit2name = ( 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine' )
print( "     digit2name:", digit2name )

digits = [ 5, 2, 6, 2, 0, 9, 2, 1, 9 ]                     # some random numbers
print( "         digits:", digits )

digitsNames = [ digit2name[d] for d in digits ]
print( "    digitsNames:", digitsNames )

smallDigits = [ d for d in digits if d <= 3 ]
print( "    smallDigits:", smallDigits )
digitsWithNames = [ (d, digit2name[d] ) for d in digits ]  # tuples are produced here
print( "digitsWithNames:", digitsWithNames )

     digit2name: ('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine')
         digits: [5, 2, 6, 2, 0, 9, 2, 1, 9]
    digitsNames: ['five', 'two', 'six', 'two', 'zero', 'nine', 'two', 'one', 'nine']
    smallDigits: [2, 2, 0, 2, 1]
digitsWithNames: [(5, 'five'), (2, 'two'), (6, 'six'), (2, 'two'), (0, 'zero'), (9, 'nine'), (2, 'two'), (1, 'one'), (9, 'nine')]


## Set
Set is a built-in data type with the following properties:

an element is either in the set of not in the set.

elements can be added to a set or remove from it

an element added to a set cannot ever change, therefore only elements of immutable types are allowed

fronzenset is an imuutable variant of a set - once created it cannot be changed but therefore a forzonset can be used as an element of a set


In [9]:
# A new set
# two ways to create a set: {...} and set([...])
# See https://www.recipesfromitaly.com/tiramisu-original-italian-recipe/
# A *set* with ingredients needed for tiramisu:
tiramisuIngredients = { "ladyfingers", "mascarpone", "eggs", "sugar", "espresso"}
print(tiramisuIngredients)

inHouseIngredients = set( [ "eggs", "espresso", "cocoa", "butter", "strawberries" ] )
print(inHouseIngredients) 
# order information of the list is losted in set 
# even though it might happen to show the same order in the set, which is unreliable. 

print(set("egg"))
print(set(["egg"]))

{'espresso', 'mascarpone', 'eggs', 'ladyfingers', 'sugar'}
{'espresso', 'butter', 'strawberries', 'cocoa', 'eggs'}
{'g', 'e'}
{'egg'}


In [10]:
len(inHouseIngredients)

5

In [11]:
type(inHouseIngredients)

set

In [12]:
# generate an empty set via set() but not {}
print(type({}))
print(type(set()))

<class 'dict'>
<class 'set'>


# Set algebra

## Sets: is element in or not

In [13]:
"milk" in tiramisuIngredients

False

In [14]:
"milk" not in tiramisuIngredients

True

In [15]:
# A True/False map whether an in-house-ingredient is not needed for tiramisu
print([ing for ing in inHouseIngredients])
[(ing, ing not in tiramisuIngredients) for ing in inHouseIngredients]

['espresso', 'butter', 'strawberries', 'cocoa', 'eggs']


[('espresso', False),
 ('butter', True),
 ('strawberries', True),
 ('cocoa', True),
 ('eggs', False)]

Note: the set methods issubset(...) and issuperset(...) allow to test whether all elements of one set are also present in the other set.The same functions are provided though <=, => operators. The == allows to check whether two sets have identical elements.

In [16]:
tiramisuIngredients.issubset(inHouseIngredients)

False

# Sets: adding/removing elements

In [17]:
print(inHouseIngredients)
# for a single elment:
inHouseIngredients.add("bread")
print(inHouseIngredients)

# for an iterable collection of elements ( it can also be another set)
inHouseIngredients.update(["salmi", "tomato"])
print(inHouseIngredients)

print(inHouseIngredients.update({"food"}))

{'espresso', 'butter', 'strawberries', 'cocoa', 'eggs'}
{'espresso', 'butter', 'strawberries', 'bread', 'cocoa', 'eggs'}
{'espresso', 'butter', 'salmi', 'strawberries', 'tomato', 'bread', 'cocoa', 'eggs'}
None


In [18]:
# For an element currently in the set
inHouseIngredients.remove("bread")
print(inHouseIngredients)

# Note: KeyError exception would be raised because "BREAD" is not in the set:
# inHouseIngredients.remove( "BREAD" ) 

# For value which might be in the set but no error when the value is not there
inHouseIngredients.discard("BREAD")
print(inHouseIngredients)

{'espresso', 'butter', 'salmi', 'strawberries', 'tomato', 'food', 'cocoa', 'eggs'}
{'espresso', 'butter', 'salmi', 'strawberries', 'tomato', 'food', 'cocoa', 'eggs'}


# Sets: a loop over all elements

In [19]:
for ing in tiramisuIngredients:
    print("Needed for tiramisu:", ing)
    print(f"Needed for tiramisu: {ing}")
print("***")

Needed for tiramisu: espresso
Needed for tiramisu: espresso
Needed for tiramisu: mascarpone
Needed for tiramisu: mascarpone
Needed for tiramisu: eggs
Needed for tiramisu: eggs
Needed for tiramisu: ladyfingers
Needed for tiramisu: ladyfingers
Needed for tiramisu: sugar
Needed for tiramisu: sugar
***


# Dictionaries
Dict is a built-in data type with the follwing properties:

1. (key, value), key cannot be replicated; it is possible whether a key is in or not in a dict object

2. the keys must be immutable type but the values corresponding to a key can be modified.

3. new items can be added, existing items can be removed

4. for loops can iterate over all items of a dictionary, or over the keys, or over values.

5. the order of a dictionary is not trustworthy


In [20]:
# A new dictionary
day2KCal = {"Mon": 2330, "Tue":1990, "Wed": 2150}
print(day2KCal)
print(day2KCal.keys())
print(day2KCal["Mon"])

{'Mon': 2330, 'Tue': 1990, 'Wed': 2150}
dict_keys(['Mon', 'Tue', 'Wed'])
2330


In [21]:
type(day2KCal)

dict

In [22]:
{  "Mon": 2330, "Tue": 1990, "Wed": 2150, "Mon": 1000 }

{'Mon': 1000, 'Tue': 1990, 'Wed': 2150}

In [23]:
days = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")     # a tuple, having an valid order
dayKCals = [ 2330, 1990, 2150, 2290, 1920, 2370, 2050 ]         # a list, having an valid order
zip( days, dayKCals )           # a generator, iterable over tuples (day, dayKCal)
list( zip( days, dayKCals ) )   # a list of tuples generated by the generator

day2KCal = dict( zip( days, dayKCals ) )
print(day2KCal)
print(len(day2KCal))

{'Mon': 2330, 'Tue': 1990, 'Wed': 2150, 'Thu': 2290, 'Fri': 1920, 'Sat': 2370, 'Sun': 2050}
7


# Dict: getting, adding or modifying elements

In [27]:
day2KCal = { "Mon": 2330, "Tue": 1990, "Wed": 2150 }
print(day2KCal)

# to add a new value or change a value for a single key
day2KCal["Thu"] = 2290
day2KCal


{'Mon': 2330, 'Tue': 1990, 'Wed': 2150}


{'Mon': 2330, 'Tue': 1990, 'Wed': 2150, 'Thu': 2290}

In [28]:
# Multiple items can be added/updated by calling update(), for an iterable over (key, value) items
day2KCal.update([("Fri", 1920), ("Sat", 2370)])
day2KCal

{'Mon': 2330, 'Tue': 1990, 'Wed': 2150, 'Thu': 2290, 'Fri': 1920, 'Sat': 2370}

In [30]:
# to obatin the value for a provided key
print(day2KCal["Mon"])
day2KCal["Monday"]

2330


KeyError: 'Monday'

In [31]:
# The get(key, defaultValue) function may be used to avoid exceptins when the key is missing in the dictionary.
# Then the defaultValue is returned

day2KCal.get("Monday", 100)

100

In [32]:
"Tuesday" in dayKCals

False

In [33]:
aKey = "Tuesday"
aKey not in day2KCal

True

# Dict: removing an element

In [39]:
# A few possiblities exists to remove an element
day2KCal = { "Mon": 2330, "Tue": 1990, "Wed": 2150}
print(day2KCal.pop("Tue")) # removes Tue but returns the value which Tue had
                    # raises exception if the key is not found
day2KCal.pop("Monday", None) # remove only when the is present; return the second argument
                             # does not raise exception if the key not found


1990


In [41]:
del day2KCal["Mon"] # also remove an existing element
                    # raises exception if the key is not found

KeyError: 'Mon'

# Dict: all keys, all values or their pairs

In [42]:
# with values() it is possible to iterate over all values in a dictionary:
day2KCal = {"Mon": 2330, "Tue": 1990, "Wed": 2150}
day2KCal.values()

dict_values([2330, 1990, 2150])

In [43]:
kJoules = [kCal * 4.184 for kCal in day2KCal.values()]
kJoules

[9748.720000000001, 8326.16, 8995.6]

In [46]:
print(day2KCal.items())
day2KJoule = {day:kCal*4.184 for day, kCal in day2KCal.items()}
day2KJoule

dict_items([('Mon', 2330), ('Tue', 1990), ('Wed', 2150)])


{'Mon': 9748.720000000001, 'Tue': 8326.16, 'Wed': 8995.6}

In [48]:
print(day2KCal.keys())
days = tuple(day2KCal.keys())
days

dict_keys(['Mon', 'Tue', 'Wed'])


('Mon', 'Tue', 'Wed')

# Dict: a loop over elements
items(), values(), keys() can also be used in loops

In [51]:
day2KCal = { "Mon": 2330, "Tue": 1990, "Wed": 2150 }
for day, kCal in day2KCal.items():
    print("On", day, "consumed food was", kCal, "kCal or", kCal*4.184, "kJ.")

On Mon consumed food was 2330 kCal or 9748.720000000001 kJ.
On Tue consumed food was 1990 kCal or 8326.16 kJ.
On Wed consumed food was 2150 kCal or 8995.6 kJ.


# Formatting strings


In [52]:
day2KCal = {"Mon": 2330, "Tue":1990, "Wed":2150}
for day, kCal in day2KCal.items():
    print(f"On {day} consumed food was {kCal} or {kCal * 4.184} kJ.")


On Mon consumed food was 2330 or 9748.720000000001 kJ.
On Tue consumed food was 1990 or 8326.16 kJ.
On Wed consumed food was 2150 or 8995.6 kJ.


In [53]:
x = "Statistics"
y = "Data Science"
f"{x} and {y}"

'Statistics and Data Science'

In [54]:
f"{x}" " and " f'{y}'            # Note, there are several strings here
                                 # and they get concatenated

'Statistics and Data Science'

In [55]:
( f"{x}"
" and "
f"{y}" )

'Statistics and Data Science'

In [56]:
from math import pi
{ 
    "               full precision":  f"{pi}",
    "                  four digits":  f"{pi:.4f}",
    "four digits and forced + sign":  f"{pi:+.4f}",
    "                right aligned":  f"{pi:12.4f}",
    "               center aligned":  f"{pi:^12.4f}",
    "                 left aligned":  f"{pi:<12.4f}",
    "            exponent notation":  f"{pi * 1000:.4e}",
    "        with comma separators":  f"{pi * 1e6:,.2f}",     # Note: the comma here is ok
}


{'               full precision': '3.141592653589793',
 '                  four digits': '3.1416',
 'four digits and forced + sign': '+3.1416',
 '                right aligned': '      3.1416',
 '               center aligned': '   3.1416   ',
 '                 left aligned': '3.1416      ',
 '            exponent notation': '3.1416e+03',
 '        with comma separators': '3,141,592.65'}

In [66]:
# Self-study tasks
## Generating random integers
from random import randint
vs = [randint(0, 10) for i in range(0, 20)]
print(len(vs))
set(vs)

20


{0, 1, 2, 3, 4, 6, 7, 8, 9, 10}

In [73]:
# Counting elements
v2nct = {}
for num in set(vs):
    if num not in v2nct.keys():
        v2nct[num] = 1
    else:
         v2nct[num] += 1
print(v2nct)

{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1}


In [74]:
print(vs.count)

<built-in method count of list object at 0x000002111C80BC40>


# Removing from a dictionary all itmes of given values

In [76]:
licPlate2color = {
    "VA-111-V": "silver", "SB-222-W": "red", "XC-333-L": "red",
    "AB-111-E": "white", "ER-222-U": "black", "BV-333-Z": "white",
    "CC-111-J": "silver", "UI-222-R": "green", "GF-333-U": "silver",
    "WT-111-K": "white", "KJ-222-Q": "silver", "LK-333-I": "black",
}
toRemoveColors = [ "white", "silver", "black" ]

toRemoveKeys = []
for key, value in licPlate2color.items():
    if value in toRemoveColors:
        toRemoveKeys.append(key)
    else: continue
print(toRemoveKeys)

for key in toRemoveKeys:
    licPlate2color.pop(key, None)
print(licPlate2color)

['VA-111-V', 'AB-111-E', 'ER-222-U', 'BV-333-Z', 'CC-111-J', 'GF-333-U', 'WT-111-K', 'KJ-222-Q', 'LK-333-I']
{'SB-222-W': 'red', 'XC-333-L': 'red', 'UI-222-R': 'green'}


# Build a dictionary with values being lists

In [4]:
names =     ["Grzegorz", "Małgorzata", "Paweł", "Jeroen", "Sanne", "Ana", "Sofia", "Javier", "Sofia"]
countries = ["pl",       "pl",         "pl",    "nl",     "nl",    "es",  "es",    "es",     "es"]

from collections import defaultdict
zippedList = list(zip(countries, names))
#print(zippedList)

country2names = {}
for (country, name) in zippedList:
    if country not in country2names.keys():
        country2names[country] = [name]
    else: 
        country2names[country].append(name)
print(country2names)

country2names = defaultdict(list) # generate an dict with its values beng an empty list; not necessary to check the exitence of keys every time
for (country, name) in zippedList:
     country2names[country].append(name)
print(country2names)


{'pl': ['Grzegorz', 'Małgorzata', 'Paweł'], 'nl': ['Jeroen', 'Sanne'], 'es': ['Ana', 'Sofia', 'Javier', 'Sofia']}
defaultdict(<class 'list'>, {'pl': ['Grzegorz', 'Małgorzata', 'Paweł'], 'nl': ['Jeroen', 'Sanne'], 'es': ['Ana', 'Sofia', 'Javier', 'Sofia']})


# Reverse a list and sample elements

In [5]:
from random import sample

role2num = { "setter": 1, "dia": 1, "middle": 2, "outside": 2 }

player2role = { 
    "Chen": "setter", "Martijn": "setter",
    "Marnick": "dia", "Simon": "dia",
    "David": "middle", "Luuk": "middle",
    "Ronald": "outside", "Alex": "outside", "Kadir": "outside", "Koen": "outside"
}


role2players = defaultdict(list)
for player, role in player2role.items():
    role2players[role].append(player)
print(role2players)

Team = {role:sample(players, role2num[role]) for role, players in role2players.items()}
Team


defaultdict(<class 'list'>, {'setter': ['Chen', 'Martijn'], 'dia': ['Marnick', 'Simon'], 'middle': ['David', 'Luuk'], 'outside': ['Ronald', 'Alex', 'Kadir', 'Koen']})


{'setter': ['Chen'],
 'dia': ['Marnick'],
 'middle': ['Luuk', 'David'],
 'outside': ['Ronald', 'Kadir']}

# NATO phonoetic alphabet