In [1]:
# In this chapter, we cover the last of the data collection objects: the set.
# Perhaps the best use for sets is that they cannot contain duplicate values, so they are useful for storing unique values.
# They are also unordered. A set itself can be changed (items can be added or removed), but every item stored in it must be an unchangeable (hashable) value.

names = {'Tony','Peter','Natasha','Wanda',1,2,3}
names

# Here, we use the curly brackets as we did with a dictionary.
# However, now we have no key-value pairs and the content resembles that of a list or tuple.
# We are not constrained to having only strings in it, so we have included some integers in our set as well.

{1, 2, 3, 'Natasha', 'Peter', 'Tony', 'Wanda'}

In [2]:
type(names)

set

In [3]:
dir(names)

['__and__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__iand__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__isub__',
 '__iter__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__rsub__',
 '__rxor__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 'add',
 'clear',
 'copy',
 'difference',
 'difference_update',
 'discard',
 'intersection',
 'intersection_update',
 'isdisjoint',
 'issubset',
 'issuperset',
 'pop',
 'remove',
 'symmetric_difference',
 'symmetric_difference_update',
 'union',
 'update']

In [4]:
# Here, we can see that the ordering of the set doesn’t resemble what we put into it.
# We can also create a set using the set built-in function as covered earlier.

names = set(('Tony','Peter','Natasha','Wanda',1,2,3))
names

{1, 2, 3, 'Natasha', 'Peter', 'Tony', 'Wanda'}

In [5]:
names = set(['Tony','Peter','Natasha','Wanda',1,2,3])
names

{1, 2, 3, 'Natasha', 'Peter', 'Tony', 'Wanda'}

In [6]:
# We can create a set from a string but we need to be aware of how the curly brackets and set built-in work.

names = {'Wanda'}
names

{'Wanda'}

In [7]:
# What happens is that when you pass a string in using the curly brackets, the full string is
# retained as a single item, but when it is passed in using set, the string is split into its individual characters.

names = set('Wanda')
names

{'W', 'a', 'd', 'n'}

In [8]:
# Note: ('Wanda') is just a parenthesised string, not a one-element tuple (that would
# be written ('Wanda',)), so set() still iterates over the individual characters.
names = set(('Wanda'))
names

{'W', 'a', 'd', 'n'}

In [9]:
# Next, we try to put a list inside a set, but this raises a TypeError because a list is unhashable and so cannot be stored in a set.

my_set = {'Tony','Wanda', 1, 2, ['hello','world']}

TypeError: unhashable type: 'list'

In [10]:
# The same is true for dictionaries and sets as we show below:

my_set = {'Tony','Wanda', 1, 2, {'key':'value'}}

TypeError: unhashable type: 'dict'

In [11]:
my_set = {'Tony','Wanda', 1, 2, {1,2,3}}

TypeError: unhashable type: 'set'

In [12]:
# However, if we include a tuple in the set we get the following:

my_set = {'Wanda','Tony',1,2,(4,5,6)}
my_set

# The reason we can include the tuple but not the dictionary, list or set
# is that the tuple cannot be changed (it is hashable), so it is supported in a set.

{(4, 5, 6), 1, 2, 'Tony', 'Wanda'}

In [13]:
'Tony' in my_set

True

In [14]:
(4,5,6) in my_set

True

In [15]:
1 in my_set

True

In [16]:
4 in my_set

False

In [17]:
'Natasha' in my_set

False

In [18]:
# An item could be added to the set by using the add method:

names = {'Tony','Peter','Natasha','Wanda',1,2,3}
names

{1, 2, 3, 'Natasha', 'Peter', 'Tony', 'Wanda'}

In [19]:
names.add('Steve')
names

{1, 2, 3, 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [20]:
# Note here that when we add Tony to the set again, we don't get duplicate values of Tony in
# the set, whereas Steve was added in the previous cell because it was not already present.

names.add('Tony')
names

{1, 2, 3, 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [21]:
# That aspect of not having duplicate values within the set is useful 
# if we want to have a unique representation of values where we could 
# have duplicates. For example, you could imagine a long list with lots 
# of repeated values and you just want the unique values within it as we show below:

days = ['Monday','Monday','Tuesday','Wednesday','Sunday','Sunday']
days

['Monday', 'Monday', 'Tuesday', 'Wednesday', 'Sunday', 'Sunday']

In [22]:
# Now, the example is fairly trivial as we can see all content within the list but you can
# imagine examples within a big dataset where you may want a similar representation.

days_set = set(days)
days_set

{'Monday', 'Sunday', 'Tuesday', 'Wednesday'}

In [23]:
# The above is useful for a single set.
# We also have the ability to operate on multiple sets.
# The next example looks to obtain the unique values across two sets.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}

In [24]:
# Here, we use the | operator and get the values that are in the names_1 set OR the
# names_2 set so any shared values are included only once.

names_1 | names_2

{'Carol', 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [25]:
# The same can be achieved using the set method 'union'.

names_1.union(names_2)

{'Carol', 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [26]:
# We could rewrite passing a list into the union and get the same results.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = ['Steve','Peter','Carol','Wanda']
names_1.union(names_2)

{'Carol', 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [27]:
# So, we can achieve the same result because the union method accepts any iterable, such as a list.
# If we use the | operator with the list, however, we see an error.

names_1 | names_2

TypeError: unsupported operand type(s) for |: 'set' and 'list'

In [28]:
# We can extend the union example by passing more items into the union method, for
# example, if we have two sets that we want to take the union with our existing set we would
# do as follows:

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_3 = {'Tony','Johnny','Sue','Wade'}

In [29]:
names_1 | names_2 | names_3

{'Carol',
 'Johnny',
 'Natasha',
 'Peter',
 'Steve',
 'Sue',
 'Tony',
 'Wade',
 'Wanda'}

In [30]:
names_1.union(names_2, names_3)

{'Carol',
 'Johnny',
 'Natasha',
 'Peter',
 'Steve',
 'Sue',
 'Tony',
 'Wade',
 'Wanda'}

In [31]:
# Now, if the values were lists and not sets, then we can still use the union method as shown below:

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = ['Steve','Peter','Carol','Wanda']
names_3 = ['Tony','Johnny','Sue','Wade']

In [32]:
names_1.union(names_2, names_3)

{'Carol',
 'Johnny',
 'Natasha',
 'Peter',
 'Steve',
 'Sue',
 'Tony',
 'Wade',
 'Wanda'}

In [33]:
names_1 | names_2 | names_3

TypeError: unsupported operand type(s) for |: 'set' and 'list'

In [34]:
# If we want to find out what values are in all sets we use the intersection method or the & operator.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}

In [35]:
names_1 & names_2

{'Peter', 'Wanda'}

In [36]:
names_1.intersection(names_2)

{'Peter', 'Wanda'}

In [37]:
# We can include more than two sets.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_3 = {'Tony','Johnny','Sue','Wade'}

names_1 & names_2 & names_3

set()

In [38]:
names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_3 = {'Peter','Johnny','Sue','Wade'}

names_1 & names_2 & names_3

{'Peter'}

In [39]:
# And similarly as shown with the union method we can add non-sets into the intersection method.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = ['Steve','Peter','Carol','Wanda']
names_3 = ['Peter','Johnny','Sue','Wade']

names_1.intersection(names_2,names_3)

{'Peter'}

In [40]:
# If we want to look at the differences between two or more sets, then we can use the 'difference' method or the - operator.
# Again, the rules that we have seen before are consistent here: we can only use the - operator
# between sets, whereas the difference method can handle non-sets being passed in.
# (Here names_2 is still the list defined in the previous cell, which is why the - operator fails.)

names_1 - names_2

TypeError: unsupported operand type(s) for -: 'set' and 'list'

In [41]:
names_1.difference(names_2)

{'Natasha', 'Tony'}

In [42]:
names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_3 = {'Peter','Johnny','Sue','Wade'}

names_1 - names_2

{'Natasha', 'Tony'}

In [43]:
# The manner in which difference is applied for more than one comparison is to work left
# to right so we first look at the difference between names_1 and names_2 and then look at
# the difference between this result and names_3.

names_1 - names_2 - names_3

{'Natasha', 'Tony'}

In [44]:
names_1.difference(names_2,names_3)

{'Natasha', 'Tony'}

In [45]:
# Another set comparison that we can perform is using the 'symmetric_difference' method or the ^ operator.
# What this does is return the elements that are in either set but not in both,
# so it's like the | operator (union) but doesn't include any common values.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_3 = {'Peter','Johnny','Sue','Wade'}

In [46]:
names_1 ^ names_2

{'Carol', 'Natasha', 'Steve', 'Tony'}

In [47]:
names_1.symmetric_difference(names_2)

{'Carol', 'Natasha', 'Steve', 'Tony'}

In [48]:
names_1 ^ names_2 ^ names_3

{'Carol', 'Johnny', 'Natasha', 'Peter', 'Steve', 'Sue', 'Tony', 'Wade'}

In [49]:
# Unlike the previous methods, 'symmetric_difference' accepts only one argument.
# However, that argument can still be a non-set.

names_1.symmetric_difference(names_2,names_3)

TypeError: symmetric_difference() takes exactly one argument (2 given)

In [50]:
names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = ['Steve','Peter','Carol','Wanda']

names_1.symmetric_difference(names_2)

{'Carol', 'Natasha', 'Steve', 'Tony'}

In [51]:
# We can also check whether two sets have no elements in common by using the 'isdisjoint' method.
# It returns True when the sets share no elements and False as soon as they share at least one.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_1

{'Natasha', 'Peter', 'Tony', 'Wanda'}

In [52]:
names_2 = {'Steve', 'Bruce', 'Carol', 'Wanda'}
names_2

{'Bruce', 'Carol', 'Steve', 'Wanda'}

In [53]:
names_1.isdisjoint(names_2)

False

In [54]:
names_2 = {'Steve', 'Bruce', 'Carol', 'Sue'}
names_2

{'Bruce', 'Carol', 'Steve', 'Sue'}

In [55]:
names_1.isdisjoint(names_2)

True

In [56]:
# There are a couple of other set methods:
# issubset
# issuperset

In [57]:
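# The 'pop' method removes and returns an arbitrary element from the set, so the value returned may differ from run to run.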

names = {'Steve', 'Wanda', 'Peter', 'Tony', 'Natasha'}
names

{'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [58]:
names.pop()

'Tony'

In [59]:
names

{'Natasha', 'Peter', 'Steve', 'Wanda'}

In [60]:
# 'remove' method

names = {'Steve', 'Wanda', 'Peter', 'Tony', 'Natasha'}
names

{'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [61]:
names.remove('Tony')

In [62]:
names

{'Natasha', 'Peter', 'Steve', 'Wanda'}

In [63]:
# Now the problem with the above is that if we try to remove something from the set that
# isn't in there, then we get a KeyError.

# Another way to remove something from the set that allows for a value not to be in the set 
# is the 'discard' method which is demonstrated below:

names.remove('Tony')

KeyError: 'Tony'

In [64]:
names.discard('Peter')

In [65]:
names

{'Natasha', 'Steve', 'Wanda'}

In [66]:
names.discard('Sue')

In [67]:
# We can also clear the set by using the 'clear' method which gives us an empty set.

names.clear()
names

set()

In [68]:
# 'add' method

names.add('Peter')
names

{'Peter'}

In [69]:
# add() is able to take one argument only.

names.add('Bruce','Sue')

TypeError: add() takes exactly one argument (2 given)

In [70]:
names.add(('Bruce','Sue'))
names

{('Bruce', 'Sue'), 'Peter'}

In [71]:
# Now the add method adds values one at a time.
# There are also methods, similar in nature to the set comparisons we looked at
# earlier, that modify a set in place; the first of these that we will
# show is the 'update' method.

names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve','Peter','Carol','Wanda'}
names_1 | names_2

{'Carol', 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [72]:
names_1

{'Natasha', 'Peter', 'Tony', 'Wanda'}

In [73]:
names_2

{'Carol', 'Peter', 'Steve', 'Wanda'}

In [74]:
names_1.update(names_2)
names_1

{'Carol', 'Natasha', 'Peter', 'Steve', 'Tony', 'Wanda'}

In [75]:
# You can see that the result of the | operator applied to the two sets is the same as the contents of names_1 after calling 'update'.
# The big difference here is that when you use the | operator you don't change either of the sets.
# However, using the 'update' method changes the set that you have called the method on, so in this case the
# names_1 set is now the result of names_1 | names_2.

In [76]:
names_1 = {'Tony','Peter','Natasha','Wanda'}
names_2 = {'Steve', 'Peter', 'Carol', 'Wanda'}
names_1 & names_2

{'Peter', 'Wanda'}

In [77]:
names_1

{'Natasha', 'Peter', 'Tony', 'Wanda'}

In [78]:
names_2

{'Carol', 'Peter', 'Steve', 'Wanda'}

In [79]:
names_1.intersection_update(names_2)
names_1

# As with the 'update' method, 'intersection_update' applies the & operation
# but assigns the result back to the set that it is applied to.

{'Peter', 'Wanda'}

In [80]:
# The same is true for 'symmetric_difference_update', which gives the same result as ^,
# and 'difference_update', which gives the difference between two sets.

# The last concept that we look at in this chapter is the frozen set.
# The frozen set is to a set what the tuple is to a list, in that it cannot be altered.

frozen_names = frozenset({'Tony','Peter','Natasha','Wanda'})
frozen_names

frozenset({'Natasha', 'Peter', 'Tony', 'Wanda'})

In [81]:
frozen_name = frozenset(('Tony','Peter','Natasha','Wanda'))
frozen_name

frozenset({'Natasha', 'Peter', 'Tony', 'Wanda'})

In [82]:
frozen_name = frozenset(['Tony','Peter','Natasha','Wanda'])
frozen_name

frozenset({'Natasha', 'Peter', 'Tony', 'Wanda'})

In [83]:
frozen_name = frozenset({'Tony'})
frozen_name

frozenset({'Tony'})

In [84]:
# Note: ('Tony') is a plain string rather than a tuple, so frozenset() iterates over
# its characters; a one-element tuple would be written ('Tony',).
frozen_name = frozenset(('Tony'))
frozen_name

frozenset({'T', 'n', 'o', 'y'})

In [85]:
type(frozen_name)

frozenset

In [86]:
dir(frozen_name)

['__and__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__rsub__',
 '__rxor__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 'copy',
 'difference',
 'intersection',
 'isdisjoint',
 'issubset',
 'issuperset',
 'symmetric_difference',
 'union']

In [87]:
# We use frozenset around a set, list, tuple, or string to give us our set but looking at the
# methods available you can see that the ones which change the set are not available to a
# frozen set, however the methods that allow us to compare sets are still available. 