# Sets


## Sets

* unordered
* unique elements only - great for getting the unique items out of some collection
* written with curly braces, e.g. {3, 6, 7}

![Set](https://upload.wikimedia.org/wikipedia/commons/thumb/6/6d/Venn_A_intersect_B.svg/440px-Venn_A_intersect_B.svg.png)

https://en.wikipedia.org/wiki/Set_theory

In [None]:
#

In [3]:
s = {3,3,6,1,3,6,7} # curly braces build a set as well as a dictionary; duplicates are dropped
print(s)

{1, 3, 6, 7}


In [4]:
nset = set((3,3,6,1,3,6,7)) # alternative: the set() constructor, which accepts any iterable (here a tuple)
nset

{1, 3, 6, 7}

In [5]:
num_set = set([1,2,6,2,7,2,1]) # could pass a list
num_set

{1, 2, 6, 7}

In [6]:
a = set("ķiļķēni un klimpas") # set() accepts any iterable; a string contributes one element per character
a

{' ', 'a', 'i', 'k', 'l', 'm', 'n', 'p', 's', 'u', 'ē', 'ķ', 'ļ'}

In [7]:
b = {"abracadbra","abba", "dubba", "abba",56,7,2,12,2,2,1,1}
b

{1, 12, 2, 56, 7, 'abba', 'abracadbra', 'dubba'}

In [8]:
bset = set(["abracadbra","abba", "dubba", "abba"])
bset

{'abba', 'abracadbra', 'dubba'}

In [11]:
aset = set("abracadbra")
aset

{'a', 'b', 'c', 'd', 'r'}

In [9]:
set(["abracadbra"])  # a list holding one string gives a one-element set; the string is not split into characters

{'abracadbra'}

In [12]:
for c in aset: # iteration order of a set is not guaranteed
    print(c)

b
c
d
a
r


In [13]:
# membership testing with `in` is very fast even for large sets
# in computer science terms it is an O(1) lookup on average,
# i.e. roughly constant time even with millions of elements - much faster than in a list
'a' in aset, 'b' in aset, 'f' in aset 

(True, True, False)

In [14]:
mylist = sorted(aset) # sorted() accepts any iterable and always returns a new list
mylist

['a', 'b', 'c', 'd', 'r']

In [15]:
# membership testing in a list is a linear scan (O(n)),
# so it gets much slower than a set as the list grows (e.g. beyond ~10_000 items)
'a' in mylist, 'b' in mylist, 'f' in mylist 


(True, True, False)

In [17]:
type(s), type(aset)

(set, set)

In [None]:
a

{' ', 'a', 'i', 'k', 'l', 'm', 'n', 'p', 's', 'u', 'ē', 'ķ', 'ļ'}

In [18]:
myletters = list(a)
myletters

['p', 'n', 'a', 'i', 'ķ', 'ē', 'u', 'k', 'l', ' ', 'm', 's', 'ļ']

In [22]:
"|".join(sorted(a)) # you can join with any character even blank space
# note that sorted() orders strings by Unicode code point, so the Latvian letters (ē, ķ, ļ) come after all the English ones
# TODO: sort in a locale-aware way (e.g. with locale.strxfrm as the sort key)

' |a|i|k|l|m|n|p|s|u|ē|ķ|ļ'

In [None]:
myletters[:3]

['a', 'ķ', 'u']

In [None]:
al = list(a)
al

['a', 'ķ', 'u', 'l', 'm', 'n', 'p', 's', ' ', 'ē', 'ļ', 'i', 'k']

In [None]:
sorted(al)

[' ', 'a', 'i', 'k', 'l', 'm', 'n', 'p', 's', 'u', 'ē', 'ķ', 'ļ']

In [24]:
s = {1,2,65,2,6,3}
s

{1, 2, 3, 6, 65}

In [23]:
nset = set(range(10))
nset

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [25]:
s.issubset(nset)  # False because s contains 65, which is not in nset

False

In [26]:
n_3_7 = set(range(3,8))
n_3_7

{3, 4, 5, 6, 7}

In [27]:
n_3_7.issubset(nset)

True

In [28]:
# Alternative operator syntax
n_3_7 < nset # proper (strict) subset: every element of n_3_7 is in nset AND the two sets are not equal

True

In [30]:
n_3_7 <= nset  # <= is the operator form of issubset (the sets are allowed to be equal)

True

In [31]:
nset < nset  # a set is never a proper subset of itself

False

In [33]:
nset <= nset  # every set is a (non-strict) subset of itself

True

In [35]:
s

{1, 2, 3, 6, 65}

In [34]:
nset.issuperset(s)

False

In [None]:
nset, s

({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 6, 65})

In [36]:
s.remove(65) # remove() deletes an element in place; it raises KeyError if the element is missing (discard() does not)
s

{1, 2, 3, 6}

In [37]:
nset, s

({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 6})

In [38]:
nset.issuperset(s)

True

In [39]:
nset > s, nset >= s, nset < s  # operator forms: > proper superset, >= superset, < proper subset

(True, True, False)

In [40]:
s.issuperset(range(6))  # the method form accepts any iterable; False because s lacks 0, 4 and 5

False

In [41]:
nset.issuperset(range(6))

True

In [42]:
n_5_9 = set(range(5,10))
n_5_9

{5, 6, 7, 8, 9}

In [43]:
n_3_7.union(n_5_9)

{3, 4, 5, 6, 7, 8, 9}

In [None]:
# the shorter union syntax is the | operator
n_3_7 | n_5_9 # builds a new set holding every element that appears in either set

{3, 4, 5, 6, 7, 8, 9}

In [44]:
n_3_7.intersection(n_5_9)

{5, 6, 7}

In [45]:
n_3_7 & n_5_9 # same as intersection above so only elements in BOTH sets

{5, 6, 7}

In [46]:
n_5_7 = n_3_7 & n_5_9  # we can store the values
n_5_7

{5, 6, 7}

In [47]:
n_5_7 = n_3_7 & n_5_9 & nset # nset is 0 to 9
n_5_7

{5, 6, 7}

In [49]:
set(range(7))

{0, 1, 2, 3, 4, 5, 6}

In [50]:
n_5_6 = n_3_7 & n_5_9 & set(range(7)) # range(7) stops at 6
n_5_6 # only 5 and 6 are in ALL 3 sets

{5, 6}

In [51]:
n_3_7.difference(n_5_9) # only the elements of n_3_7 that are not in n_5_9

{3, 4}

In [52]:
n_3_7 - n_5_9, n_5_9 - n_3_7 # - is the operator form of difference; note that the order of operands matters

({3, 4}, {8, 9})

In [53]:
n_3_7.symmetric_difference(n_5_9) # elements found in exactly one of the two sets - the set analogue of logical XOR

{3, 4, 8, 9}

In [54]:
n_3_7 ^ n_5_9 # ^ is short for .symmetric_difference

{3, 4, 8, 9}

{1, 2, 3, 6}

In [56]:
# we can update a single set from several iterables of different types in one call
# note that the string "Abba" is consumed character by character
s.update({3,3,6,2,7,9},range(4,15), [3,6,7,"Valdis", "Badac","Valdis"],"Abba")
s

{1,
 10,
 11,
 12,
 13,
 14,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 'A',
 'Badac',
 'Valdis',
 'a',
 'b'}

In [None]:
dir(s)

['__and__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__iand__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__isub__',
 '__iter__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__rsub__',
 '__rxor__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 'add',
 'clear',
 'copy',
 'difference',
 'difference_update',
 'discard',
 'intersection',
 'intersection_update',
 'isdisjoint',
 'issubset',
 'issuperset',
 'pop',
 'remove',
 'symmetric_difference',
 'symmetric_difference_update',
 'union',
 'update']

In [57]:
# isdisjoint checks whether our set has NO elements in common with another set (or any iterable)
n_3_7.isdisjoint(n_5_9) # False because the sets share 5, 6 and 7

False

In [59]:
n_8_9 = set((8,9))
n_8_9

{8, 9}

In [60]:
n_3_7.isdisjoint(n_8_9)  # True because the two sets share no elements

True

In [61]:
sentence = "a quick brown fox jumped over a sleeping dog which is not a normal dog"
words = sentence.split()
words

['a',
 'quick',
 'brown',
 'fox',
 'jumped',
 'over',
 'a',
 'sleeping',
 'dog',
 'which',
 'is',
 'not',
 'a',
 'normal',
 'dog']

In [62]:
unique_words_set = set(words)
unique_words_set

{'a',
 'brown',
 'dog',
 'fox',
 'is',
 'jumped',
 'normal',
 'not',
 'over',
 'quick',
 'sleeping',
 'which'}

In [63]:
unique_words_list = list(unique_words_set)
unique_words_list

['quick',
 'jumped',
 'not',
 'sleeping',
 'brown',
 'over',
 'dog',
 'which',
 'fox',
 'a',
 'is',
 'normal']

In [None]:
# in summary: use a set to obtain the unique elements of a collection,
# then convert back to another data structure (e.g. a list) if needed