In [None]:
# A small set of integers used to demonstrate membership tests.
s = {2, 3, 5}

# `in` reports whether a value is an element of the set.
print(3 in s)
print(4 in s)

# Print every value in 0..6 that is absent from the set.
for x in range(7):
    if x in s:
        continue
    print(x)

True
False
0
1
4
6


In [None]:
# Sets are unordered and do not support indexing, so this line raises
# TypeError ('set' object is not subscriptable).
s[0] 

# Creating Sets

In [None]:
s = set()

In [None]:
s

set()

In [None]:
s = set(['cat', 'dog', 'giraffe'])

In [None]:
s

{'cat', 'dog', 'giraffe'}

In [None]:
s = set('Yankees')

In [None]:
s 

{'Y', 'a', 'e', 'k', 'n', 's'}

In [None]:
# Empty braces create a dict, not a set; an empty set must be written set().
s = {}
print(type(s) is set)

False


In [None]:
print(type(s))

<class 'dict'>


# Using Sets

In [None]:
# A set literal holding three strings.
s = {"hello", "Shashank", "John"}

# hash() of a str is salted per interpreter process (unless PYTHONHASHSEED is
# set), so the integer displayed for this expression differs between runs.
hash("hello")

7637479777234301884

In [None]:
len(s)

3

In [None]:
# Rebind s to a set of integers so the membership results below (and the
# add/remove cells that follow) hold when the notebook is run top to bottom;
# the previous definition of s contains only strings.
s = {1, 2, 3}

print(2 in s)      # 2 is an element
print(4 in s)      # 4 is not an element
print(4 not in s)  # `not in` is the negated membership test
print(2 not in s)

True
False
True
False


In [None]:
s.add(7)

In [None]:
s

{1, 2, 3, 7}

In [None]:
s.remove(3)

In [None]:
s

{1, 2, 7}

# Properties of Sets

In [None]:
s= {2, 4, 8}

In [None]:
s

{2, 4, 8}

In [None]:
for element in s:
  print(element)

8
2
4


In [None]:
s = {2, 2, 2}

In [None]:
s

{2}

# HASHING

In [None]:
# How sets work

How Sets Work: Hashing
Sets achieve their blazing speed using an algorithmic approach called **hashing**.

A **hash function** takes any value as input and returns an integer. The function returns the same integer each time it is called on a given value, and should generally return different integers for different values, though that does not always need to be the case. We actually don't need to build the hash function ourselves, as Python has one already, a built-in function called **hash**.

Python stores items in a set by creating a **hash table**, which is a list of n lists (called 'buckets'). Python chooses the bucket for an element based on its hash value, using hash(element) % n. Values in each bucket are not sorted, but the size of each bucket is limited to some constant K.

We get O(1) (constant-time) adding like so:
Compute the bucket index hash(element) % n -- takes O(1).
Retrieve the bucket hashTable[bucketIndex] -- takes O(1).
Append the element to the bucket -- takes O(1).

We get O(1) (constant-time) membership testing ('in') like so:
Compute the bucket index hash(element) % n -- takes O(1).
Retrieve the bucket hashTable[bucketIndex] -- takes O(1).
Check each value in the bucket if it equals the element -- takes O(1) because there are at most K values in the bucket, and K is a constant.

Q: How do we guarantee that each bucket is no larger than size K? A: Good question! If we need to add a (K+1)th value to a bucket, we instead resize our hash table, making it, say, twice as big, and then rehash every value, in effect adding each one to the new hash table. This takes O(N) time, but we do it very rarely, so the **amortized worst case** remains O(1).

A practical example of how sets are faster than lists is shown below:

In [None]:
import time

n = 100000

# The odd numbers 1, 3, ..., n-1; both timings search this same data.
a = list(range(1, n+1, 2))

# --- get time using a list first ---
# Each `x in a` scans the list from the front, so this loop does O(n) work
# per membership test.

print("Using a list... ", end='')
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock, which can be adjusted and
# may be coarse enough to report a zero-length interval.
start = time.perf_counter()
count = 0

for x in range(n+1):
    if x in a:
        count += 1

end = time.perf_counter()

elapsed1 = end - start

print(f"count={count} and time={elapsed1:0.5f} seconds")

# --- now get time using a set ---

print("\nUsing a set...")

start = time.perf_counter()

# Building the set happens inside the timed region, so the set figure
# includes the cost of the conversion.
s = set(a)

count = 0

for x in range(n+1):
    if x in s:
        count += 1

end = time.perf_counter()
elapsed2 = end - start

print(f"\ncount={count} and time = {elapsed2:0.5f} seconds")
print(f"\nAt n={n}, sets ran ~{elapsed1/elapsed2:0.1f} times faster than lists!")




Using a list... count=50000 and time=41.82062 seconds

Using a set...

count=50000 and time = 0.01401 seconds

At n=100000, sets ran ~2984.0 times faster than lists!


# isPermutation(L)

In [None]:
def isPermutation(L):
    """Return True if L is a permutation of [0, ..., n-1], with n == len(L).

    Return False otherwise. A repeated value shrinks set(L) below n elements,
    so lists with duplicates never match.
    """
    expected = set(range(len(L)))
    return set(L) == expected

def testIsPermutation():
    """Run isPermutation() over a few known cases and report success."""
    print("Testing isPermutation()...", end="")
    # (input list, expected result)
    cases = [
        ([0, 2, 1, 4, 3], True),
        ([1, 3, 0, 4, 2], True),
        ([1, 3, 5, 4, 2], False),   # 0 is missing, 5 is out of range
        ([1, 4, 0, 4, 2], False),   # 4 is repeated, 3 is missing
    ]
    for values, expected in cases:
        assert isPermutation(values) == expected
    print("Passed!")

testIsPermutation()

Testing isPermutation()...Passed!


# Dictionaries

In [None]:
# Lookup table from lower-case city name to state abbreviation.
state_map = {
    'pittsburgh': 'PA',
    'chicago': 'IL',
    'miami': 'FL',
}

# Lower-case the typed name so it matches the table's keys.
city = input("Enter a city name---> ").lower()

if city not in state_map:
    print("Sorry, never heard of it")
else:
    print(f"{city.title()} is in {state_map[city]}")
  

Enter a city name---> miami
Miami is in FL


In [None]:
# Maps each integer entered so far to the number of times it was entered.
counts = {}

while True:
    n = int(input("Enter an integer (zero to end) --> "))
    if n == 0:
        # Zero is the sentinel that ends the session; it is never counted.
        break

    # get() supplies 0 the first time a number is seen.
    counts[n] = counts.get(n, 0) + 1

    print(f"I have seen {n} a total of {counts[n]} times")

print(f"\nDone, counts: {counts}")

Enter an integer (zero to end) --> 42
I have seen 42 a total of 1 times
Enter an integer (zero to end) --> 22
I have seen 22 a total of 1 times
Enter an integer (zero to end) --> 15
I have seen 15 a total of 1 times
Enter an integer (zero to end) --> 22
I have seen 22 a total of 2 times
Enter an integer (zero to end) --> 42
I have seen 42 a total of 2 times
Enter an integer (zero to end) --> 7
I have seen 7 a total of 1 times
Enter an integer (zero to end) --> 0

Done, counts: {42: 2, 22: 2, 15: 1, 7: 1}


# Creating and Using Dictionaries

In [None]:
# create a dictionary from key:value pairs

In [None]:
pairs = [('dog', 5), ('cow', 23), ('cat', 2)]

In [None]:
d = dict(pairs)

In [None]:
d

{'cat': 2, 'cow': 23, 'dog': 5}

In [None]:
d = {'a': 1, 'b': 2, 'c': 3}

print(len(d))


3


In [None]:
print('a' in d)

True


In [None]:
# `in` on a dict tests the keys only; 2 appears in d as a value, not a key.
print(2 in d) 

False


In [None]:
print(d['a'])

1


In [None]:
print(d.get('z', 42))

42


In [None]:
d['e'] = 'wow'

In [None]:
d

{'a': 1, 'b': 2, 'c': 3, 'e': 'wow'}

In [None]:
del d['c']

In [None]:
for key in d:
  print(key, d[key])

a 1
b 2
e wow


# Properties of Dictionaries

In [None]:
ages = dict()
# Named `key` because the next cell looks the value up with ages[key].
key = 'fred'
num = 38

# Store the value under the key held in the variable.
ages[key] = num

In [None]:
ages[key]

38

In [None]:
d = dict()
d[2] = 100
d[4] = 200
d[8] = 300
d[1] = 515
d['hello'] = ['this' , 'that']
print(d)  

{2: 100, 4: 200, 8: 300, 1: 515, 'hello': ['this', 'that']}


In [None]:
d = dict()
d[2] = 100
d[2] = 200
d[2] = 400
print(d) #  keys are to be unique, will be over-ridden

{2: 400}


In [None]:
d = dict()
a = [1,2]
d['Fred'] = a
d

{'Fred': [1, 2]}

In [None]:
# += on a list mutates it in place, and d['Fred'] refers to that same list
# object, so the value seen through the dictionary includes the appended 3.
a += [3]
d['Fred']

[1, 2, 3]

# isAnagram(s1, s2)


In [None]:
from collections import defaultdict

def letterCounts(s):
    """Return a dict mapping each letter of s, upper-cased, to its count.

    Characters for which str.isalpha() is false are ignored.
    """
    tally = {}
    for letter in filter(str.isalpha, s.upper()):
        tally[letter] = tally.get(letter, 0) + 1
    return tally

def isAnagram(s1, s2):
    """Return True if s1 and s2 have identical letter counts.

    The comparison is made on the results of letterCounts() for each string.
    """
    first_counts = letterCounts(s1)
    second_counts = letterCounts(s2)
    return first_counts == second_counts

def testIsAnagram():
    """Run isAnagram() over a few known cases and report success."""
    print("Testing isAnagram()...", end="")
    # (first string, second string, expected result)
    cases = [
        ("", "", True),
        ("abCdabCd", "abcdabcd", True),
        ("abcdaBcD", "AAbbcddc", True),
        ("abcdaabcd", "aabbcddcb", False),
    ]
    for first, second, expected in cases:
        assert isAnagram(first, second) == expected
    print("Passed!")

testIsAnagram()

Testing isAnagram()...Passed!


# mostFrequent(L)

In [None]:
def mostFrequent(L):
    """Return the element that occurs most often in L, or None if L is empty.

    On a tie, the element that reached the highest count first is returned.
    """
    tally = {}
    best = None
    best_count = 0

    for item in L:
        if item in tally:
            tally[item] += 1
        else:
            tally[item] = 1

        # Strict comparison: a later element that merely ties does not win.
        if tally[item] > best_count:
            best, best_count = item, tally[item]

    return best


def testMostFrequent():
    """Run mostFrequent() over a few known cases and report success."""
    print("Testing mostFrequent()... ", end="")
    # (input list, expected result)
    cases = [
        ([2, 5, 3, 4, 6, 4, 2, 4, 5], 4),
        ([2, 3, 4, 3, 5, 3, 6, 3, 7], 3),
        ([42], 42),
        ([], None),
    ]
    for values, expected in cases:
        assert mostFrequent(values) == expected
    print("Passed!")

testMostFrequent()

Testing mostFrequent()... Passed!


# mostCommonName(L)

In [None]:
def mostCommonName(L):
    """Return the name that appears most often in L, or None if L is empty.

    When several names share the highest count, the one that reached that
    count first is returned.
    """
    counts = dict()
    mostFrequentName = None
    # Track "nothing seen yet" with a count of 0 rather than by testing
    # mostFrequentName against None: if None is itself an element of L, that
    # test stays true and the next element would always replace it.
    numOfMostFrequent = 0

    for name in L:
        # get(name, 0) yields 0 the first time a name is seen.
        counts[name] = 1 + counts.get(name, 0)
        if counts[name] > numOfMostFrequent:
            mostFrequentName = name
            numOfMostFrequent = counts[name]

    return mostFrequentName

In [None]:
def testMostCommonName():
    """Check mostCommonName() on a list with one clear winner."""
    print("Testing mostCommonName()...", end="")
    assert(mostCommonName(["Jane", "Aaron", "Cindy", "Aaron"])
           == "Aaron")
    # Report success, as the other test functions in this file do.
    print("Passed!")
testMostCommonName()

Testing mostCommonName()...

In [None]:
mostCommonName(["Jane", "Aaron", "Cindy", "Aaron"])

'Aaron'

In [None]:
mostCommonName(["Jane", "Aaron", "Cindy", "Cindy"])

'Cindy'

In [None]:
x = set([-1, 0, 1])

In [None]:
x

{-1, 0, 1}

In [None]:
sum(x)

0

# findTriplets(L)

In [None]:
def findTriplets(L):
    """Collect the values of every run of three adjacent elements of L that
    sums to zero.

    Each window of three adjacent elements is printed (as a set) as a trace.
    Returns a flat set holding the elements of all zero-sum windows; the set
    is empty when L has fewer than three elements or no window sums to zero.
    """
    s = set()

    for start in range(len(L) - 2):
        window = [L[start], L[start + 1], L[start + 2]]
        print(set(window))
        # Sum the three values themselves, not their set: a set drops
        # repeated values, so [1, 1, -2] would appear to sum to -1 and
        # [2, 2, -2] would wrongly appear to sum to 0.
        if sum(window) == 0:
            s.update(window)

    return s

In [None]:
findTriplets([-1, 0, -3, 2, 1])

{0, -3, -1}
{0, 2, -3}
{1, 2, -3}


{-3, 1, 2}

In [None]:
5/1.25

4.0

In [None]:
2/1.25

1.6