# EC2202 Sets and Maps

## Student Information

* Student ID: 20225056
* Name: 김창완

**Disclaimer.**
These code examples are based on:
1. [KAIST CS206 (Professor Otfried Cheong)](https://otfried.org/courses/cs206/)
2. [GeeksForGeeks](https://practice.geeksforgeeks.org/)
3. Coding Interviews

In [None]:
import doctest
import time
import math

## Sets

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/eHArNzW0gdQ" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/Fkzw3we-OT4" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

### Implementing the Set ADT Using a Python list

In [None]:
class set():
  """Set ADT implemented on top of a plain Python list.

  The elements live in ``self._data`` in insertion order, without
  duplicates. Every membership test is a linear scan of that list.
  """

  def __init__(self, items=None):
    """Create a set, optionally filled with the elements of `items`."""
    self._data = []  # empty list = empty set
    if items:
      for item in items:
        self.add(item)

  def __contains__(self, item):
    # we are going to make this ~O(1) in the next lecture
    return item in self._data

  def __len__(self):
    return len(self._data)

  def add(self, item):
    """Insert `item` unless it is already in the set."""
    if item not in self._data:
      self._data.append(item)

  def remove(self, item):
    """Delete `item` from the set; raise KeyError if it is not present."""
    if item in self._data:
      self._data.remove(item) # use the default `remove` for lists
    else:
      raise KeyError(item)

  def discard(self, el):
    """Delete `el` from the set if present; do nothing otherwise."""
    if el in self._data:
      self._data.remove(el) # use remove for list

  ################################
  ######## 'ppp' exercise ########
  ################################
  def __eq__(self, t):
    """Return True when `t` is a set holding exactly the same elements.

    Comparison with anything that is not an instance of this class is
    left to Python's default handling (NotImplemented).
    """
    if not isinstance(t, set):
      return NotImplemented
    # Neither side stores duplicates, so equal sizes plus "self is a subset
    # of t" already implies equality. This avoids running both is_subset
    # and is_superset, which would repeat the expensive scan.
    if len(self) != len(t):
      return False
    return self.is_subset(t)

  ################################
  ######## 'ppp' exercise ########
  ################################
  def is_subset(self, t):
    """Return True when every element of this set is in `t` (s <= t)."""
    return all(item in t for item in self._data)

  ################################
  ######## 'ppp' exercise ########
  ################################
  def is_superset(self, t):
    """Return True when every element of `t` is in this set (s >= t)."""
    # subset vs. superset: it is easy to mix up `in` / `not in` and
    # True / False here, so keep the direction straight.
    # When `t` is also an instance of this class this is the same as
    # `t.is_subset(self)`; it is spelled out so `t` may be any iterable.
    return all(item in self._data for item in t)

  ################################
  ######## 'ppp' exercise ########
  ################################
  def union(self, t):
    """Return a new set with the elements of this set and of `t`.

    Non-destructive: neither this set nor `t` is modified.
    """
    new_set = set()
    new_set._data.extend(self._data)  # copy elements to new set
    for item in t:
      new_set.add(item)
    return new_set

  def __iter__(self):
    return _SetIterator(self._data)

  def __repr__(self):
    return "ListSet(" + ",".join(repr(item) for item in self._data) + ")"

class _SetIterator():
  """Iterator that walks a list from front to back by position."""

  def __init__(self, l):
    self._l = l          # the list being traversed
    self._current = 0    # index of the next entry to hand out

  def __iter__(self):
    return self

  def __next__(self):
    position = self._current
    # Guard clause: nothing left once the cursor reaches the end.
    if position >= len(self._l):
      raise StopIteration
    entry = self._l[position]
    self._current = position + 1
    return entry

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/pCShafI6jMc" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/XjoqMXMgNtU" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

### Applications

set은 구현이 간단하기 때문에 다른 자료형보다도 활용(Application)이 중요하다!

#### A Simple Spell Checker

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; read_words() opens its word list
# from a path under this mount point.
drive.mount('/content/drive')

def read_words():
  """Read the word-list file and return its entries as a set.

  Each line of the file contributes one word, with surrounding whitespace
  (including the newline) stripped. The file is opened in a `with` block
  so it is closed even if reading fails part-way.
  """
  words = set()
  with open("/content/drive/My Drive/수업자료/[EC2202] Data Structures/01_lecture_slides/words-5000.txt", "r") as s:
    for w in s:
      words.add(w.strip())
  return words  # dictionary of valid words

def spell(given_word):
  """Return True if `given_word` is in the word list, otherwise False.

  The word list is loaded again through read_words() on every call.
  """
  return given_word in read_words()


def spell_interactive():
  """Prompt for words in a loop and report whether each is in the word list.

  Input is stripped and lower-cased before the lookup. An empty line ends
  the loop.
  """
  vocabulary = read_words()
  while True:
    word = input("Tell me a word> ").strip().lower()
    if not word:
      break
    if word not in vocabulary:
      print("Error: '%s' is not a word" % word)
      continue
    print("'%s' is a word" % word)

Mounted at /content/drive


In [None]:
# spell() requires a word argument, so calling it with none raises
# TypeError; start the interactive checker instead.
spell_interactive()

#### **'ppp' Exercise** Sieve of Eratosthenes
- This finds the prime numbers smaller than a given N.

In [None]:
def sieve(n):
  '''finds the set of prime numbers smaller than n

  Sieve of Eratosthenes: start from every integer in [2, n) and strike out
  the multiples of each surviving number p for which p * p < n.
  Returns an empty set when n <= 2.

  >>> sorted(sieve(20))
  [2, 3, 5, 7, 11, 13, 17, 19]
  '''
  candidates = set(range(2, n))
  p = 2
  # A composite number below n has a factor p with p * p < n, so larger
  # values of p have nothing left to strike out.
  while p * p < n:
    if p in candidates:
      # Multiples below p * p were already removed by a smaller factor.
      for multiple in range(p * p, n, p):
        if multiple in candidates:
          candidates.remove(multiple)
    p += 1
  return candidates
"""
<naive implementation (solution)>

def sieve(n):
  prime_numbers = set(range(2, n))
  for i in range(2, n):
    k = 2
    while i * k < n:
      prime_numbers.discard(i * k)
      k += 1  # move on to the next multiple; without this the loop never ends
  return prime_numbers

<efficient implementation>  --> this one takes a little thought to understand.

# using "n = sqrt(n) * sqrt(n)": a composite below n has a factor <= sqrt(n)

def sieve(n):
  nums = set(range(2, n))
  for num in range(2, int(n ** 0.5) + 1):
    if num in nums:
      for k in range(num * 2, n+1, num):
        if k in nums:
          nums.remove(k)  
  return nums._data

"""



################################
########### caution! ###########
################################
num_list = list(range(10))

for item in num_list:  # error in other languages
  num_list.remove(item)

print(num_list) # An empty list might be expected, but this prints [1, 3, 5, 7, 9].
# Avoid modifying a container while a for loop is iterating over it; the result is hard to predict.
################################

num = 100  # upper bound handed to sieve

# Time a single sieve call with the performance counter.
start_time = time.perf_counter()
primes = sieve(num)
stop_time = time.perf_counter()

# Print the returned numbers on one line, separated by spaces.
for i in primes:
  print(i, end=" ")
print()

print("Runtime %g secs" % (stop_time - start_time))

[1, 3, 5, 7, 9]
2th iteration: {2}
3th iteration: {2, 3}
5th iteration: {2, 3, 5}
7th iteration: {2, 3, 5, 7}
11th iteration: {2, 3, 5, 7, 11}
13th iteration: {2, 3, 5, 7, 11, 13}
17th iteration: {2, 3, 5, 7, 11, 13, 17}
19th iteration: {2, 3, 5, 7, 11, 13, 17, 19}
23th iteration: {2, 3, 5, 7, 11, 13, 17, 19, 23}
29th iteration: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29}
31th iteration: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31}
37th iteration: {2, 3, 5, 37, 7, 11, 13, 17, 19, 23, 29, 31}
41th iteration: {2, 3, 5, 37, 7, 41, 11, 13, 17, 19, 23, 29, 31}
43th iteration: {2, 3, 5, 37, 7, 41, 11, 43, 13, 17, 19, 23, 29, 31}
47th iteration: {2, 3, 5, 37, 7, 41, 11, 43, 13, 47, 17, 19, 23, 29, 31}
53th iteration: {2, 3, 5, 37, 7, 41, 11, 43, 13, 47, 17, 19, 53, 23, 29, 31}
59th iteration: {2, 3, 5, 37, 7, 41, 11, 43, 13, 47, 17, 19, 53, 23, 59, 29, 31}
61th iteration: {2, 3, 5, 37, 7, 41, 11, 43, 13, 47, 17, 19, 61, 53, 23, 59, 29, 31}
67th iteration: {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/gppsSfE80NY" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/zHtg3cJVPb4" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

## Maps

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/I5UMIL_bbXY" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/Gi5JXlcWzsg" title="YouTube video player" frameborder="0" allowfullscreen></iframe>

### Implementing the Map ADT

In [None]:
class dict():
  """Map ADT kept as a list of (key, value) tuples; lookups scan the list."""

  def __init__(self):
    self._data = []

  def __len__(self):
    return len(self._data)

  def __setitem__(self, k, value):
    """Bind `k` to `value`, replacing an existing binding in place.

    Example: di = dict(); di['5'] = [1, 2, 3, 4, 5]
    """
    pos = self._findkey(k)
    if pos < 0:
      # Unknown key: the new pair goes at the end.
      self._data.append((k, value))
      return
    self._data[pos] = (k, value)

  def _findkey(self, k):
    """Return the list position of key `k`, or -1 when it is absent."""
    for pos, pair in enumerate(self._data):
      if k == pair[0]:
        return pos
    return -1

  def __contains__(self, k):
    return self._findkey(k) >= 0

  ################################
  ######## 'ppp' exercise ########
  ################################
  def __getitem__(self, k):
    """Return the value bound to `k`; raise KeyError if `k` is absent.

    Example: print(di['5']) => [1, 2, 3, 4, 5]
    """
    pos = self._findkey(k)
    if pos < 0:
      raise KeyError(k)
    return self._data[pos][1]

  ################################
  ######## 'ppp' exercise ########
  ################################
  def get(self, k, v0=None):
    """Return the value bound to `k`, or `v0` when `k` is absent."""
    pos = self._findkey(k)
    return v0 if pos < 0 else self._data[pos][1]

  def keys(self):
    return _MapIterator(self._data)

  def __repr__(self):
    rendered = [repr(k) + ": " + repr(v) for k, v in self._data]
    return "ListMap(" + ",".join(rendered) + ")"

  def __iter__(self):
    return _MapIterator(self._data)
    
class _MapIterator():
  """Iterator over the keys of a list of (key, value) pairs."""

  def __init__(self, d):
    self._d = d          # the list of pairs being traversed
    self._current = 0    # index of the next pair to visit

  def __iter__(self):
    return self

  def __next__(self):
    position = self._current
    # Guard clause: stop once every pair has been visited.
    if position >= len(self._d):
      raise StopIteration
    key = self._d[position][0]
    self._current = position + 1
    return key

### Applications

#### Converting an mRNA sequence to a protein sequence

In [None]:
# Rosalind PROT
# Translate an mRNA sequence (bases A, C, G, U) into the corresponding
# protein string of one-letter amino-acid codes
#
# Example input: AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
# Output: MAMAPRTEINSTRING

# Lookup table: each three-base codon maps to a one-letter code,
# or to "Stop" for the three stop codons (UAA, UAG, UGA).
codon = { "UUU" : "F",    "CUU" : "L", "AUU" : "I", "GUU" : "V",
          "UUC" : "F",    "CUC" : "L", "AUC" : "I", "GUC" : "V",
          "UUA" : "L",    "CUA" : "L", "AUA" : "I", "GUA" : "V",
          "UUG" : "L",    "CUG" : "L", "AUG" : "M", "GUG" : "V",
          "UCU" : "S",    "CCU" : "P", "ACU" : "T", "GCU" : "A",
          "UCC" : "S",    "CCC" : "P", "ACC" : "T", "GCC" : "A",
          "UCA" : "S",    "CCA" : "P", "ACA" : "T", "GCA" : "A",
          "UCG" : "S",    "CCG" : "P", "ACG" : "T", "GCG" : "A",
          "UAU" : "Y",    "CAU" : "H", "AAU" : "N", "GAU" : "D",
          "UAC" : "Y",    "CAC" : "H", "AAC" : "N", "GAC" : "D",
          "UAA" : "Stop", "CAA" : "Q", "AAA" : "K", "GAA" : "E",
          "UAG" : "Stop", "CAG" : "Q", "AAG" : "K", "GAG" : "E",
          "UGU" : "C",    "CGU" : "R", "AGU" : "S", "GGU" : "G",
          "UGC" : "C",    "CGC" : "R", "AGC" : "S", "GGC" : "G",
          "UGA" : "Stop", "CGA" : "R", "AGA" : "R", "GGA" : "G",
          "UGG" : "W",    "CGG" : "R", "AGG" : "R", "GGG" : "G" }

rna = input("mRNA sequence> ")

proteins = []  # one-letter codes collected while translating

i = 0
transcribing = False  # True from a start codon (AUG) until the next Stop
# `<=` rather than `<` so the final complete codon is read as well;
# a trailing partial codon of one or two bases is ignored.
while i <= len(rna) - 3:
  cod = rna[i:i+3]
  p = codon[cod]  # raises KeyError if the triplet is not in the table
  if cod == "AUG":
    transcribing = True
  if p == "Stop":
    transcribing = False
  if transcribing:
    proteins.append(p)
  i += 3

print("".join(proteins))

#### **'ppp' Exercise** [Amazon] Longest subarray having sum k

In [None]:
def long_subarr_k(arr, k):
  '''Given an array arr containing len(arr) integers and an integer k, 
  long_subarr_k finds
    1) the length of the longest subarray
       with the sum of the elements equal to the given value k, and
    2) the list of elements summing to k.
  Returns the tuple (length, elements), or 0 when no non-empty subarray
  sums to k. Among several longest subarrays, the one that ends first
  is returned.
  >>> long_subarr_k([10, 5, 2, 7, 1, 9], 15)
  (4, [5, 2, 7, 1])
  >>> long_subarr_k([-1, 2, 3], 6)
  0
  >>> long_subarr_k([-5, 8, -14, 2, 4, 12], -5)
  (5, [-5, 8, -14, 2, 4])
  '''
  # Maps a prefix sum to the smallest number of leading elements that
  # produces it; the empty prefix has sum 0.
  first_seen = {0: 0}
  best_start, best_len = 0, 0
  running = 0
  for end, value in enumerate(arr, start=1):
    running += value
    # arr[start:end] sums to k exactly when prefix(start) == running - k.
    start = first_seen.get(running - k)
    if start is not None and end - start > best_len:
      best_start, best_len = start, end - start
    # Keep only the earliest position so later matches are as long as possible.
    first_seen.setdefault(running, end)
  if best_len == 0:
    return 0
  return best_len, arr[best_start:best_start + best_len]

In [None]:
# Run the examples in long_subarr_k's docstring; the third argument is
# verbose=False, so only failing examples are reported.
doctest.run_docstring_examples(long_subarr_k, globals(), False, __name__)

**********************************************************************
File "__main__", line 7, in __main__
Failed example:
    long_subarr_k([10, 5, 2, 7, 1, 9], 15)
Expected:
    (4, [5, 2, 7, 1])
Got:
    [10, 5]
**********************************************************************
File "__main__", line 9, in __main__
Failed example:
    long_subarr_k([-1, 2, 3], 6)
Exception raised:
    Traceback (most recent call last):
      File "/usr/lib/python3.10/doctest.py", line 1350, in __run
        exec(compile(example.source, filename, "single",
      File "<doctest __main__[1]>", line 1, in <module>
        long_subarr_k([-1, 2, 3], 6)
      File "<ipython-input-25-0a026291f7ba>", line 21, in long_subarr_k
        m = min(d.keys())
    ValueError: min() arg is an empty sequence
**********************************************************************
File "__main__", line 11, in __main__
Failed example:
    long_subarr_k([-5, 8, -14, 2, 4, 12], -5)
Expected:
    (5, [-5, 8, -14, 2, 4])
G

In [None]:
%%HTML
<iframe width="560" height="315" src="https://www.youtube.com/embed/X_xCSkCoR3w" title="YouTube video player" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://www.youtube.com/embed/0v3ODG1grAw" title="YouTube video player" frameborder="0" allowfullscreen></iframe>