# heap的ADT

In [1]:
class PriorityQueueBase:
    """Abstract base class for a priority queue.

    Concrete subclasses are expected to implement ``__len__``;
    ``is_empty`` is derived from it.
    """

    class Item:
        """Lightweight composite to store priority queue items."""
        __slots__ = '_key', '_value'

        def __init__(self, k, v):
            self._key = k
            self._value = v

        def __lt__(self, other):
            """Order items by key only; values never take part."""
            return self._key < other._key

        def __str__(self):
            """Return the key's string form (the value is not shown)."""
            return str(self._key)

    # This method belongs to the queue, not to Item: an Item has no
    # __len__, so calling it there could only raise TypeError.
    def is_empty(self):
        """Return True if the priority queue holds no items."""
        return len(self) == 0
        

class HeapPriorityQueue(PriorityQueueBase):
    """Min-oriented priority queue stored as an array-based binary heap.

    The entry at index ``j`` has its parent at ``(j - 1) // 2`` and its
    children at ``2 * j + 1`` and ``2 * j + 2``.
    """

    # ----------------------------- public API -----------------------------
    def __init__(self):
        """Create an empty priority queue."""
        self._data = []

    def __len__(self):
        """Return the number of stored items."""
        return len(self._data)

    def is_empty(self):
        """Return True if the queue holds no items."""
        return len(self) == 0

    def add(self, key, value):
        """Insert a key-value pair."""
        entry = self.Item(key, value)
        self._data.append(entry)
        # The new entry sits in the last slot; bubble it up to its place.
        self._upheap(len(self._data) - 1)

    def min(self):
        """Return (key, value) of the minimum-key item without removing it.

        Raises ValueError if the queue is empty.
        """
        if self.is_empty():
            raise ValueError("Priority queue is empty.")
        root = self._data[0]
        return (root._key, root._value)

    def remove_min(self):
        """Remove and return (key, value) of the minimum-key item.

        Raises ValueError if the queue is empty.
        """
        if self.is_empty():
            raise ValueError("Priority queue is empty.")
        last = len(self._data) - 1
        # Move the minimum to the end so it can be popped in O(1), then
        # repair the heap from the root downwards.
        self._swap(0, last)
        removed = self._data.pop()
        self._downheap(0)
        return (removed._key, removed._value)

    # --------------------------- index arithmetic ---------------------------
    def _parent(self, j):
        return (j - 1) // 2

    def _left(self, j):
        return 2 * j + 1

    def _right(self, j):
        return 2 * j + 2

    def _has_left(self, j):
        return self._left(j) < len(self._data)

    def _has_right(self, j):
        return self._right(j) < len(self._data)

    def _swap(self, i, j):
        """Exchange the entries at indices i and j."""
        self._data[i], self._data[j] = self._data[j], self._data[i]

    # ------------------------------- sifting -------------------------------
    def _upheap(self, j):
        """Move the entry at index j up until its parent is not larger."""
        while j > 0:
            parent = self._parent(j)
            if not self._data[j] < self._data[parent]:
                break
            self._swap(j, parent)
            j = parent

    def _downheap(self, j):
        """Move the entry at index j down until no child is smaller."""
        while self._has_left(j):
            child = self._left(j)
            if self._has_right(j):
                right = self._right(j)
                if self._data[right] < self._data[child]:
                    child = right
            # `child` is now the smaller of the children of j.
            if not self._data[child] < self._data[j]:
                return
            self._swap(j, child)
            j = child

In [2]:
# Fill the queue with (key, value) pairs in arbitrary order.
heap = HeapPriorityQueue()
for key, value in [(4, "D"), (3, "C"), (1, "A"), (5, "E"),
                   (2, "B"), (7, "G"), (6, "F"), (26, "Z")]:
    heap.add(key, value)

# Show the keys in the order they sit in the underlying array.
for item in heap._data:
    print(item)

print("min is: ")
print(heap.min())
print()

# Remove the minimum twice, reporting the new minimum after each removal.
for _ in range(2):
    print("remove min: ")
    print(heap.remove_min())
    print("Now min is: ")
    print(heap.min())
    print()

# Re-inserting key 1 makes it the minimum again.
heap.add(1, "A")
print("Now min is: ")
print(heap.min())
print()

1
2
3
5
4
7
6
26
min is: 
(1, 'A')

remove min: 
(1, 'A')
Now min is: 
(2, 'B')

remove min: 
(2, 'B')
Now min is: 
(3, 'C')

Now min is: 
(1, 'A')



# Python里的heap

In [3]:
from heapq import heappush, heappop
# Heap sort with heapq: push every value, then pop them back out.
data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
heap = []
for item in data:
    heappush(heap, item)

# Each heappop removes the current minimum, so the pops arrive ascending.
ordered = [heappop(heap) for _ in range(len(heap))]

ordered
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [4]:
import heapq

# heapify rearranges the list in place so that it satisfies the min-heap
# invariant; the result is heap-ordered, not fully sorted.
data = [1,5,3,2,8,5]
heapq.heapify(data)
data

[1, 2, 3, 5, 8, 5]

In [7]:
# A heap can also store tuples. Tuples compare element by element, so the
# first field acts as the priority and decides the pop order.
data = [(1, 'J'), (4, 'N'), (3, 'H'), (2, 'O')]
heap = []
for item in data:
    heappush(heap, item)

# Drain the heap: entries come out in ascending priority.
while heap:
    item = heappop(heap)
    priority, label = item
    print(priority, ": ", label)

1 :  J
2 :  O
3 :  H
4 :  N


In [8]:
# Class Objects

# Override __lt__ in Python 3, __cmp__ only in Python 2

class Skill(object):
    """A skill level that is ordered by its numeric priority.

    ``__lt__`` is the comparison used on Python 3. ``__cmp__`` is kept for
    callers that invoke it explicitly, but Python 3 never calls it
    implicitly.
    """

    def __init__(self, priority, description):
        """Store the fields and announce the new level on stdout."""
        self.priority = priority
        self.description = description
        print('New Level:', description)

    def __cmp__(self, other):
        """Return -1, 0 or 1 as self sorts before, equal to or after other.

        The built-in ``cmp`` was removed in Python 3, so the three-way
        result is computed from the two boolean comparisons instead.
        """
        return (self.priority > other.priority) - (self.priority < other.priority)

    def __lt__(self, other):
        """Return True if self has the lower priority value."""
        return self.priority < other.priority

    def __repr__(self):
        """Return ``"<priority>: <description>"``."""
        return f"{self.priority}: {self.description}"
    

# Constructing a Skill prints its "New Level" line as a side effect.
s1 = Skill(5, 'Proficient')
s2 = Skill(10, 'Expert')
s3 = Skill(1, 'Novice')

# Turn the list of Skill objects into a heap ordered by Skill.__lt__.
l = [s1, s2, s3]
heapq.heapify(l)

print("The 3 largest numbers in list are : ", heapq.nlargest(3, l), sep="")

# Drain the heap: skills come out in ascending priority.
while l:
    print(heappop(l))

New Level: Proficient
New Level: Expert
New Level: Novice
The 3 largest numbers in list are : [10: Expert, 5: Proficient, 1: Novice]
1: Novice
5: Proficient
10: Expert


In [9]:
import numpy as np
import heapq
# Find the two largest values of every column, two different ways.
x = np.array([[1, 2, 3, 4, 5, 6],
              [2, 3, 5, 7, 8, 1],
              [7, 9, 6, 6, 3, 2],
              [8, 9, 0, 1, 4, 7]], np.int32)

# Way 1: stream each column through a min-heap capped at two entries;
# whenever a third value arrives, the smallest of the three is evicted.
for column in x.T:
    h = []
    for value in column:
        heapq.heappush(h, value)
        if len(h) > 2:
            heapq.heappop(h)
    print(h)

# Way 2: sort every column and keep the last two rows.
np.sort(x, axis=0)[-2:]

[7, 8]
[9, 9]
[5, 6]
[6, 7]
[5, 8]
[6, 7]


array([[7, 9, 5, 6, 5, 6],
       [8, 9, 6, 7, 8, 7]])

In [10]:
# Top two values of each column via nlargest (largest first in each pair).
[heapq.nlargest(2, column) for column in x.T]

[[8, 7], [9, 9], [6, 5], [7, 6], [8, 5], [7, 6]]

# 数组中第k大的元素

In [18]:
# O(n log k) time, O(k) space: min-heap of the k largest values seen so far
def findKthLargest(nums, k):
    """Return the k-th largest value in ``nums`` (k is 1-based).

    The heap never grows beyond k entries, so once all values have been
    seen its root is the smallest of the k largest, i.e. the answer.
    Raises IndexError when the heap ends up empty (empty input or k < 1).
    """
    top = []
    for value in nums:
        if len(top) < k:
            heapq.heappush(top, value)
        else:
            # Heap is full: add the newcomer and drop the smallest in one step.
            heapq.heappushpop(top, value)
    return top[0]

In [19]:
# The five largest values are 15, 14, 12, 11 and 10, so the expected
# result is 10.
nums = [5,11,3,6,12,9,8,10,14,1,4,2,7,15]
k = 5
findKthLargest(nums, k)

10

In [20]:
# Same result as the hand-written heap version, delegated to heapq.nlargest
def findKthLargest(nums, k):
    """Return the k-th largest value in ``nums`` (k is 1-based).

    ``heapq.nlargest`` returns the k largest values in descending order,
    so the answer is the last of those k entries. Raises IndexError when
    fewer than k values are available or k < 1.
    """
    largest_first = heapq.nlargest(k, nums)
    return largest_first[k - 1]

In [21]:
# Same input as the previous demo; the expected result is again 10.
nums = [5,11,3,6,12,9,8,10,14,1,4,2,7,15]
k = 5
findKthLargest(nums, k)

10

# 出现频率最多的K个单词

In [22]:
import collections
import heapq
import functools

@functools.total_ordering
class Element:
    """A (count, word) pair ordered for use in a min-heap of top words.

    An element is "smaller" when its count is lower or, for equal counts,
    when its word sorts later alphabetically. ``total_ordering`` derives
    the remaining comparison operators from ``__lt__`` and ``__eq__``.
    """

    def __init__(self, count, word):
        self.count = count
        self.word = word

    def __lt__(self, other):
        # Crossing the words inside the tuples reverses the alphabetical
        # tie-break: with equal counts this reduces to other.word < self.word.
        return (self.count, other.word) < (other.count, self.word)

    def __eq__(self, other):
        return (self.count, self.word) == (other.count, other.word)

def topKFrequent(words, k):
    """Return the ``k`` most frequent words, most frequent first.

    Words with equal counts are ordered alphabetically, as defined by
    ``Element``'s comparison. When fewer than ``k`` distinct words exist,
    all of them are returned; ``k <= 0`` yields an empty list.
    """
    counts = collections.Counter(words)

    # Min-heap capped at k entries: its root is always the weakest
    # candidate, so evicting the root on overflow keeps the k strongest.
    freqs = []
    for word, count in counts.items():
        heapq.heappush(freqs, Element(count, word))
        if len(freqs) > k:
            heapq.heappop(freqs)

    # Drain whatever is left (possibly fewer than k entries) instead of
    # popping exactly k times, which raised IndexError on short inputs.
    res = []
    while freqs:
        res.append(heapq.heappop(freqs).word)
    # Pops arrive weakest-first; reverse to list the most frequent first.
    return res[::-1]


# "i" occurs 4 times, "love" 3 and "coding" 2; every other word occurs once,
# so the fourth slot is decided by the tie-break among those words.
words = ["i", "love", "you", "i", "love", "coding","i","like","sports","i","love","travel","coding","is","fun"]
k = 4
topKFrequent(words, k)

['i', 'love', 'coding', 'fun']

In [51]:
def topKFrequent(nums, k):
    """Return the ``k`` most common elements of ``nums``, most common first.

    Ordering, including the order of equally common elements, is whatever
    ``collections.Counter.most_common`` produces.
    """
    from collections import Counter

    ranked = Counter(nums).most_common(k)
    return [element for element, _count in ranked]

# Alternative, longer input kept for reference:
#words = ["i", "love", "you", "i", "love", "coding","i","like","sports","i","love","travel","coding","is","fun"]
# "i" and "love" occur twice each, "leetcode" and "coding" once each.
words = ["i", "love", "leetcode", "i", "love", "coding"]
k = 3
topKFrequent(words, k)

['i', 'love', 'leetcode']

# 找出和最小的K个pair

In [25]:
# Solution 1: Brute Force
# Just produce all pairs, sort them by sum, and return the first k.


import itertools
def kSmallestPairs(nums1, nums2, k):
    """Return the ``k`` pairs (u, v) with the smallest sums, as tuples.

    Brute force: every pair from ``nums1`` x ``nums2`` is materialised and
    sorted by sum. The sort is stable, so equal sums keep product order.
    """
    all_pairs = list(itertools.product(nums1, nums2))
    all_pairs.sort(key=sum)
    return all_pairs[:k]

In [26]:
# The four smallest sums are 3, 5, 7 and 9.
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

[(1, 2), (1, 4), (1, 6), (7, 2)]

In [29]:
# takes O(k) extra memory and O(mn log k) time
import heapq
def kSmallestPairs(nums1, nums2, k):
    """Return the ``k`` pairs [u, v] with the smallest sums, as lists.

    The candidate pairs are produced lazily by a generator, so only the
    entries retained by ``heapq.nsmallest`` are held at once.
    """
    candidates = ([a, b] for a in nums1 for b in nums2)
    return heapq.nsmallest(k, candidates, key=sum)

# Same input as the brute-force demo; pairs are returned as lists here.
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

[[1, 2], [1, 4], [1, 6], [7, 2]]

In [30]:
# takes O(k) extra memory and O(k^2 log k) time
import heapq
def kSmallestPairs(nums1, nums2, k):
    """Return the ``k`` pairs [u, v] with the smallest sums, as lists.

    Only the first ``k + 1`` entries of each input are considered, which
    bounds the candidate set by (k + 1)^2 pairs.
    NOTE(review): the truncation presumably relies on both inputs being
    sorted ascending - confirm against callers.
    """
    limit = k + 1
    head1, head2 = nums1[:limit], nums2[:limit]
    candidates = ([a, b] for a in head1 for b in head2)
    return heapq.nsmallest(k, candidates, key=sum)


# Both lists have only three entries, so the k + 1 truncation keeps them whole.
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 4
kSmallestPairs(nums1, nums2, k)

[[1, 2], [1, 4], [1, 6], [7, 2]]

In [31]:
# O(k log k): expand a frontier of candidate index pairs lazily
def kSmallestPairs(nums1, nums2, k):
    """Return up to ``k`` pairs [u, v] in ascending order of u + v.

    A min-heap holds (sum, i, j) entries for index pairs that have been
    discovered but not yet emitted. Popping (i, j) discovers (i, j + 1),
    and additionally (i + 1, 0) when j is 0.
    NOTE(review): correctness presumably requires both inputs to be sorted
    ascending - confirm against callers.
    """
    frontier = []

    def push(i, j):
        # Coordinates that fall off either list are silently ignored.
        if i >= len(nums1) or j >= len(nums2):
            return
        heapq.heappush(frontier, (nums1[i] + nums2[j], i, j))

    push(0, 0)
    pairs = []
    while frontier and len(pairs) < k:
        _, i, j = heapq.heappop(frontier)
        pairs.append([nums1[i], nums2[j]])
        push(i, j + 1)
        if j == 0:
            # Only first-column cells open the next row, so no index pair
            # is ever pushed twice.
            push(i + 1, 0)
    return pairs

In [32]:
# k exceeds the 3 * 3 = 9 possible pairs, so all nine are returned.
nums1 = [1,7,11]
nums2 = [2,4,6]
k = 20
kSmallestPairs(nums1, nums2, k)

[[1, 2], [1, 4], [1, 6], [7, 2], [7, 4], [7, 6], [11, 2], [11, 4], [11, 6]]

In [33]:
def kSmallestPairs2(nums1, nums2, k):
    """Return up to ``k`` pairs [u, v] in ascending order of u + v.

    The heap is seeded with the first column, (i, 0) for each usable row
    i; popping (i, j) then advances along the same row to (i, j + 1).
    NOTE(review): correctness presumably requires both inputs to be sorted
    ascending - confirm against callers.
    """
    queue = []

    def push(i, j):
        # Coordinates that fall off either list are silently ignored.
        if i < len(nums1) and j < len(nums2):
            heapq.heappush(queue, [nums1[i] + nums2[j], i, j])

    # At most k rows can contribute, and rows past the end of nums1 do not
    # exist, so bound the seeding loop by both. Looping to k alone wastes
    # k - len(nums1) no-op iterations when k is large.
    for i in range(min(k, len(nums1))):
        push(i, 0)

    pairs = []
    while queue and len(pairs) < k:
        _, i, j = heapq.heappop(queue)
        pairs.append([nums1[i], nums2[j]])
        push(i, j + 1)
    return pairs

In [34]:
# Input with duplicate values: the two smallest sums are both 1 + 1.
# NOTE(review): this calls kSmallestPairs, not the kSmallestPairs2 defined
# in the preceding cell - confirm which function was meant to be exercised.
nums1 = [1,1,2]
nums2 = [1,2,3]
k = 2
kSmallestPairs(nums1, nums2, k)

[[1, 1], [1, 1]]

# 合并K个有序列表

In [35]:
# 还没写

# 从数据流中找到中位数

In [40]:
from heapq import *

class MedianFinder:
    """Running median of a stream of numbers, kept in two heaps.

    ``self.heaps`` is a pair ``(small, large)``: ``large`` is a min-heap of
    the upper half, and ``small`` stores the lower half negated so that a
    min-heap behaves as a max-heap. ``addNum`` keeps ``large`` the same
    size as ``small`` or one entry bigger.
    """

    def __init__(self):
        self.heaps = [], []

    def addNum(self, num):
        """Insert ``num`` into the stream."""
        lower, upper = self.heaps
        # Route the number through the upper half: whatever falls out as
        # its minimum belongs to the lower half.
        displaced = heappushpop(upper, num)
        heappush(lower, -displaced)
        # Rebalance so the upper half is never the smaller one.
        if len(lower) > len(upper):
            heappush(upper, -heappop(lower))

    def findMedian(self):
        """Return the median of the numbers added so far as a float.

        Raises IndexError if nothing has been added yet.
        """
        lower, upper = self.heaps
        if len(upper) <= len(lower):
            # Even count: average the two middle values (lower is negated).
            return (upper[0] - lower[0]) / 2.0
        return float(upper[0])

In [41]:
# Feed three numbers and query the median; the middle value is 3.
finder = MedianFinder()
finder.addNum(2)
finder.addNum(3)
finder.addNum(4)
finder.findMedian()

3.0

# 管理你的项目

In [42]:
import heapq
def findMaximizedCapital(k, W, Profits, Capital):
    """Return the capital after greedily completing at most ``k`` projects.

    Starting from capital ``W``, each round unlocks every project whose
    required capital is at most the current capital, then takes the most
    profitable unlocked one. Stops early when nothing is unlocked.
    """
    # Min-heap on required capital: the cheapest locked project is on top.
    locked = [(Capital[i], Profits[i]) for i in range(len(Profits))]
    heapq.heapify(locked)
    # Max-heap on profit, stored negated because heapq is a min-heap.
    affordable = []

    for _ in range(k):
        while locked and locked[0][0] <= W:
            _, profit = heapq.heappop(locked)
            heapq.heappush(affordable, -profit)

        if not affordable:
            break

        # The popped value is a negated profit, so subtracting adds it.
        W -= heapq.heappop(affordable)

    return W

In [43]:
# With W = 0 only the first project is affordable (profit 1); afterwards
# both remaining projects are, and the better one adds 3, giving 4.
k=2
W=0
Profits=[1,2,3]
Capital=[0,1,1]

findMaximizedCapital(k, W, Profits, Capital)

4

In [44]:
def findMaximizedCapital2(k, W, Profits, Capital):
    """Return the capital after greedily completing at most ``k`` projects.

    Variant that pre-sorts the projects instead of keeping them in a heap.
    ``future`` is sorted descending by required capital, so the cheapest
    locked project sits at the end and can be removed with ``list.pop()``.
    """
    # Max-heap on profit, stored negated because heapq is a min-heap.
    current = []
    future = sorted(zip(Capital, Profits), reverse=True)
    for _ in range(k):
        while future and future[-1][0] <= W:  # affordable with current capital
            heapq.heappush(current, -future.pop()[1])
        if not current:
            # Nothing is affordable and W can no longer grow, so every
            # remaining round would be a no-op; stop instead of spinning
            # through the rest of k.
            break
        # The popped value is a negated profit, so subtracting adds it.
        W -= heapq.heappop(current)
    return W

In [45]:
# Same input as the heap-based version; the expected result is again 4.
k=2
W=0
Profits=[1,2,3]
Capital=[0,1,1]

findMaximizedCapital2(k, W, Profits, Capital)

4

# 判断丑数

In [46]:
def uglyNumber(num):
    """Return True if ``num`` is positive and has no prime factor other
    than 2, 3 and 5 (1 counts as ugly).
    """
    if num <= 0:
        return False
    for p in 2, 3, 5:
        while num % p == 0:
            # Floor division keeps the value an exact int. True division
            # (/) would produce a float, which loses precision above 2**53
            # and overflows for very large integers.
            num //= p
    return num == 1

In [49]:
# 8 = 2 * 2 * 2, so the expected result is True.
uglyNumber(8)

True

# 输出丑数

In [47]:
def nthUglyNumber(n):
    """Return the n-th ugly number (1-based).

    Ugly numbers are the positive integers whose only prime factors are
    2, 3 and 5; the sequence starts 1, 2, 3, 4, 5, 6, 8, 9, 10, 12.

    Raises:
        ValueError: if ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Min-heap of ugly numbers generated but not yet counted. `seen` stops
    # the same value from being queued twice (6 is both 2 * 3 and 3 * 2).
    frontier = [1]
    seen = {1}
    for _ in range(n - 1):
        smallest = heapq.heappop(frontier)
        for factor in (2, 3, 5):
            candidate = smallest * factor
            if candidate not in seen:
                seen.add(candidate)
                heapq.heappush(frontier, candidate)
    # n - 1 smaller values have been removed, so the root is the n-th.
    return frontier[0]

In [48]:
# The sequence starts 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, so the 10th entry is 12.
nthUglyNumber(10)

12