#### 692. Top K Frequent Words

* https://leetcode.com/problems/top-k-frequent-words/description/

#### BBG - IMP

* Three Solution - ranked in order of preference
* Min Heap + custom class using __lt__ -> O(n log k), O(n) -> Preferred solution
* Bucket Sort -> O(n) avg, O(n)
* Max Heap -> O(n log n), O(n)
 

In [None]:
# Feature,Solution 1 (Min-Heap),Solution 2 (Max-Heap),Solution 3 (Bucket Sort)
# Time Complexity,O(Nlogk),O(N+klogN),O(N+UlogU)
# Space Complexity,O(U+k),O(U),O(U)
# Best Used For,"Large streams of data, small k",Small datasets,Limited frequency range
# Readability,High (Modular),Medium,Lower (Nested loops)

# TL; DR
# The Verdict: Why Solution 1 Wins1. Optimal Memory Efficiency for ScaleIn Enterprise Data Acquisition, 
# you are often dealing with massive datasets (the "N").Solution 2 stores all unique elements in the heap.
# Solution 1 maintains a heap of size k.When $N = 10,000,000$ and $k = 10$, Solution 1 uses negligible memory for the heap, 
# whereas Solution 2's heap grows linearly with the number of unique words.
# 2. Time Complexity AdvantageSolution 1: $O(N \log k)$ — You iterate through the data once and maintain a small heap.
# Solution 2: $O(N + k \log N)$ — While heapify is $O(N)$, if $k$ is large, the repeated popping becomes expensive.
# Solution 3 (Bucket Sort): $O(N + U \log U)$ — While theoretically fast, in data acquisition, frequency distributions can be skewed. 
# If many words have the same frequency, the sort() on the bucket can spike to $O(N \log N)$, negating the benefit.

# 3. Senior-Level "Signals"Solution 1 uses a Custom Wrapper/Dataclass. This is a huge "green flag" for senior roles because:
# It demonstrates an understanding of how Python’s heapq (a min-heap) works.
# It handles the lexicographical requirement (alphabetical order for tie-breaks) elegantly by overriding __lt__.
# It avoids the "clever but unreadable" hack of negating integers or using complex tuples that confuse other developers.

In [None]:
# Preferred SOlution - Min Heap
# TC - O(n log k)
# SC - O(n)
# Next solution has proper comments

import heapq
from collections import Counter
from typing import List
from dataclasses import dataclass
import time

@dataclass
class WordEntry:
    word: str
    freq: int

    def __lt__(self, other):
        if self.freq == other.freq:
            # Reverse comparison for strings: 'b' < 'a' is True
            # This ensures the 'larger' string (like 'love') is at the 
            # top of the min-heap to be popped before 'coding'.
            return self.word > other.word
        return self.freq < other.freq

class Solution:
    
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        fc = Counter(words)

        minheap = []
        for w, f in fc.items():
            heapq.heappush(minheap, WordEntry(w, f))
            if len(minheap) > k:
                heapq.heappop(minheap)

        res = []
        while minheap:
            res.append(heapq.heappop(minheap).word)

        return res[::-1]
    
Solution().topKFrequent(['love', 'lc', 'lc', 'lc', 'love', 'i', 'i'], 2)

[WordEntry(word='love', freq=2)]
[WordEntry(word='love', freq=2), WordEntry(word='lc', freq=3)]
[WordEntry(word='love', freq=2), WordEntry(word='lc', freq=3), WordEntry(word='i', freq=2)]


['lc', 'i']

In [None]:
import heapq
from collections import Counter
from typing import List
import time

class WordEntry:
    """
    Utility class to handle custom comparison logic within the heap.
    For a Min-Heap of size K:
    - Lower frequency is 'smaller' (should be popped).
    - For same frequency, lexicographically LATER word is 'smaller' 
      (should be popped first to keep smaller strings in the top K).
    """
    def __init__(self, word: str, freq: int):
        self.word = word
        self.freq = freq

    def __lt__(self, other):
        if self.freq == other.freq:
            # Reverse comparison for strings: 'b' < 'a' is True
            # This ensures the 'larger' string (like 'love') is at the 
            # top of the min-heap to be popped before 'coding'.
            print(f'{self.word}, {other.word}, {self.word > other.word}')
            time.sleep(10)
            return self.word > other.word
        return self.freq < other.freq
    
    def __repr__(self):
        return f'{self.word} - {self.freq}'

class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        if not words or k <= 0:
            return []
            
        # Step 1: Count frequencies - O(N)
        counts = Counter(words)
        
        # Step 2: Maintain a Min-Heap of size K - O(N log K)
        min_heap = []
        for word, freq in counts.items():
            heapq.heappush(min_heap, WordEntry(word, freq))
            print(min_heap)
            time.sleep(5)
            if len(min_heap) > k:
                heapq.heappop(min_heap)
        
        # Step 3: Extract and reverse - O(K log K)
        # Since it's a min-heap, popping gives us the 'bottom' of our top-K
        res = []
        while min_heap:
            res.append(heapq.heappop(min_heap).word)
            
        return res[::-1]
    
Solution().topKFrequent(['love', 'lc', 'lc', 'lc', 'love', 'i', 'i'], 2)

In [None]:
from typing import List
from collections import Counter
import heapq
class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """
            Using Max Heap
            We need to heapify the whole counter dict, 
            hence time complexity is O (N + klog N) 
            Space complexity is O(N)
        """
        freq_counter = Counter(words)
        maxheap = [(-freq, word) for word, freq in freq_counter.items]
        heapq.heapify(maxheap)
        return [heapq.heappop(maxheap)[1] for _ in range(k)]

In [5]:
from typing import List
from collections import Counter, defaultdict

class Solution:
    def topKFrequent(self, words: List[str], k: int) -> List[str]:
        """
            Time Complexity - O(N) avg
                Counting	O(N)
                Bucket creation	O(U)
                Bucket traversal	O(N)
                Sorting buckets	O(U log U) worst case
                Overall	O(N + U log U)

            Space Complexity - O(N)

        """
        freq_counter = Counter(words)
        n = len(words)

        # buckets = [[] for _ in range(n+1)]
        buckets = defaultdict(list) # Preferred
         
        for word, freq in freq_counter.items():
            buckets[freq].append(word)

        res = []
        for freq in range(n, 0, -1):
            if not buckets[freq]:
                continue
            buckets[freq].sort()
            for word in buckets[freq]:
                res.append(word)
                if len(res) == k:
                    return res
        
        return res

Solution().topKFrequent(['3word', '3word', '3word', '1word'], k = 2)

['3word', '1word']

In [20]:
from typing import List
import time

class TrieNode:
    """
        Trie Node contains char and dict to next set of child chars
        In this case, we want to store word in the leaf node 
    """
    def __init__(self):
        self.children = {}
        self.word = None

    def __repr__(self):
        return f'{self.children} -> {self.word}'

class Trie:
    """
        Create trie starting from root node
    """
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char] # point node to child/next node
        node.word = word



class Solution:
    """
        Finds all words from the given list that exist in the board.
        Uses Trie + DFS with backtracking for optimal performance.
    """
    def findWords(self, board: List[List[str]], words: List[str]) -> List[str]:
        rows: int = len(board)
        cols: int = len(board[0])

        trie = Trie()
        for word in words:
            trie.insert(word)

        dirs = ((-1, 0), (1, 0), (0, -1), (0, 1))
        res = []
        def dfs(r, c, parent):
            char = board[r][c]

            node = parent.children.get(char)

            if not node:
                return

            if node.word:
                res.append(node.word)
                node.word = None # pruning - avoid repeating of word in res
                print(r, c, res)
                print(node)
                time.sleep(10)

            # mark visited
            board[r][c] = '#'

            for dr, dc in dirs:
                nr, nc = dr + r, dc + c
                if 0 <= nr < rows and 0 <= nc < cols and board[nr][nc] != '#':
                    dfs(nr, nc, node)

            board[r][c] = char

            if not node.children:
                parent.children.pop(char)


        for row in range(rows):
            for col in range(cols):
                dfs(row, col, trie.root)

        return res

Solution().findWords([['f','x', 'b', 'a'], ['x', 'e', 'm', 'f']], ['fx', 'fxem'])

1 0 ['fx']
{'e': {'m': {} -> fxem} -> None} -> None
1 2 ['fx', 'fxem']
{} -> None


['fx', 'fxem']

In [23]:
from collections import defaultdict,deque
class Solution:
    def kill_process(self, pid, ppid, kill):
        graph = defaultdict(list)

        for parent, child in zip(ppid, pid):
            graph[parent].append(child)

        queue = deque([kill])
        killed_processes = []
        
        while queue:
            process = queue.popleft()
            killed_processes.append(process)

            for nei in graph[process]:
                queue.append(nei)

        return killed_processes




Solution().kill_process([1,3,10,5], [3,0,5,3], 3)

[3, 1, 5, 10]