#### 140. Word Break II

* https://leetcode.com/problems/word-break-ii/description/

#### BBG - IMP

In [None]:
# Start from Brute force and go to the Optimized Preferred DFS + Memo solution below

# Brute force solution using Backtracking
# TC - Exponential
# At each start index we try every possible end index: if s[start:end] is in the word dict we take it
# and recurse on the remainder, otherwise we keep extending the candidate word

from typing import List

class Solution:
    def wordBreak(self, s: str, wordDict: List[str]) -> List[str]:
        """Return every way to split ``s`` into space-separated dictionary words.

        Plain backtracking: from each position every longer prefix is tried,
        and each prefix found in the dictionary is followed recursively.
        Sentences come out in the order shorter-first-word first.
        """
        vocabulary = set(wordDict)
        total = len(s)

        def segmentations(pos, chosen):
            # Consumed the whole string: the chosen words form one sentence.
            if pos == total:
                yield ' '.join(chosen)
                return

            for cut in range(pos + 1, total + 1):
                piece = s[pos:cut]
                if piece not in vocabulary:
                    continue
                # A fresh list per branch replaces the append/pop bookkeeping.
                yield from segmentations(cut, chosen + [piece])

        return list(segmentations(0, []))

# Demo run on a sample input; 'rate'/'rates' and 'sand'/'and' overlap, so two
# different segmentations are produced (see the recorded cell output).
Solution().wordBreak('ratesandfx', ['rate', 'rates', 'and', 'sand', 'fx'])
        

['rate sand fx', 'rates and fx']

In [None]:
# Improving the BT solution with pruning, i.e. rather than checking every end index from start_index to n each time,
# let's only check from start_index up to start_index + max word length

class Solution:
    def wordBreak(self, s: str, wordDict: List[str]) -> List[str]:
        """Return every way to split ``s`` into space-separated dictionary words.

        Backtracking with pruning: a candidate word starting at an index is
        never longer than the longest dictionary word, so the inner loop stops
        there instead of running to the end of the string.

        Args:
            s: The string to segment.
            wordDict: Allowed words; may be empty, in which case no non-empty
                string can be segmented.

        Returns:
            All valid sentences, ordered by trying shorter words first.
        """
        n = len(s)
        ws = set(wordDict)
        sol = []  # words chosen on the current path
        res = []  # completed sentences

        # default=0 keeps an empty dictionary from raising ValueError in max();
        # with a limit of 0 the loop below simply never runs.
        mwl = max(map(len, ws), default=0)

        def bt(start_index):
            if start_index == n:
                res.append(' '.join(sol))
                return

            # Never look further than the longest word or past the string end.
            last_end = min(n, start_index + mwl)
            for end_index in range(start_index + 1, last_end + 1):
                word = s[start_index:end_index]
                if word in ws:
                    sol.append(word)
                    bt(end_index)
                    sol.pop()

        bt(0)
        return res
        

In [None]:
# Optimized solution
# DFS + Memoization works best for problems like this, where the solution is an optimization over plain backtracking
# Time Complexity - O(N * L * S)
# Where:
# N = length of string
# L = max word length
# S = number of valid sentences (output size)
# Output-sensitive (important to say out loud)
# Space Complexity O(N * S)
# Memo stores lists of strings per index
# Recursion stack: O(N)

class Solution:
    def wordBreak(self, s: str, wordDict: List[str]) -> List[str]:
        """Return every way to split ``s`` into space-separated dictionary words.

        Top-down DFS with memoization: ``dfs(i)`` returns all sentences that
        can be built from the suffix ``s[i:]``, and each suffix is solved once.

        Args:
            s: The string to segment.
            wordDict: Allowed words; may be empty, in which case no non-empty
                string can be segmented.

        Returns:
            All valid sentences, ordered by trying shorter words first.
        """
        n = len(s)
        ws = set(wordDict)

        # Largest word length bounds how far a candidate word can extend.
        # default=0 keeps an empty dictionary from raising ValueError in max().
        mwl = max(map(len, ws), default=0)

        memo = {}  # start_index -> list of sentences buildable from s[start_index:]

        def dfs(start_index):
            if start_index == n:
                # One empty tail, so the caller's loop runs once and emits
                # the final word without a trailing space.
                return [""]

            if start_index in memo:
                return memo[start_index]

            sentences = []
            last_end = min(n, start_index + mwl)
            for end_index in range(start_index + 1, last_end + 1):
                word = s[start_index:end_index]
                if word not in ws:
                    continue

                # word is in the word set; extend it with every sentence
                # that can be formed from the remainder of the string.
                for tail in dfs(end_index):
                    sentences.append(f'{word} {tail}' if tail else word)

            # Cache once, after the loop, so dead-end indices (no matching
            # word at all) are remembered too.
            memo[start_index] = sentences
            return sentences

        return dfs(0)
# Demo run on a sample input; 'rate'/'rates' and 'sand'/'and' overlap, so two
# different segmentations are produced (see the recorded cell output).
Solution().wordBreak('ratesandfx', ['rate', 'rates', 'and', 'sand', 'fx'])

['rate sand fx', 'rates and fx']

In [None]:
# Why not BFS? The frontier would have to hold a partial sentence for every branch, so the tree becomes
# extremely large and space usage explodes.
# Why not Tabulation? Unlike LC 139, we are not storing a boolean per index (can the prefix be segmented?)
# but the complete list of sentences, and managing that bottom-up would be a mess.

In [8]:
# Scratch check of nested tuple unpacking: zip pairs each two-element list with
# a float, and the `(x, y), z` target splits the inner list in the same step.
for (x, y), z in (zip([["a","b"],["b","c"]], [2.0,3.0])):
    # Print both names followed by the reciprocal of the paired float.
    print(x, y, 1/z)

a b 0.5
b c 0.3333333333333333
