In [None]:
# Skip this cell
%config InteractiveShell.ast_node_interactivity = "none"
from jamcoders.datasets import shake_words
# Count unigrams and bigrams
u_counts = {}  # word -> how many times it is followed by another word
b_counts = {}  # word -> {following word -> how many times that pair occurs}

# NOTE(review): presumably each `sent` is a list of word strings - confirm
# against jamcoders.datasets.
for sent in shake_words:
    # Pair every word with the word right after it. The last word of a
    # sentence has no follower, so it is never counted as a context here.
    for context, next_word in zip(sent, sent[1:]):
        u_counts[context] = u_counts.get(context, 0) + 1
        followers = b_counts.setdefault(context, {})
        followers[next_word] = followers.get(next_word, 0) + 1

# Normalize (convert) bigrams into probabilities:
# P(next_word | context) = count(context, next_word) / count(context)
bigram_model = {
    context: {
        next_word: count / u_counts[context]
        for next_word, count in followers.items()
    }
    for context, followers in b_counts.items()
}

# How to solve (almost) any coding problem

Yesterday, I mentioned my approach to solving open coding problems.

It had 5 steps. Do you remember them?

1. **Read** the question
2. Work through **examples** and identify patterns
3. Write down **pseudocode**
4. Translate pseudocode into **Python**
5. **Trace** the examples (step 2) through your code

Today, we will work through a few coding problems using this approach.

### A Question
Write a function `is_strictly_decreasing` which takes a list of integers `lst` and returns `True` if the list is sorted in strictly decreasing order (each number is less than the number before it), `False` otherwise.

Examples:
- `[1,2,3] --> False`
- `[1, 1, 1] --> False`
- `[2, 1, 0] --> True`

In [None]:
# Student solution (nice job!)
def is_strictly_decreasing(lst):
    """Return True if every number in `lst` is smaller than the one before it.

    An empty list or a list with one element has no adjacent pair that could
    break the rule, so the result is True for those.
    """
    # zip(lst, lst[1:]) walks over each (previous, current) pair of neighbours.
    return all(previous > current for previous, current in zip(lst, lst[1:]))

### Another Question
Write a function `is_alternating` which takes a list of integers `lst` and returns `True` if the list alternates between increasing and decreasing (each element is either greater than or less than the previous element, with the pattern switching).

Examples:
- `[1,2,3] --> False`
- `[1, 1, 1] --> False`
- `[2, 1, 3] --> True`

In [None]:
# Skipped in lecture
def is_alternating(lst):
    """Return True if `lst` keeps switching between going up and going down.

    Every element must differ from the one before it, and the direction of
    each step (up or down) must be the opposite of the previous step.

    Examples:
        [1, 2, 3] --> False  (two steps up in a row)
        [1, 1, 1] --> False  (equal neighbours are neither up nor down)
        [2, 1, 3] --> True   (down, then up)

    Lists with fewer than two elements have no step that could break the
    pattern, so they return True.
    """
    for i in range(len(lst) - 1):
        # Equal neighbours are neither an increase nor a decrease.
        if lst[i] == lst[i + 1]:
            return False
        # From the second step on, compare this step's direction with the
        # previous step's direction; they have to be different.
        if i > 0:
            went_up_before = lst[i] > lst[i - 1]
            goes_up_now = lst[i + 1] > lst[i]
            if went_up_before == goes_up_now:
                return False
    return True

# Back to bigrams ✌️

Wait, what's a bigram model again?
> A bigram model predicts the next word _based on the previous word_

Remember our bigram model?

`# bigram_model is loaded from yesterday`

Me neither! Let's investigate to find its data type and how to use it 🔎

In [None]:
# What functions can we call on bigram_model to "understand" it?
# 1) Its type tells us which operations we are allowed to use on it.
print(type(bigram_model))
# 2) Its keys are the context words the model has an entry for.
print(bigram_model.keys())
# 3) Looking up one context word shows what the model stores for it
#    (this raises a KeyError if 'now' is not one of the keys).
print(bigram_model['now'])

Next, let's generate from bigram models: repeatedly generate next words until we reach a desired length.

In [None]:
from jamcoders.random import sample_from_dict
def gen(model, start_word, length):
    """Exercise: generate words from a bigram model.

    Input:
        model: the bigram model
        start_word: a single word to start from
        length: how many words to generate

    You can assume start_word is in the model.
    """
    # A function needs at least one statement in its body, otherwise Python
    # refuses to run the cell. `pass` is a placeholder that does nothing.
    pass  # TODO: replace this line with your code

In [None]:
# Solution
from jamcoders.random import sample_from_dict
# Recursive implementation. Note: Input is context (list of str), not just the start word!
def gen(model, context, length):
    """Extend `context` with up to `length` words sampled from a bigram model.

    Args:
        model: dict mapping a word to a dict of {next word: probability}.
        context: list of words generated so far; it must contain at least
            one word, because the last word decides what can come next.
        length: how many more words to add.

    Returns:
        A list starting with the words of `context`, followed by the new
        words. The list passed in as `context` is not modified. The result
        is shorter than requested if generation reaches a word the model
        has no next-word entry for.
    """
    # Base case: nothing left to add. Using <= (not ==) also stops the
    # recursion if a negative length is passed in by mistake.
    if length <= 0:
        return context
    context_word = context[-1]
    # A word that was never seen with a following word has no entry in the
    # model. Stop here instead of crashing with a KeyError.
    if context_word not in model:
        return context
    probs = model[context_word]
    if not probs:
        return context
    next_word = sample_from_dict(probs)
    # Recursive case: same problem, one more word in the context and one
    # fewer word left to generate.
    return gen(model, context + [next_word], length - 1)

Now let's generate a poem from our model 📜

In [None]:
# TODO: try your own words here - each one starts a line of the poem.
verse_starts = ['juliet', 'hamlet', 'macbeth']
for start in verse_starts:
    # gen takes the context as a list of words, so wrap the single start word.
    print(gen(bigram_model, [start], 5))

# 🖊️🍍🍎🖊️

Mr. Kosaka wrote a new song **AppleLemonApp**, but disaster has struck. His computer crashed and removed all the spaces from his lyrics. Now he's staring at a string of letters with no idea where one word ends and another begins.

Help him find the missing spaces!

In [None]:
# The only words that may appear in the lyrics.
vocab = [
    "apple",
    "lemon",
    "app",
]
# Examples: applelemonapp --> True   (apple + lemon + app)
#           appapplemon   --> True   (app + app + lemon)
#           apples        --> False  (the trailing "s" is not a word)

In [None]:
def can_seg(string, vocab):
    """Exercise: can `string` be split into words that all come from `vocab`?

    Should return True or False. With vocab = ["apple", "lemon", "app"]:
        applelemonapp --> True
        appapplemon   --> True
        apples        --> False
    """
    # A function needs at least one statement in its body, otherwise Python
    # refuses to run the cell. `pass` is a placeholder that does nothing.
    pass  # TODO: replace this line with your code

In [None]:
# Solution
def can_seg(string, vocab):
    """Return True if `string` can be cut into pieces that are all in `vocab`.

    Plain recursion: try each vocabulary word as the first piece and ask the
    same question about whatever is left of the string.
    """
    # Base case: an empty string needs no more words.
    if not string:
        return True
    for word in vocab:
        cut = len(word)
        head, tail = string[:cut], string[cut:]
        # The recursive call runs first, even when `head` is not `word`,
        # so the rest of the string is explored for every vocabulary word.
        if can_seg(tail, vocab) and head == word:
            return True
    return False

In [None]:
# Let's test it
# "pen" is added to the vocabulary for these checks.
vocab = ["pen", "apple", "lemon", "app"]
print(can_seg("penapplepen", vocab))  # Should be True ("pen" + "apple" + "pen")
print(can_seg("applemon", vocab))     # Should be True ("app" + "lemon")
print(can_seg("application", vocab))  # Should be False (no way to split it into vocab words)

In [None]:
# Let's test it some more...
# The lyrics are 10 copies of "applemonapp" followed by "lemon" (115 letters).
# NOTE: a can_seg that recurses on every vocabulary word without remembering
# earlier answers repeats the same sub-problems over and over, so this call
# may take far too long to finish.
long_lyrics = "applemonapp" * 10 + "lemon"
print(can_seg(long_lyrics, vocab))  # Should be True

Oops! Let's debug.

In [None]:
# Let's debug it!
def can_seg(string, vocab):
    """Return True if `string` can be cut into pieces that are all in `vocab`.

    Same recursion as the plain solution, but every call announces the string
    it was given so that repeated work becomes visible in the output.
    """
    print('Called with: '+ string) # Added this
    # Base case: an empty string needs no more words.
    if not string:
        return True
    for word in vocab:
        cut = len(word)
        head, tail = string[:cut], string[cut:]
        # The recursive call happens before the comparison, so it runs for
        # every vocabulary word, whether or not `head` matches it.
        if can_seg(tail, vocab) and head == word:
            return True
    return False

In [None]:
# A tiny vocabulary and a short string keep the list of printed calls readable.
debug_vocab = ['ab', 'abc', 'bc']
debug_string = 'abc' * 3  # 'abcabcabc'
can_seg(debug_string, debug_vocab)  # Should be True ('abc' + 'abc' + 'abc')

What's going on? How can we fix it?

In [None]:
# Assume "vocab" doesn't change between calls (what happens to mem if it does?)
mem = {}  # string -> the answer can_seg found for exactly that string
def can_seg(string, vocab):  # vocab doesn't change
    """Return True if `string` can be cut into pieces that are all in `vocab`.

    Memoized: the answer for each string is saved in `mem` the first time it
    is computed and looked up afterwards, so no string is solved twice.
    The saved answers are only valid for one vocabulary; clear `mem` before
    switching to a different `vocab`.
    """
    print('Called with: '+ string)
    # Already solved this exact string? Reuse the answer.
    if string in mem:
        return mem[string]
    # Base case: an empty string needs no more words.
    if len(string) == 0:
        return True
    result = False
    for word in vocab:
        prefix = string[:len(word)]
        suffix = string[len(word):]
        if can_seg(suffix, vocab) and prefix == word:
            result = True
            break
    # Save the answer for `string` itself. It must be the final answer for
    # the whole string, not the answer for one of its suffixes, otherwise
    # later lookups return the wrong value.
    mem[string] = result
    return result

In [None]:
# Adding prints to see how memoization helps
num_calls = 0  # how many times can_seg has been entered so far
memory = {}    # suffix -> the answer can_seg returned for that suffix
def can_seg(string, vocab):
    """Return True if `string` can be cut into pieces that are all in `vocab`.

    Each suffix is solved at most once: its answer is kept in `memory` and
    reused. Every call on a non-empty string prints the string together with
    the running call count.
    """
    # Track calls
    global num_calls
    num_calls += 1
    # Base case: an empty string needs no more words (and is not printed).
    if not string:
        return True
    print(f"Input: {string}; Call #{num_calls}")
    for word in vocab:
        cut = len(word)
        head, tail = string[:cut], string[cut:]
        # Only recurse the first time this suffix shows up.
        if tail not in memory:
            memory[tail] = can_seg(tail, vocab)
        if head == word and memory[tail]:
            return True
    return False

In [None]:
# Let's test it some more...
# Same long input as before: 10 copies of "applemonapp" followed by "lemon".
# `vocab` is the list defined in the earlier test cell.
long_lyrics = "applemonapp" * 10 + "lemon"
print(can_seg(long_lyrics, vocab))  # Should be True

In [None]:
# A simpler example for memoization. It can be used in many places!
# fib(0) == 1; fib(1) == 1; fib(n) == fib(n-1) + fib(n-2)
mem = {}  # n -> fib(n), for every n that has been computed so far
def fib(n):
    """Return the n-th Fibonacci number, counting fib(0) == fib(1) == 1.

    Each value is saved in `mem` the first time it is computed, so later
    requests for the same n are a dictionary lookup instead of a new
    recursion.
    """
    if n in mem:
        return mem[n]
    # Base cases are returned directly and never stored.
    if n in (0, 1):
        return 1
    mem[n] = fib(n - 1) + fib(n - 2)
    return mem[n]

In [None]:
# Because fib saves each value in mem, every fib(k) for k <= 100 is computed
# only once, so this call finishes quickly.
print(fib(100))