# morse alphabet 

In [1]:
import time
import sys

### Morse Alphabet

A  .- 	 B  -... 	 C  -.-. 	 D  -..
E  . 	 F  ..-. 	 G  --. 	 H  ....
I  .. 	 J  .--- 	 K  -.- 	 L  .-..
M  -- 	 N  -. 	 O  --- 	 P  .--.
Q  --.- 	 R  .-. 	 S  ... 	 T  -
U  ..- 	 V  ...- 	 W  .-- 	 X  -..-
Y  -.-- 	 Z  --..

In [2]:
from alphabet import MorseAlphabet

In [3]:
from context import Context

In [4]:
from wordencoder import WordEncoder
from wordfinder import WordFinder

In [5]:
from node import Node, NodeState
from nodetree import NodeTree

In [6]:
import jdc

# -------------------------------------

In [39]:
def count(acc, current_node, context, tree):
    """Sum `total_options` over every completed node, starting at `current_node`.

    Visits `current_node`, then each node returned by successive
    `tree.next()` calls, and stops at the first falsy node.

    Written as a loop instead of recursion: one stack frame per node
    raises RecursionError once the node sequence gets long.

    Args:
        acc: Running total to start from.
        current_node: First node to examine; a falsy value ends the walk.
        context: Not used here; kept so existing callers keep working.
        tree: Object whose `next()` returns the following node, or a falsy
            value when there is none left.

    Returns:
        `acc` plus the `total_options` of every visited node whose
        `is_done` is truthy.
    """
    while current_node:
        # a done node marks a whole sentence
        if current_node.is_done:
            acc += current_node.total_options
        current_node = tree.next()
    return acc

# unit tests harness

In [51]:
def count_runner(target_sentence, words, verbose=False):
    """Count the ways `target_sentence` can be split into `words`.

    Wires a Context with the sentence, a MorseAlphabet, a screening
    WordEncoder loaded with `words` and a WordFinder, then drains a
    NodeTree built on that context.

    Args:
        target_sentence: Morse string to decode.
        words: Candidate words handed to the encoder.
        verbose: When true, also print the encoder's dictionary.

    Returns:
        Sum of `total_options` over every node whose `is_done` is truthy.
    """
    print(f'nb words {len(words)}')

    # context: sentence + alphabet
    ctx = Context()
    ctx.add_sentence(target_sentence)
    ctx.add_alphabet(MorseAlphabet())

    # encoder must be registered on the context before the words are loaded
    encoder = WordEncoder(ctx, use_screening=True)
    ctx.add_word_encoder(encoder)
    encoder.add_word_list(words)

    if verbose:
        print(f'encoded words {encoder.get_dict()}')
    print(f'nb encoded words {len(encoder.get_dict())}')

    finder = WordFinder(ctx)
    ctx.add_word_finder(finder)

    # drain the tree, adding up the options of every finished node
    node_tree = NodeTree(ctx)
    total = 0
    while True:
        node = node_tree.next()
        if not node:
            break
        if node.is_done:
            total += node.total_options

    return total

In [9]:
print('ok')

ok


# -------------------------------------

# unit tests

### empty sentence

In [52]:
morse_sentence = '' 
words = ['SE', 'T', 'O'] 

res = count_runner(morse_sentence, words)

print(f'count: {res}')

assert res == 0

nb words 3
nb encoded words 0
count: 0


### empty words

In [53]:
morse_sentence = '-' # T 
words = [] 

res = count_runner(morse_sentence, words)

print(f'count: {res}')

assert res == 0

nb words 0
nb encoded words 0
count: 0


### one word

In [54]:
morse_sentence = '-' # T 
words = ['T'] 

res = count_runner(morse_sentence, words, verbose=True)

print(f'count: {res}')

assert res == 1

nb words 1
encoded words {'-': -}
nb encoded words 1
count: 1


In [55]:
# '--' can be read as T T or as M; X (-..-) cannot match.
morse_sentence = '--' # TT or M
words = ['T', 'X', 'M'] 

res = count_runner(morse_sentence, words, verbose=True)

print(f'count: {res}')

assert res == 2 # TT, M

nb words 3
encoded words {'-': -, '--': --}
nb encoded words 2
count: 2


### One letter - one option

In [56]:
morse_sentence = '-' # T
words = ['SE', 'T', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # T

nb words 3
encoded words {'-': -}
nb encoded words 1
count: 1


### very few words

In [57]:
morse_sentence = '-.-.' # TETE
words = ['TE'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # TE TE

nb words 1
encoded words {'-.': -.}
nb encoded words 1
count: 1


In [58]:
# Same sentence as the previous case, but the words are the single letters T and E.
morse_sentence = '-.-.' # TETE
words = ['T','E'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # T E T E

nb words 2
encoded words {'-': -, '.': .}
nb encoded words 2
count: 1


### short message - multiple options

In [59]:
morse_sentence = '....' # E . I .. S ... H ....
words = ['EIE', 'SE', 'ES', 'H', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 4 # EIE, ES, H, SE 

nb words 6
encoded words {'....': ....#4}
nb encoded words 1
count: 4


### no match

In [60]:
morse_sentence = '....' # E . I .. S ... H ....
words = ['X', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 0 # no match

nb words 3
encoded words {}
nb encoded words 0
count: 0


### short message - multiple options with permutations

In [61]:
morse_sentence = '.....' # confusion EH/HE
words = ['HEL', 'HE', 'EH', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HE, EH

nb words 4
encoded words {'.....': .....#2}
nb encoded words 1
count: 2


### short message - one option

In [62]:
morse_sentence = '......-..' # HEL single option
words = ['HEL', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # HEL

nb words 2
encoded words {'......-..': ......-..}
nb encoded words 1
count: 1


### short message - multiple options with partial match

In [63]:
# Both the full word and the shorter word followed by L must be counted.
morse_sentence = '......-..' # HEL or HE L
words = ['HEL', 'HE', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HEL, HE L -- presumably guards against stopping once HE L is found and never reaching HEL

nb words 4
encoded words {'......-..': ......-.., '.....': ....., '.-..': .-..}
nb encoded words 3
count: 2


### short message - multiple options with partial match and permutations

In [64]:
morse_sentence = '......-..' # HEL with confusion EH/HE
words = ['HEL', 'HE', 'EH', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 3 # HEL, HE L, EH L

nb words 5
encoded words {'......-..': ......-.., '.....': .....#2, '.-..': .-..}
nb encoded words 3
count: 3


### short sample message - multiple options

In [65]:
morse_sentence = '......-...-..---' # HELLO 
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HELLO, HELL O

nb words 7
encoded words {'......-...-..': ......-...-.., '......-...-..---': ......-...-..---, '.-..': .-.., '---': ---}
nb encoded words 4
count: 2


### short sample message - multiple options with permutations

In [66]:
morse_sentence = '......-...-..---' # HELLO with confusion EH/HE
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST', 'HE', 'EH', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 4 # HELLO, HELL O, HE L L O, EH L L O

nb words 9
encoded words {'......-...-..': ......-...-.., '......-...-..---': ......-...-..---, '.....': .....#2, '.-..': .-.., '---': ---}
nb encoded words 5
count: 4


### sample message

In [32]:
import time

In [67]:
morse_sentence = '......-...-..---.-----.-..-..-..' # HELLOWORLD
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST'] 

start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 2 # HELLO WORLD, HELL OWORLD

duration 0.0002897399999710615


nb words 5
encoded words {'......-...-..': ......-...-.., '......-...-..---': ......-...-..---, '.-----.-..-..-..': .-----.-..-..-.., '---.-----.-..-..-..': ---.-----.-..-..-..}
nb encoded words 4
count: 2


### other sample

In [68]:
# '--.' '---' '---' '-..' spells G O O D.
morse_sentence = '--.-------..' # GOOD
words = ['GOD', 'GOOD', 'MORNING', 'G', 'HELLO'] 
# Morse reference table:
# A .- B -... C -.-. D -.. E . F ..-. G --. H .... 
# I .. J .--- K -.- L .-.. M -- N -. O --- P .--. 
# Q --.- R .-. S ... T - U ..- V ...- W .-- X -..- Y -.-- Z --..
start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1 # GOOD

duration 0.00023283800010176492


nb words 5
encoded words {'--.-------..': --.-------.., '--.': --.}
nb encoded words 2
count: 1


count with startswith
single run: 5.9784999997702926e-05

# -------------------------------------

# long string

### long string generation fixture

In [69]:
import random
def generate_random_morse_sentence(length, signs=None, seed=1234, chunk_size=None):
    """Build a reproducible random sentence encoded through `alphabet_map`.

    Draws `length` letters from `signs`, concatenates their `alphabet_map`
    encodings, and cuts the letter sequence into consecutive tokens; a
    token is closed as soon as it holds at least `chunk_size` letters.

    Args:
        length: Number of letters to draw.
        signs: Letters to draw from; a falsy value means every key of the
            module-level `alphabet_map`.
        seed: Seed given to `random.seed`.
        chunk_size: Letters per token; a falsy value picks a random size.

    Returns:
        A tuple `(sentence, stats, unique_tokens)`: the encoded string, a
        dict of per-letter draw counts, and the de-duplicated tokens (built
        through a set, so in no particular order).
    """
    random.seed(seed)

    # the chunk size is drawn first so the random stream is unchanged for a given seed
    if not chunk_size:
        chunk_size = random.randint(0, 4) + random.randint(0, 16)
    if not signs:
        signs = list(alphabet_map.keys())

    last_index = len(signs) - 1
    stats = dict.fromkeys(signs, 0)

    encoded = []
    tokens = []
    pending = []
    for _ in range(length):
        letter = signs[random.randint(0, last_index)]
        encoded.append(alphabet_map[letter])
        stats[letter] += 1
        pending.append(letter)
        if len(pending) >= chunk_size:
            tokens.append(''.join(pending))
            pending = []

    # flush the trailing, possibly shorter, chunk
    if pending:
        tokens.append(''.join(pending))

    return ''.join(encoded), stats, list(set(tokens))

In [70]:
morse = MorseAlphabet()
alphabet_map = morse.encoded_letters_map

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(1, signs='E', chunk_size=5)
assert len(generated_morse_sentence) == 1
assert stats['E'] == 1

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(2, signs='ET', chunk_size=5)
assert len(generated_morse_sentence) == 2
assert stats['E'] == 1
assert stats['T'] == 1

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(2, chunk_size=5)
assert len(generated_morse_sentence) == 7
assert stats['O'] == 1
assert stats['Y'] == 1

In [None]:
# Disabled checks on the tokens returned by generate_random_morse_sentence.
# NOTE(review): the tokens come back through list(set(...)), so their order is not
# guaranteed; the positional assertions below (tokens[0], tokens[1]) depend on that
# order -- presumably why this cell is switched off. Confirm before re-enabling.
if False:
    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(10, signs='ET', chunk_size=5)
    print(tokens)
    assert len(tokens) == 2
    assert len(tokens[0]) == 5
    assert len(tokens[1]) == 5
    assert tokens[0] == 'TEEEE'
    assert tokens[1] == 'EETEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(10, signs='E', chunk_size=5)
    print(tokens)
    assert len(tokens) == 1
    assert len(tokens[0]) == 5
    assert tokens[0] == 'EEEEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(8, signs='E', chunk_size=5)
    print(tokens)
    assert len(tokens) == 2
    assert len(tokens[0]) == 3
    assert len(tokens[1]) == 5
    assert tokens[0] == 'EEE'
    assert tokens[1] == 'EEEEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(8, chunk_size=2)
    print(tokens)
    assert len(tokens) == 4
    assert len(tokens[0]) == 2
    assert tokens[0] == 'YO'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(40)
    print(tokens)
    assert len(tokens) == 7
    assert tokens[0] == 'ACZSBV'


### size of the sentence

In [71]:
#sentence_size = 20
sentence_size = 300
verbose = False

https://jakevdp.github.io/PythonDataScienceHandbook/01.07-timing-and-profiling.html

In [None]:
##%load_ext line_profiler

### long string - 1-char word - 1 option - stackoverflow

assume count for 1 word is 1

In [72]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='E')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = ['E'] 

start = time.perf_counter() 
%prun res = count_runner(morse_sentence, words, verbose=verbose)
##%lprun -f count_runner res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1

duration 0.005039118000013332


stats {'E': 300}
length 300
nb words 1
nb encoded words 1
 count: 1


### long string - 2 1-char words - multiple options - stackoverflow

In [73]:
import sys
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='ET')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = ['E', 'T'] 

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1

duration 0.005379391999895233


stats {'E': 148, 'T': 152}
length 300
nb words 2
nb encoded words 2
 count: 1


### long string - few words - multiple options

issue = a large number of words ':' -> exceeds the recursion limit

In [74]:
# Sentence made only of E, with the 5-letter chunks reused as the word list.
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='E', chunk_size=5)
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1 # 1 if multiple, 2 or 4 otherwise

duration 0.002226986999858127


stats {'E': 300}
length 300
nb words 1
nb words 1
nb encoded words 1
 count: 1


### long string - more words 

In [75]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size)
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"total duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

total duration 0.013423241000054986


stats {'A': 11, 'B': 14, 'C': 19, 'D': 9, 'E': 13, 'F': 14, 'G': 12, 'H': 6, 'I': 13, 'J': 12, 'K': 8, 'L': 11, 'M': 13, 'N': 9, 'O': 14, 'P': 16, 'Q': 12, 'R': 8, 'S': 13, 'T': 6, 'U': 8, 'V': 16, 'W': 12, 'X': 5, 'Y': 11, 'Z': 15}
length 967
nb words 50
nb words 50
nb encoded words 50
 count: 1


### long string - few words 

In [76]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='ET')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

duration 0.004651267999861375


stats {'E': 148, 'T': 152}
length 300
nb words 37
nb words 37
nb encoded words 37
 count: 1


### long sentence - lots of permutations

In [None]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='EISH')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
verbose = False
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

stats {'E': 78, 'I': 70, 'S': 81, 'H': 71}
length 745
nb words 50
nb words 50
nb encoded words 13


# -------------------------------------

## long sentence - with repeating pattern

In [None]:
# E is '.' and T is '-', so each symbol maps to exactly one word.
morse_sentence = '.-.-.-.-.-.-.-.-' # ETETETETETETETET
words = ['E', 'T'] 

start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1 # single split: E T E T E T E T E T E T E T E T

# -------------------------------------

# many words

In [None]:
import random
def generate_words(nb, max_length=20, signs=None, seed=1234):
    """Generate `nb` reproducible random words.

    Args:
        nb: Number of words to produce.
        max_length: Upper bound of the second length draw; a word has
            between 0 and `max_length + 4` letters, so empty words are
            possible.
        signs: Letters to draw from. Falls back to the 26 uppercase letters
            A-Z when omitted or empty. (This parameter used to be accepted
            but ignored.)
        seed: Seed given to `random.seed`.

    Returns:
        A list of `nb` strings.
    """
    random.seed(seed)
    letters = list(signs) if signs else 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    last_index = len(letters) - 1

    words = []
    for _ in range(nb):
        size = random.randint(0, 4) + random.randint(0, max_length)
        # one randint per letter keeps the default output identical for a given seed
        words.append(''.join(letters[random.randint(0, last_index)] for _ in range(size)))

    return words

In [None]:
# Time the generation of `nb` random words and check how many came back.
# NOTE(review): an earlier cell reads `morse.encoded_letters_map` as an attribute, while
# this one calls `morse.get_encoded_letters_map()` -- confirm which accessor
# MorseAlphabet actually exposes.
alphabet_map = morse.get_encoded_letters_map()
nb = 10000

start = time.perf_counter()
words = generate_words(nb, seed=1234)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

assert len(words) == nb
print(words[0])

In [None]:
start = time.perf_counter()
lengths = {w:len(w) for w in words}
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

In [None]:
start = time.perf_counter()
for w in words:
    n = lengths[w]
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)