# morse alphabet 

In [1]:
import time
import sys

### Morse Alphabet

A  .- 	 B  -... 	 C  -.-. 	 D  -..
E  . 	 F  ..-. 	 G  --. 	 H  ....
I  .. 	 J  .--- 	 K  -.- 	 L  .-..
M  -- 	 N  -. 	 O  --- 	 P  .--.
Q  --.- 	 R  .-. 	 S  ... 	 T  -
U  ..- 	 V  ...- 	 W  .-- 	 X  -..-
Y  -.-- 	 Z  --..

In [2]:
from alphabet import MorseAlphabet

In [3]:
morse = MorseAlphabet()
assert morse.get_encoded_letters_map()['D'] == '-..'

In [4]:
from wordencoder import WordEncoder

In [5]:
encoder = WordEncoder()
encoder.add_word_list(['HTE', 'EE'])
assert len(encoder.get_list()) == 2

#### version 5 6 
- basic reducer with remaining
- replace list manipulations with nodes -> still bump into the recursion limit
- replace recursion with the command pattern -> still takes too long
- options object - keep track of solved positions

In [6]:
from context import Context

In [7]:
from node import Node, NodeState

In [8]:
sentence1 = '..-.--.'
node_0 = Node('', 0, len(sentence1), state=NodeState.ROOT)
node_1 = Node('..', 2, len('-.--.'), parent=node_0)
node_2 = Node('-', 3, len('.--.'), parent=node_1)
node_3 = Node('.--', 4, len('.'), parent=node_2)
node_4 = Node('.', 7, len(''), parent=node_3)
#print(node_4.get_sequence())
#origin = node_4.get_sequence_start()
#assert origin.get_sequence() == ''
#assert origin.get_sequence() == NodeState.ROOT

In [9]:
from tasklist import TaskList

In [10]:
sentence1 = '..-.--.'
node_0 = Node('', 0, len(sentence1), state=NodeState.ROOT)
tasklist = TaskList(node_0)

In [11]:
import jdc

In [12]:
class SentenceDecoder:
    """Holds the state used to count the decodings of a Morse sentence.

    Only the constructor is defined here; the decoding methods are attached
    to the class from later notebook cells.
    """

    def __init__(self, context, verbose=False):
        """Initialise the decoder from a context.

        Args:
            context: object exposing ``get_encoder()``, whose result exposes
                ``get_dict()``; that dictionary becomes the word map.
            verbose: when true, the debug helpers emit trace output.
        """
        encoder = context.get_encoder()

        # Configuration supplied by the caller.
        self.context = context
        self.verbose = verbose
        self.valid_words_map = encoder.get_dict()

        # Per-sentence state, left empty until a decode call fills it in.
        self.sentence = None
        self.nb_sentence_signs = 0
        self.task_list = None
        self.options = None

In [13]:
%%add_to SentenceDecoder
def screen_words(self, encoded_words_map):
    """Keep only the encoded words that occur somewhere in ``self.sentence``.

    Args:
        encoded_words_map: mapping whose keys are encoded words; values are
            carried over unchanged.

    Returns:
        A new dict restricted to the keys found as a substring of the
        sentence. The number of kept entries is printed as a side effect.
    """
    kept = {}
    for code, value in encoded_words_map.items():
        if code in self.sentence:
            kept[code] = value
    print(f'nb possible words {len(kept)}')
    return kept

In [14]:
%%add_to SentenceDecoder
def log2(self, message):
    """Write ``message`` to standard error and flush immediately."""
    print(message, flush=True, file=sys.stderr)

def debug(self, category, level, formated, duplicates=0):
    """Emit one trace line on standard error when ``self.verbose`` is set.

    The line has the form ``<category>.<level> <formated> *<duplicates>``.
    Nothing is written when verbose mode is off.
    """
    if not self.verbose:
        return
    line = f'{category}.{level} {formated} *{duplicates}'
    print(line, flush=True, file=sys.stderr)

def get_formated(self, node, word=None):
    """Build the bracketed trace text for ``node`` (empty unless verbose).

    Args:
        node: object providing ``get_sequence()``, ``pos`` and ``next_pos``.
        word: optional candidate word being tried at ``node.pos``.

    Returns:
        ``''`` when verbose mode is off. Otherwise:
        * with a word: ``[<left> <= @<pos>(<word>) => <right>]`` where
          ``<right>`` is the sentence after ``node.pos + len(word)``;
        * without a word: ``[<left> <=> <right>]`` where ``<right>`` is the
          sentence from ``node.next_pos``, or just ``[<left>]`` when nothing
          is left.
    """
    if not self.verbose:
        return ''

    left = node.get_sequence() if node else ''
    pos = node.pos

    if word:
        tail = self.sentence[node.pos + len(word):]
        return f'[{left} <= @{pos}({word}) => {tail}]'

    tail = self.sentence[node.next_pos:]
    if tail:
        return f'[{left} <=> {tail}]'
    return f'[{left}]'


In [15]:
%%add_to SentenceDecoder
def decode_1(self, sentence):
    """Earlier list-based driver: count the decodings of ``sentence``.

    Returns 0 (after logging) for an empty sentence or when no known word
    occurs in the sentence; otherwise returns the count accumulated by the
    loop. Stores the sentence and the screened word map on ``self``.

    NOTE(review): the loop unpacks a ``(commands, count)`` pair, which
    matches the return shape of ``decode_loop_1`` rather than that of
    ``decode_loop`` -- confirm which loop this driver is meant to call.
    """
    # Trivial case: nothing to decode.
    if not len(sentence):
        self.log2('empty sentence')
        return 0

    self.sentence = sentence

    # Drop every word that cannot appear in this sentence.
    self.valid_words_map = self.screen_words(self.valid_words_map)
    if not self.valid_words_map:
        self.log2('empty words')
        return 0

    total = 0
    iteration = 0
    pending = [Node('', 0, self.sentence, state=NodeState.ROOT, level=0)]
    while pending:
        iteration += 1
        self.log2(f'loop:{iteration}, nb_commands:{len(pending)}')
        pending, total = self.decode_loop(pending, total)

    return total

In [53]:
%%add_to SentenceDecoder
def decode_loop_1(self, commands, last_nb_options):
    """Process one generation of nodes for the list-based driver.

    Finished nodes add ``get_nb_options()`` to the running count. Every
    other node spawns one child ``Node`` per word that prefixes its
    remaining text.

    Args:
        commands: iterable of nodes exposing ``level``, ``remaining``,
            ``next_pos`` and ``is_done``.
        last_nb_options: count accumulated so far.

    Returns:
        ``(new_commands, nb_options)``: the children to process next and
        the updated count.
    """
    total = last_nb_options
    follow_ups = []

    for node in commands:
        depth = node.level + 1
        tail = node.remaining
        start = node.next_pos

        if not node.is_done:
            for word, duplicates in self.valid_words_map.items():
                size = len(word)
                if len(tail) >= size and tail.startswith(word):
                    rest = str(tail[size:])
                    follow_ups.append(
                        Node(word, start, rest, duplicates=duplicates, parent=node)
                    )
            continue

        found = node.get_nb_options()
        self.debug('done', depth, self.get_formated(node), found)
        total += found

    return follow_ups, total

In [96]:
%%add_to SentenceDecoder
def decode(self, sentence):
    """Count the decodings of ``sentence`` using the task list.

    Returns 0 (after logging) for an empty sentence or when no known word
    occurs in the sentence. Otherwise a root node is queued in a fresh
    ``TaskList`` and batches are fed to ``decode_loop`` until the task list
    yields nothing; the accumulated count is returned.

    Side effects: stores the sentence, its length, the screened word map
    and the task list on ``self``, and logs the number of batches.
    """
    # Trivial case: nothing to decode.
    if not len(sentence):
        self.log2('empty sentence')
        return 0

    self.sentence = sentence
    self.nb_sentence_signs = len(self.sentence)

    # Drop every word that cannot appear in this sentence.
    self.valid_words_map = self.screen_words(self.valid_words_map)
    if not self.valid_words_map:
        self.log2('empty words')
        return 0

    batch_size = 10  # size requested from the task list on each pull
    root = Node('', 0, self.nb_sentence_signs, state=NodeState.ROOT, level=0)
    self.task_list = TaskList(root)

    total = 0
    batches = 0
    while True:
        batch = self.task_list.next(size=batch_size)
        if not batch:
            break
        batches += 1
        total = self.decode_loop(batch, total)

    self.log2(f'loop:{batches}')
    return total

In [18]:
%%add_to SentenceDecoder
def decode_loop_2(self, nodes, last_nb_options):
    """Process one batch of nodes for the string-based task-list variant.

    Finished nodes add ``get_nb_options()`` to the running count. Every
    other node queues, on ``self.task_list``, one child ``Node`` per word
    that prefixes its remaining text.

    Args:
        nodes: iterable of nodes exposing ``level``, ``remaining`` (a
            string), ``next_pos`` and ``is_done``.
        last_nb_options: count accumulated so far.

    Returns:
        The updated count.
    """
    nb_options = last_nb_options
    for last_node in nodes:
        level = last_node.level + 1
        remaining = last_node.remaining
        current_pos = last_node.next_pos

        if last_node.is_done:
            nb_options_for_sequence = last_node.get_nb_options()
            self.debug('done', level, self.get_formated(last_node), nb_options_for_sequence)
            nb_options += nb_options_for_sequence
            continue

        self.debug('loop', level, self.get_formated(last_node))
        for word, duplicates in self.valid_words_map.items():
            # The word must actually be the prefix of what is left. The
            # previous test (`remaining[0:i]`) only checked that the prefix
            # was non-empty, so any word short enough was accepted.
            if not remaining.startswith(word):
                continue
            self.debug('word', level, self.get_formated(last_node, word), duplicates)
            new_remaining = str(remaining[len(word):])
            new_node = Node(word, current_pos, new_remaining, duplicates=duplicates, parent=last_node)
            self.debug('cont', level, self.get_formated(new_node, word), duplicates)
            self.task_list.add(new_node)

    return nb_options

In [118]:
%%add_to SentenceDecoder

def decode_loop_done(self, last_node):
    """Trace a finished node and return its ``get_nb_options()`` value."""
    depth = last_node.level + 1
    found = last_node.get_nb_options()
    description = self.get_formated(last_node)
    self.debug('done', depth, description, found)
    return found

def decode_loop_lookup(self, last_node):
    """Try every known word at the position following ``last_node``.

    Words longer than ``last_node.nb_remaining`` are skipped; every other
    word is handed to ``decode_loop_check_word`` together with its length,
    the start position ``last_node.next_pos`` and its map value.
    """
    start = last_node.next_pos
    for word, duplicates in self.valid_words_map.items():
        size = len(word)
        if size > last_node.nb_remaining:
            # Cannot fit in what is left of the sentence.
            continue
        self.decode_loop_check_word(last_node, word, size, start, duplicates)

# !!!
def decode_loop_check_word(self, last_node, word, nb_word_signs, current_pos, duplicates):
    """Create a child node when ``word`` matches the sentence at ``current_pos``.

    The comparison is made against the slice returned by
    ``decode_loop_slice``; on a mismatch nothing happens.
    """
    candidate = self.decode_loop_slice(current_pos, nb_word_signs)
    if candidate != word:
        return
    self.decode_loop_new_node(last_node, word, nb_word_signs, current_pos, duplicates)

def decode_loop_slice(self, current_pos, nb_word_signs):
    """Return ``nb_word_signs`` signs of ``self.sentence`` starting at ``current_pos``."""
    end = current_pos + nb_word_signs
    return self.sentence[current_pos:end]

def decode_loop_new_node(self, last_node, word, nb_word_signs, current_pos, duplicates):
    """Queue a child of ``last_node`` for ``word`` on the task list.

    The child is built with the word, ``current_pos`` and the parent's
    ``nb_remaining`` reduced by the word length.
    """
    signs_left = last_node.nb_remaining - nb_word_signs
    child = Node(word, current_pos, signs_left, duplicates=duplicates, parent=last_node)
    self.task_list.add(child)


def decode_loop(self, nodes, last_nb_options):
    """Process one batch of nodes and return the updated option count.

    Finished nodes contribute the value of ``decode_loop_done``; unfinished
    ones are expanded through ``decode_loop_lookup``.
    """
    total = last_nb_options
    for node in nodes:
        if not node.is_done:
            self.decode_loop_lookup(node)
            continue
        total += self.decode_loop_done(node)
    return total

In [76]:
def count_runner(target_sentence, words, verbose=False):
    """Encode ``words``, then count the decodings of ``target_sentence``.

    Prints the number of input words and of encoded words (plus the encoded
    dictionary itself when ``verbose``), builds a ``SentenceDecoder`` around
    a ``Context`` holding the encoder, and returns what ``decode`` returns.
    """
    print(f'nb words {len(words)}')

    encoder = WordEncoder()
    encoder.add_word_list(words)

    if verbose:
        print(f'encoded words {encoder.get_dict()}')
    print(f'nb encoded words {len(encoder.get_dict())}')

    decoder = SentenceDecoder(Context(word_encoder=encoder), verbose)
    return decoder.decode(target_sentence)

In [21]:
print('ok')

ok


# -------------------------------------

# unit tests

### empty sentence

In [22]:
morse_sentence = '' 
words = ['SE', 'T', 'O'] 

res = count_runner(morse_sentence, words)

print(f'count: {res}')

assert res == 0

empty sentence


nb words 3
nb encoded words 3
count: 0


### empty words

In [64]:
morse_sentence = '-' # T 
words = [] 

res = count_runner(morse_sentence, words)

print(f'count: {res}')

assert res == 0

empty words


nb words 0
nb encoded words 0
nb possible words 0
count: 0


### one word

In [59]:
morse_sentence = '-' # T 
words = ['T'] 

res = count_runner(morse_sentence, words, verbose=True)

print(f'count: {res}')

assert res == 1

loop.1 [ <=> -] *0
word.1 [ <= @0(-) => ] *1
cont.1 [-] *1
done.1 [-] *1
loop:2


nb words 1
encoded words {'-': 1}
nb encoded words 1
nb possible words 1
count: 1


In [60]:
morse_sentence = '--' # TT or M
words = ['T', 'X', 'M'] 

res = count_runner(morse_sentence, words, verbose=True)

print(f'count: {res}')

assert res == 2 # TT, M

loop.1 [ <=> --] *0
word.1 [ <= @0(-) => -] *1
cont.1 [- <=> -] *1
word.1 [ <= @0(--) => ] *1
cont.1 [--] *1
done.1 [--] *1
loop.1 [- <=> -] *0
word.1 [- <= @0(-) => -] *1
cont.1 [-|-] *1
word.1 [- <= @0(--) => ] *1
done.1 [-|-] *1
loop:4


nb words 3
encoded words {'-': 1, '-..-': 1, '--': 1}
nb encoded words 3
nb possible words 2
count: 2


### One letter - one option

In [26]:
morse_sentence = '-' # T
words = ['SE', 'T', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # T

loop.1 [ <=> -] *0
word.1 [ <= @0(-) => ] *1
cont.1 [-] *1
done.1 [-] *1
loop:2


nb words 3
encoded words {'....': 1, '-': 1, '---': 1}
nb encoded words 3
nb possible words 1
count: 1


### very few words

In [27]:
morse_sentence = '-.-.' # TETE
words = ['TE'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # TE TE

loop.1 [ <=> -.-.] *0
word.1 [ <= @0(-.) => -.] *1
cont.1 [-. <=> -.] *1
loop.1 [-. <=> -.] *0
word.1 [-. <= @0(-.) => -.] *1
cont.1 [-.|-.] *1
done.1 [-.|-.] *1
loop:3


nb words 1
encoded words {'-.': 1}
nb encoded words 1
nb possible words 1
count: 1


In [28]:
morse_sentence = '-.-.' # TETE
words = ['T','E'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # T E T E

loop.1 [ <=> -.-.] *0
word.1 [ <= @0(-) => .-.] *1
cont.1 [- <=> .-.] *1
word.1 [ <= @0(.) => .-.] *1
loop.1 [- <=> .-.] *0
word.1 [- <= @0(-) => .-.] *1
word.1 [- <= @0(.) => .-.] *1
cont.1 [-|. <=> -.] *1
loop.1 [-|. <=> -.] *0
word.1 [-|. <= @1(-) => -.] *1
cont.1 [-|.|- <=> .] *1
word.1 [-|. <= @1(.) => -.] *1
loop.1 [-|.|- <=> .] *0
word.1 [-|.|- <= @2(-) => .] *1
word.1 [-|.|- <= @2(.) => .] *1
cont.1 [-|.|-|.] *1
done.1 [-|.|-|.] *1
loop:5


nb words 2
encoded words {'-': 1, '.': 1}
nb encoded words 2
nb possible words 2
count: 1


### short message - multiple options

In [29]:
morse_sentence = '....' # E . I .. S ... H ....
words = ['EIE', 'SE', 'ES', 'H', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 4 # EIE, ES, H, SE 

loop.1 [ <=> ....] *0
word.1 [ <= @0(....) => ] *4
cont.1 [....] *4
done.1 [....] *4
loop:2


nb words 6
encoded words {'....': 4, '.-..': 1, '---': 1}
nb encoded words 3
nb possible words 1
count: 4


### no match

In [30]:
morse_sentence = '....' # E . I .. S ... H ....
words = ['X', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 0 # no match

empty words


nb words 3
encoded words {'-..-': 1, '.-..': 1, '---': 1}
nb encoded words 3
nb possible words 0
count: 0


### short message - multiple options with permutations

In [31]:
morse_sentence = '.....' # confusion EH/HE
words = ['HEL', 'HE', 'EH', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HE, EH

loop.1 [ <=> .....] *0
word.1 [ <= @0(.....) => ] *2
cont.1 [.....] *2
done.1 [.....] *2
loop:2


nb words 4
encoded words {'......-..': 1, '.....': 2, '---': 1}
nb encoded words 3
nb possible words 1
count: 2


### short message - one option

In [32]:
morse_sentence = '......-..' # HEL single option
words = ['HEL', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 1 # HEL

loop.1 [ <=> ......-..] *0
word.1 [ <= @0(......-..) => ] *1
cont.1 [......-..] *1
done.1 [......-..] *1
loop:2


nb words 2
encoded words {'......-..': 1, '---': 1}
nb encoded words 2
nb possible words 1
count: 1


### short message - multiple options with partial match

In [33]:
morse_sentence = '......-..' # HEL or HE L
words = ['HEL', 'HE', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HEL, HE L  -- fix stops when HE L is found and never reach HEL

loop.1 [ <=> ......-..] *0
word.1 [ <= @0(......-..) => ] *1
cont.1 [......-..] *1
word.1 [ <= @0(.....) => .-..] *1
cont.1 [..... <=> .-..] *1
word.1 [ <= @0(.-..) => ..-..] *1
done.1 [......-..] *1
loop.1 [..... <=> .-..] *0
word.1 [..... <= @0(......-..) => ] *1
word.1 [..... <= @0(.....) => .-..] *1
word.1 [..... <= @0(.-..) => ..-..] *1
cont.1 [.....|.-..] *1
done.1 [.....|.-..] *1
loop:4


nb words 4
encoded words {'......-..': 1, '.....': 1, '.-..': 1, '---': 1}
nb encoded words 4
nb possible words 3
count: 2


### short message - multiple options with partial match and permutations

In [34]:
morse_sentence = '......-..' # HEL with confusion EH/HE
words = ['HEL', 'HE', 'EH', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 3 # HEL, HE L, EH L

loop.1 [ <=> ......-..] *0
word.1 [ <= @0(......-..) => ] *1
cont.1 [......-..] *1
word.1 [ <= @0(.....) => .-..] *2
cont.1 [..... <=> .-..] *2
word.1 [ <= @0(.-..) => ..-..] *1
done.1 [......-..] *1
loop.1 [..... <=> .-..] *0
word.1 [..... <= @0(......-..) => ] *1
word.1 [..... <= @0(.....) => .-..] *2
word.1 [..... <= @0(.-..) => ..-..] *1
cont.1 [.....|.-..] *1
done.1 [.....|.-..] *2
loop:4


nb words 5
encoded words {'......-..': 1, '.....': 2, '.-..': 1, '---': 1}
nb encoded words 4
nb possible words 3
count: 3


### short sample message - multiple options

In [35]:
morse_sentence = '......-...-..---' # HELLO 
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 2 # HELLO, HELL O

loop.1 [ <=> ......-...-..---] *0
word.1 [ <= @0(......-...-..) => ---] *1
cont.1 [......-...-.. <=> ---] *1
word.1 [ <= @0(......-...-..---) => ] *1
cont.1 [......-...-..---] *1
word.1 [ <= @0(.-..) => ..-...-..---] *1
word.1 [ <= @0(---) => ...-...-..---] *1
done.1 [......-...-..---] *1
loop.1 [......-...-.. <=> ---] *0
word.1 [......-...-.. <= @0(......-...-..) => ---] *1
word.1 [......-...-.. <= @0(......-...-..---) => ] *1
word.1 [......-...-.. <= @0(.-..) => ..-...-..---] *1
word.1 [......-...-.. <= @0(---) => ...-...-..---] *1
cont.1 [......-...-..|---] *1
done.1 [......-...-..|---] *1
loop:4


nb words 7
encoded words {'......-...-..': 1, '......-...-..---': 1, '.-----.-..-..-..': 1, '---.-----.-..-..-..': 1, '-....-': 1, '.-..': 1, '---': 1}
nb encoded words 7
nb possible words 4
count: 2


### short sample message - multiple options with permutations

In [36]:
morse_sentence = '......-...-..---' # HELLO with confusion EH/HE
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST', 'HE', 'EH', 'L', 'O'] 

res = count_runner(morse_sentence, words, verbose=True)
print(f'count: {res}')

assert res == 4 # HELLO, HELL O, HE L L O, EH L L O

loop.1 [ <=> ......-...-..---] *0


nb words 9
encoded words {'......-...-..': 1, '......-...-..---': 1, '.-----.-..-..-..': 1, '---.-----.-..-..-..': 1, '-....-': 1, '.....': 2, '.-..': 1, '---': 1}
nb encoded words 8
nb possible words 5


word.1 [ <= @0(......-...-..) => ---] *1
cont.1 [......-...-.. <=> ---] *1
word.1 [ <= @0(......-...-..---) => ] *1
cont.1 [......-...-..---] *1
word.1 [ <= @0(.....) => .-...-..---] *2
cont.1 [..... <=> .-...-..---] *2
word.1 [ <= @0(.-..) => ..-...-..---] *1
word.1 [ <= @0(---) => ...-...-..---] *1
done.1 [......-...-..---] *1
loop.1 [......-...-.. <=> ---] *0
word.1 [......-...-.. <= @0(......-...-..) => ---] *1
word.1 [......-...-.. <= @0(......-...-..---) => ] *1
word.1 [......-...-.. <= @0(.....) => .-...-..---] *2
word.1 [......-...-.. <= @0(.-..) => ..-...-..---] *1
word.1 [......-...-.. <= @0(---) => ...-...-..---] *1
cont.1 [......-...-..|---] *1
done.1 [......-...-..|---] *1
loop.1 [..... <=> .-...-..---] *0
word.1 [..... <= @0(......-...-..) => ---] *1
word.1 [..... <= @0(......-...-..---) => ] *1
word.1 [..... <= @0(.....) => .-...-..---] *2
word.1 [..... <= @0(.-..) => ..-...-..---] *1
cont.1 [.....|.-.. <=> .-..---] *1
word.1 [..... <= @0(---) => ...-...-..---] *1
loop.1

count: 4


### sample message

In [37]:
morse_sentence = '......-...-..---.-----.-..-..-..' # HELLOWORLD
words = ['HELL', 'HELLO', 'WORLD', 'OWORLD', 'TEST'] 

start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 2 # HELLO WORLD, HELL OWORLD

nb words 5

loop.1 [ <=> ......-...-..---.-----.-..-..-..] *0
word.1 [ <= @0(......-...-..) => ---.-----.-..-..-..] *1
cont.1 [......-...-.. <=> ---.-----.-..-..-..] *1
word.1 [ <= @0(......-...-..---) => .-----.-..-..-..] *1
cont.1 [......-...-..--- <=> .-----.-..-..-..] *1
word.1 [ <= @0(.-----.-..-..-..) => .-----.-..-..-..] *1
word.1 [ <= @0(---.-----.-..-..-..) => ---.-..-..-..] *1
loop.1 [......-...-..--- <=> .-----.-..-..-..] *0
word.1 [......-...-..--- <= @0(......-...-..) => ---.-----.-..-..-..] *1
word.1 [......-...-..--- <= @0(......-...-..---) => .-----.-..-..-..] *1
word.1 [......-...-..--- <= @0(.-----.-..-..-..) => .-----.-..-..-..] *1
cont.1 [......-...-..---|.-----.-..-..-..] *1
word.1 [......-...-..--- <= @0(---.-----.-..-..-..) => ---.-..-..-..] *1
done.1 [......-...-..---|.-----.-..-..-..] *1
loop.1 [......-...-.. <=> ---.-----.-..-..-..] *0



encoded words {'......-...-..': 1, '......-...-..---': 1, '.-----.-..-..-..': 1, '---.-----.-..-..-..': 1, '-....-': 1}
nb encoded words 5
nb possible words 4


word.1 [......-...-.. <= @0(......-...-..) => ---.-----.-..-..-..] *1
word.1 [......-...-.. <= @0(......-...-..---) => .-----.-..-..-..] *1
word.1 [......-...-.. <= @0(.-----.-..-..-..) => .-----.-..-..-..] *1
word.1 [......-...-.. <= @0(---.-----.-..-..-..) => ---.-..-..-..] *1
cont.1 [......-...-..|---.-----.-..-..-..] *1
done.1 [......-...-..|---.-----.-..-..-..] *1
loop:5
duration 0.017806156999999878


count: 2


### other sample

In [38]:
morse_sentence = '--.-------..' # GOOD
words = ['GOD', 'GOOD', 'MORNING', 'G', 'HELLO'] 
# A .- B -... C -.-. D -.. E . F ..-. G --. H .... 
# I .. J .--- K -.- L .-.. M -- N -. O --- P .--. 
# Q --.- R .-. S ... T - U ..- V ...- W .-- X -..- Y -.-- Z --..
start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1 # GOOD

loop.1 [ <=> --.-------..] *0


nb words 5
encoded words {'--.----..': 1, '--.-------..': 1, '-----.-.-...-.--.': 1, '--.': 1, '......-...-..---': 1}
nb encoded words 5
nb possible words 2


word.1 [ <= @0(--.-------..) => ] *1
cont.1 [--.-------..] *1
word.1 [ <= @0(--.) => -------..] *1
cont.1 [--. <=> -------..] *1
done.1 [--.-------..] *1
loop.1 [--. <=> -------..] *0
word.1 [--. <= @0(--.-------..) => ] *1
word.1 [--. <= @0(--.) => -------..] *1
loop:3
duration 0.009379651999999794


count: 1


count with `startswith`
per call  5.9784999997702926e-05

# -------------------------------------

# long string

### long string generation fixture

In [39]:
import random
def generate_random_morse_sentence(length, signs=None, seed=1234, chunk_size=None, alphabet=None):
    """Generate a reproducible random Morse sentence plus matching word tokens.

    ``length`` letters are drawn from ``signs``; their encodings are
    concatenated into the sentence, and the letters are grouped into tokens
    of ``chunk_size`` letters (the last token may be shorter).

    Args:
        length: number of letters to draw.
        signs: sequence of letters to draw from; defaults to every key of
            the alphabet mapping.
        seed: value passed to ``random.seed`` (this reseeds the module-level
            generator).
        chunk_size: letters per token; when falsy it is drawn at random in
            0..20, and a drawn 0 yields one-letter tokens.
        alphabet: optional letter -> Morse code mapping; defaults to the
            notebook-level ``alphabet_map``.

    Returns:
        ``(sentence, stats, unique_tokens)``: the encoded sentence, a dict
        counting how often each sign was drawn, and the distinct tokens in
        order of first appearance.
    """
    # Fall back to the notebook-level table only when no mapping is given.
    letters_map = alphabet_map if alphabet is None else alphabet

    random.seed(seed)
    # Draw order (chunk size first, then letters) is kept so that seeded
    # sentences stay the same as before.
    if not chunk_size:
        chunk_size = random.randint(0, 4) + random.randint(0, 16)
    if not signs:
        signs = list(letters_map.keys())

    max_sign = len(signs) - 1
    stats = {s: 0 for s in signs}
    sentence = []
    tokens = []
    current_token = []
    for _ in range(length):
        letter = signs[random.randint(0, max_sign)]
        sentence.append(letters_map[letter])
        stats[letter] += 1
        current_token.append(letter)
        if len(current_token) >= chunk_size:
            tokens.append(''.join(current_token))
            current_token = []

    if current_token:
        tokens.append(''.join(current_token))

    # De-duplicate while keeping first-seen order: going through a set made
    # the token order depend on string hashing, so it was not reproducible
    # from one interpreter run to the next despite the fixed seed.
    unique_tokens = list(dict.fromkeys(tokens))

    return ''.join(sentence), stats, unique_tokens

In [40]:
alphabet_map = morse.get_encoded_letters_map()

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(1, signs='E', chunk_size=5)
assert len(generated_morse_sentence) == 1
assert stats['E'] == 1

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(2, signs='ET', chunk_size=5)
assert len(generated_morse_sentence) == 2
assert stats['E'] == 1
assert stats['T'] == 1

generated_morse_sentence, stats, tokens = generate_random_morse_sentence(2, chunk_size=5)
assert len(generated_morse_sentence) == 7
assert stats['O'] == 1
assert stats['Y'] == 1

In [41]:
if False:
    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(10, signs='ET', chunk_size=5)
    print(tokens)
    assert len(tokens) == 2
    assert len(tokens[0]) == 5
    assert len(tokens[1]) == 5
    assert tokens[0] == 'TEEEE'
    assert tokens[1] == 'EETEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(10, signs='E', chunk_size=5)
    print(tokens)
    assert len(tokens) == 1
    assert len(tokens[0]) == 5
    assert tokens[0] == 'EEEEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(8, signs='E', chunk_size=5)
    print(tokens)
    assert len(tokens) == 2
    assert len(tokens[0]) == 3
    assert len(tokens[1]) == 5
    assert tokens[0] == 'EEE'
    assert tokens[1] == 'EEEEE'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(8, chunk_size=2)
    print(tokens)
    assert len(tokens) == 4
    assert len(tokens[0]) == 2
    assert tokens[0] == 'YO'

    generated_morse_sentence, stats, tokens = generate_random_morse_sentence(40)
    print(tokens)
    assert len(tokens) == 7
    assert tokens[0] == 'ACZSBV'


### size of the sentence

In [120]:
#sentence_size = 20
sentence_size = 800
verbose = False

https://jakevdp.github.io/PythonDataScienceHandbook/01.07-timing-and-profiling.html

In [73]:
##%load_ext line_profiler

### long string - 1-char word - 1 option - stackoverflow

assume count for 1 word is 1

In [86]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='E')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = ['E'] 

start = time.perf_counter() 
%prun res = count_runner(morse_sentence, words, verbose=verbose)
##%lprun -f count_runner res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1

loop:51
duration 0.003133666999929119


stats {'E': 50}
length 50
nb words 1
nb encoded words 1
nb possible words 1
 count: 1


### long string - 2 1-char words - multiple options - stackoverflow

In [87]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='ET')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = ['E', 'T'] 

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1

loop:51
duration 0.00335969700017813


stats {'E': 34, 'T': 16}
length 50
nb words 2
nb encoded words 2
nb possible words 2
 count: 1


### long string - few words - multiple options

issue = a large number of words ':' -> exceeds the recursion limit

In [98]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='E', chunk_size=5)
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1 # 1 if a multiple, 2 or 4 otherwise

loop:81
duration 0.004060274000039499


stats {'E': 400}
length 400
nb words 1
nb words 1
nb encoded words 1
nb possible words 1
 count: 1


### long string - more words 

In [121]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size)
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"total duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

loop:139
total duration 0.026160770999922534


stats {'A': 29, 'B': 25, 'C': 38, 'D': 20, 'E': 32, 'F': 31, 'G': 30, 'H': 26, 'I': 31, 'J': 30, 'K': 34, 'L': 32, 'M': 31, 'N': 31, 'O': 32, 'P': 35, 'Q': 37, 'R': 30, 'S': 42, 'T': 31, 'U': 18, 'V': 38, 'W': 33, 'X': 18, 'Y': 34, 'Z': 32}
length 2528
nb words 134
nb words 134
nb encoded words 134
nb possible words 134
 count: 1


### long string - few words 

In [90]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='ET')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

loop:16
duration 0.004061856000134867


stats {'E': 34, 'T': 16}
length 50
nb words 8
nb words 8
nb encoded words 8
nb possible words 8
 count: 2


### long sentence - lots of permutations

In [92]:
morse_sentence, stats, tokens = generate_random_morse_sentence(sentence_size, signs='EISH')
print(f'stats {stats}')
print(f'length {len(morse_sentence)}')
words = tokens
print(f'nb words {len(words)}')

start = time.perf_counter()
verbose = False
%prun res = count_runner(morse_sentence, words, verbose=verbose)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res >= 1

stats {'E': 23, 'I': 11, 'S': 7, 'H': 9}
length 102
nb words 9
nb words 9
nb encoded words 7
nb possible words 7


KeyboardInterrupt: 

# -------------------------------------

## long sentence - with repeating pattern

In [None]:
morse_sentence = '.-.-.-.-.-.-.-.-' # ETETETETETETETET
words = ['E', 'T'] 

start = time.perf_counter()
res = count_runner(morse_sentence, words, verbose=True)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

print(f'count: {res}')

assert res == 1 # only E T E T ... (one single-sign word per sign)

# -------------------------------------

# many words

In [None]:
import random
def generate_words(nb, max_length=20, signs=None, seed=1234):
    """Generate ``nb`` reproducible random upper-case words.

    Each word's length is the sum of two draws, one in 0..4 and one in
    0..``max_length``, so words may be empty. ``signs`` is accepted but not
    used. The module-level random generator is reseeded with ``seed``.

    Returns:
        A list of ``nb`` strings made of the letters A-Z.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    random.seed(seed)

    generated = []
    for _ in range(nb):
        size = random.randint(0, 4) + random.randint(0, max_length)
        generated.append(''.join(letters[random.randint(0, 25)] for _ in range(size)))

    return generated

In [None]:
alphabet_map = morse.get_encoded_letters_map()
nb = 10000

start = time.perf_counter()
words = generate_words(nb, seed=1234)
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

assert len(words) == nb
print(words[0])

In [None]:
start = time.perf_counter()
lengths = {w:len(w) for w in words}
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)

In [None]:
start = time.perf_counter()
for w in words:
    n = lengths[w]
stop = time.perf_counter()
print(f"duration {stop-start}", file=sys.stderr, flush=True)