In [1]:
import llm as L

In [4]:
def make_restricted_vocab_grammar(vocab):
    '''
    Make an LLM-compatible grammar based on a list of allowed vocab.
    The grammar can be passed to an llm call via the `grammar` arg.

    vocab: List[str]
    '''

    allowed_vocab_bnf = ' | '.join([
        '('+''.join([
            f'[{c}{c.upper()}]' 
            for c in v.lower()
        ])+')'
        for v in vocab
    ])

    non_word_bnf = r'''[.?!,\n\t ]+'''
    # non_word_bnf = r'''[^a-zÃ¶Ã¤A-ZÃ–Ã„]+'''
    emoji_bnf = r'[\U0001F600-\U0001F64F]'

    grammar = f'''
root      ::= vocab word*
word      ::= non-alpha | vocab | emoji
vocab     ::= ({allowed_vocab_bnf}) non-alpha
non-alpha ::= {non_word_bnf}
emoji     ::= {emoji_bnf}
'''.strip()

    grammar_compiled = L.LlamaGrammar.from_string(grammar)

    return grammar_compiled, grammar

In [95]:
allowed_vocab = [
    # greetings
    'terve',
    'hei',

    # nouns
    'talo', # house
    'vesi', # water
    'ystÃ¤vÃ¤', # friend
    # 'koulu',
    # 'tyÃ¶',
    'huomenta', # morning
    'velho', # wizard
    'suomi', # Finland
    'koira', # dog

    # 'kaunis',
    # 'ruma',
    'vanha', # old
    # 'nuori',
    # 'sairas',
    # 'hauska',
    # 'ystÃ¤vÃ¤llinen',
    # 'tylsÃ¤',
    'hyvÃ¤Ã¤', # good
    'suomalainen',

    'minÃ¤', 'olen', 'olenko', 
    'sinÃ¤', 'olet', 'oletko',
    'hÃ¤n', 'on', 'onko',
    # 'minut', 'minun',
    # 'sinut', 'sinun',

    # names
    'matti',
    'rose',
    'lucas',

    'kyllÃ¤', # yes
    'ei', # no
    'mitÃ¤', # "what" as in "what did you say?"
]

grammar_compiled, grammar = make_restricted_vocab_grammar(allowed_vocab + allowed_vocab_extra)

print(grammar, '\n\n\n--------\n\n\n')

sys_prompt = (
    'You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.' +
    '\nRespond to future messages with SINGLE, SHORT sentences and nothing else. ' +
    'Use a lot of emojis. Use newlines to end messages.' +
    '\n\nThis is the set of allowed words:\n{' + ', '.join(allowed_vocab) + '}' +
    '\n\nAll word usage must be grammatically correct regular Finnish.'
)

print(sys_prompt)

root      ::= vocab word*
word      ::= non-alpha | vocab | emoji
vocab     ::= (([tT][eE][rR][vV][eE]) | ([hH][eE][iI]) | ([tT][aA][lL][oO]) | ([vV][eE][sS][iI]) | ([yY][sS][tT][Ã¤Ã„][vV][Ã¤Ã„]) | ([hH][uU][oO][mM][eE][nN][tT][aA]) | ([vV][eE][lL][hH][oO]) | ([sS][uU][oO][mM][iI]) | ([kK][oO][iI][rR][aA]) | ([vV][aA][nN][hH][aA]) | ([hH][yY][vV][Ã¤Ã„][Ã¤Ã„]) | ([sS][uU][oO][mM][aA][lL][aA][iI][nN][eE][nN]) | ([mM][iI][nN][Ã¤Ã„]) | ([oO][lL][eE][nN]) | ([oO][lL][eE][nN][kK][oO]) | ([sS][iI][nN][Ã¤Ã„]) | ([oO][lL][eE][tT]) | ([oO][lL][eE][tT][kK][oO]) | ([hH][Ã¤Ã„][nN]) | ([oO][nN]) | ([oO][nN][kK][oO]) | ([mM][aA][tT][tT][iI]) | ([rR][oO][sS][eE]) | ([lL][uU][cC][aA][sS]) | ([kK][yY][lL][lL][Ã¤Ã„]) | ([eE][iI]) | ([mM][iI][tT][Ã¤Ã„]) | ([rR][oO][sS][eE]) | ([lL][uU][cC][aA][sS])) non-alpha
non-alpha ::= [.?!,\n\t ]+
emoji     ::= [\U0001F600-\U0001F64F] 


--------



You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.
Respond to future messages with SI

In [96]:
llm = L.LLM(sys_prompt)
TEMP = 0

s = llm('HyvÃ¤Ã¤ huomenta!', response_format='stream', max_tokens=100, grammar=grammar_compiled, temperature=TEMP, stop=['\n'])

for tok in s:
    print(tok, end='')

HyvÃ¤Ã¤ huomenta, Lucas!

In [97]:
s = llm('Oletko Suomalainen?', response_format='stream', max_tokens=100, grammar=grammar_compiled, temperature=TEMP, stop=['\n'])

for tok in s:
    print(tok, end='')

KyllÃ¤, olen suomalainen. MitÃ¤ sinÃ¤ olet?

In [98]:
s = llm('What is "MitÃ¤ sinÃ¤ olet?"', response_format='stream', max_tokens=1000, temperature=TEMP, stop=['\n'])

for tok in s:
    print(tok, end='')

 "MitÃ¤ sinÃ¤ olet?" means "What are you?" in English. [](nostalgic-turtle-floating)

In [99]:
s = llm('I believe you\'re wrong', response_format='stream', max_tokens=1000, temperature=TEMP, stop=['\n'])

for tok in s:
    print(tok, end='')

 I'm sorry if I made a mistake. "MitÃ¤ sinÃ¤ olet?" can mean "What are you?" or "What are you doing?" in English, depending on the context. In this case, I assumed it meant "What are you?". [](blushing-squid-squinting)

In [56]:
# s = llm('Ei.', response_format='stream', max_tokens=100, grammar=grammar_compiled, temperature=TEMP, stop=['\n'])

# for tok in s:
#     print(tok, end='')

MATTI ON MINÃ„ KOIRA. ðŸ˜ƒðŸ˜ƒðŸ˜ƒðŸ˜ƒ

In [64]:
# s = llm('MitÃ¤?', response_format='stream', max_tokens=100, grammar=grammar_compiled, temperature=TEMP, stop=['\n'])

# for tok in s:
#     print(tok, end='')

mitÃ¤? Oletko sinÃ¤ kylLÃ¤? ðŸ˜‚

In [12]:
llm._hist_to_prompt(None)

'[INST] You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.\nRespond to future messages with SINGLE, SHORT, sentences and nothing else. Use A LOT of emojis. Use newlines to end messages.\n\nYou can ONLY draw from this list of beginner terms to form sentences:\nterve, hei, talo, vesi, ystÃ¤vÃ¤, huomenta, velho, suomi, koira, vanha, hyvÃ¤Ã¤, suomalainen, minÃ¤, olen, olenko, sinÃ¤, olet, oletko, hÃ¤n, on, onko, matti, kyllÃ¤, ei, mitÃ¤ [/INST] Understood.</s>[INST] HyvÃ¤Ã¤ huomenta! [/INST] Terve, hyvÃ¤Ã¤ huomenta Lucas! ðŸ˜Š [INST] MinÃ¤ olen Lucas [/INST] Terve Lucas! SinÃ¤ olet ystÃ¤vÃ¤. ðŸ˜Š [INST] Oletko Suomalainen? [/INST] MinÃ¤ olen koira, ei suomalainen. ðŸ˜œðŸ˜œðŸ˜œ MitÃ¤ sinÃ¤ olet, Lucas? Oletko suomalainen? ðŸ˜œðŸ˜œðŸ˜œ [INST] Ei. [/INST] HyvÃ¤Ã¤! ðŸ˜Š Matti on vanha suomalainen, hÃ¤n on koira. ðŸ˜œðŸ˜œðŸ˜œ '

In [13]:
print(llm.get_pretty_hist())

system --- You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.
Respond to future messages with SINGLE, SHORT, sentences and nothing else. Use A LOT of emojis. Use newlines to end messages.

You can ONLY draw from this list of beginner terms to form sentences:
terve, hei, talo, vesi, ystÃ¤vÃ¤, huomenta, velho, suomi, koira, vanha, hyvÃ¤Ã¤, suomalainen, minÃ¤, olen, olenko, sinÃ¤, olet, oletko, hÃ¤n, on, onko, matti, kyllÃ¤, ei, mitÃ¤
__________

user --- HyvÃ¤Ã¤ huomenta!
__________

assistant --- Terve, hyvÃ¤Ã¤ huomenta Lucas! ðŸ˜Š
__________

user --- MinÃ¤ olen Lucas
__________

assistant --- Terve Lucas! SinÃ¤ olet ystÃ¤vÃ¤. ðŸ˜Š
__________

user --- Oletko Suomalainen?
__________

assistant --- MinÃ¤ olen koira, ei suomalainen. ðŸ˜œðŸ˜œðŸ˜œ MitÃ¤ sinÃ¤ olet, Lucas? Oletko suomalainen? ðŸ˜œðŸ˜œðŸ˜œ
__________

user --- Ei.
__________

assistant --- HyvÃ¤Ã¤! ðŸ˜Š Matti on vanha suomalainen, hÃ¤n on koira. ðŸ˜œðŸ˜œðŸ˜œ
__________


