In [1]:
# Make sure the notebook runs from the project root ('LLTM') so that the
# project-local imports below resolve. Pure-Python instead of `!pwd` / `%cd`,
# so it does not depend on a POSIX shell or on '/' being the path separator.
import os

# Kept as a one-element list so `PWD[0]` still works the way it did with the
# earlier `!pwd` capture. It records the directory at the time this cell ran.
PWD = [os.getcwd()]
if os.path.basename(PWD[0]) != 'LLTM':
    # Assumes the notebook lives exactly one level below the project root.
    os.chdir('..')

from srs.SRS import SRS
import llm_core.llm as L

In [6]:
# TODO: in the future, load an existing user-specific SRS (or create it if it doesn't exist)
srs = SRS()

TEACHER_NAME = 'Rose'   # localized to target language (regular 'e' for Finnish)
LEARNER_NAME = 'Lucas'  # obtained from user profile

# Every word the assistant is allowed to use, all lowercase, grouped by role.
# The order below is the order the words appear in the system prompt.
allowed_vocab = [
    # greetings
    'terve',
    'hei',

    # nouns: house, water, friend, morning, wizard, Finland, dog, name
    'talo', 'vesi', 'ystävä', 'huomenta', 'velho', 'suomi', 'koira', 'nimi',

    # singular possessive forms of 'nimi': "my name", "your name", "his name"
    'nimeni', 'nimesi', 'nimensä',

    # singular possessive forms of 'ystävä': first, second, third person
    'ystäväni', 'ystäväsi', 'ystävänsä',

    # adjectives: old, good, Finnish, nice
    'vanha', 'hyvää', 'suomalainen', 'mukava',

    # pronoun, possessive, "to be" verb and its question form -- one row per person
    'minä', 'minun', 'olen', 'olenko',  # first person
    'sinä', 'sinun', 'olet', 'oletko',  # second person
    'hän', 'hänen', 'on', 'onko',       # third person

    # names (teacher and learner names are lowercased like every other entry)
    'matti', 'aleksi', 'sami',
    TEACHER_NAME.lower(),
    LEARNER_NAME.lower(),

    # useful words
    'kyllä',  # yes
    'ei',     # no
    'mitä',   # "what/how" as in "what did you say?" or "how are you" -- about more abstract things
    'mikä',   # "what" as in "what is this?" or "what is your name?" -- about specific things
]

# Register each allowed word as a card in the SRS.
for v in allowed_vocab:
    srs.add_card(v)

# NOTE: maybe review should raise an error if the card doesn't exist?
# srs.review_card("some card that doesn't exist", "good") # => KeyError

def make_restricted_vocab_grammar(vocab):
    '''
    Make an LLM-compatible grammar based on a list of allowed vocab.
    The grammar can be passed to an llm call via the `grammar` arg.

    The generated text is a sequence of "words", where each word is either a
    run of punctuation/whitespace, one emoji from the U+1F600..U+1F64F range,
    or one vocab entry followed by punctuation/whitespace. Vocab entries are
    matched case-insensitively.

    vocab: Iterable[str] -- allowed words. Entries are lowercased; empty
        strings and duplicates (after lowercasing) are dropped. If nothing
        is left, the grammar only allows punctuation/whitespace and emoji.

    returns: (grammar_compiled, grammar) -- the object returned by
        `L.LlamaGrammar.from_string` and the grammar source text it was
        built from.
    '''

    def _char_to_bnf(c):
        # Cased letters become a two-way character class, e.g. 'a' -> [aA].
        upper = c.upper()
        if len(upper) == 1 and upper != c:
            return f'[{c}{upper}]'
        # Anything else (digits, '-', "'", ']', '^', ...) is emitted as a
        # quoted literal: inside a character class such characters could
        # close the class, negate it or start a range. Characters whose
        # uppercase form is several characters long are also kept literal
        # so that the class does not silently accept extra letters.
        escaped = c.replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    # Lowercase, drop empty entries and de-duplicate while keeping order.
    words = list(dict.fromkeys(w.lower() for w in vocab if w))

    allowed_vocab_bnf = ' | '.join(
        '(' + ''.join(_char_to_bnf(c) for c in w) + ')'
        for w in words
    )

    non_word_bnf = r'''[.?!,\n\t ]+'''
    # Alternative considered: anything that is not a letter.
    # non_word_bnf = r'''[^a-zöäA-ZÖÄ]+'''
    emoji_bnf = r'[\U0001F600-\U0001F64F]'

    rules = ['root      ::= word*']
    if words:
        rules.append('word      ::= non-alpha | vocab | emoji')
        rules.append(f'vocab     ::= ({allowed_vocab_bnf}) non-alpha')
    else:
        # Without any vocab, leave the rule out entirely rather than
        # emitting an empty `()` group.
        rules.append('word      ::= non-alpha | emoji')
    rules.append(f'non-alpha ::= {non_word_bnf}')
    rules.append(f'emoji     ::= {emoji_bnf}')
    grammar = '\n'.join(rules)

    grammar_compiled = L.LlamaGrammar.from_string(grammar)

    return grammar_compiled, grammar

# NOTE: going forward, this grammar could potentially be updated for every call to the LLM, but only when the SRS words have changed.
# ... But ideally a system better than a grammar will be used eventually
# The grammar is built from the words currently stored in the SRS, while the
# prompt below lists `allowed_vocab` directly.
grammar_compiled, grammar = make_restricted_vocab_grammar(list(srs.output().keys()))

print(grammar, '\n\n\n--------\n\n\n')

# The names come from TEACHER_NAME / LEARNER_NAME so the prompt stays in sync
# with the names that were added to the allowed vocab.
sys_prompt = (
    f'You are a Finnish teaching assistant named {TEACHER_NAME}. I am a Finnish learner named {LEARNER_NAME}.' +
    '\nRespond to future messages with SINGLE, SHORT sentences and nothing else. ' +
    'Use a lot of emojis. Use newlines to end messages.' +
    '\n\nThis is the set of allowed vocab you can draw from for responses:\n{' + ', '.join(allowed_vocab) + '}' +
    "\n\nIMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it."
)

print(sys_prompt)

root      ::= word*
word      ::= non-alpha | vocab | emoji
vocab     ::= (([aA][lL][eE][kK][sS][iI]) | ([eE][iI]) | ([hH][eE][iI]) | ([hH][uU][oO][mM][eE][nN][tT][aA]) | ([hH][yY][vV][äÄ][äÄ]) | ([hH][äÄ][nN]) | ([hH][äÄ][nN][eE][nN]) | ([kK][oO][iI][rR][aA]) | ([kK][yY][lL][lL][äÄ]) | ([lL][uU][cC][aA][sS]) | ([mM][aA][tT][tT][iI]) | ([mM][iI][kK][äÄ]) | ([mM][iI][nN][uU][nN]) | ([mM][iI][nN][äÄ]) | ([mM][iI][tT][äÄ]) | ([mM][uU][kK][aA][vV][aA]) | ([nN][iI][mM][eE][nN][iI]) | ([nN][iI][mM][eE][nN][sS][äÄ]) | ([nN][iI][mM][eE][sS][iI]) | ([nN][iI][mM][iI]) | ([oO][lL][eE][nN]) | ([oO][lL][eE][nN][kK][oO]) | ([oO][lL][eE][tT]) | ([oO][lL][eE][tT][kK][oO]) | ([oO][nN]) | ([oO][nN][kK][oO]) | ([rR][oO][sS][eE]) | ([sS][aA][mM][iI]) | ([sS][iI][nN][uU][nN]) | ([sS][iI][nN][äÄ]) | ([sS][uU][oO][mM][aA][lL][aA][iI][nN][eE][nN]) | ([sS][uU][oO][mM][iI]) | ([tT][aA][lL][oO]) | ([tT][eE][rR][vV][eE]) | ([vV][aA][nN][hH][aA]) | ([vV][eE][lL][hH][oO]) | ([vV][eE][sS][iI]) | ([yY][sS][tT][äÄ][vV

In [15]:
# Sanity check: can the local model judge Finnish grammar on its own?
# A plain assistant (no vocab restriction, no grammar) is asked about a
# sentence that the notes further down mark as incorrect.
llm = L.LLM('You are a helpful assistant who knows Finnish.')

# The recorded answer calls the sentence correct, hence the marker below.
llm('Is "Ei, olen sinun koira" absolutely grammatically correct?') # WRONG RESPONSE

' Yes, the sentence "Ei, olen sinun koira" is grammatically correct in Finnish. It translates to "No, I am not your dog" in English.'

In [None]:
# Some prompts to try in ChatGPT (specifically GPT-4o mini). Try a larger local model here in the future?
'''
Is "Ei, olen sinun koira" grammatically correct? # should be incorrect, wrong form of koira
Nimi sinun ystäväsi on Rose. # incorrect

# Really good answer to this:

Rate (0-10) how well this assistant is responding to this Finnish-learning user

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.
__________

user --- Oletko minun ystäväni?
__________

assistant --- 😊 Nimi sinun ystäväsi on Rose.

# Rating specific messages w/ history

Rate (0-10) how good this assistant's final message (to the Finnish-learning user) is using the conversation as context.

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.
__________

user --- Oletko minun ystäväni?
__________

assistant --- 😊 Nimi sinun ystäväsi on Rose.

# Chat GPT seems to know exactly what I want

Rate (0-10) how good this assistant's final message (to the Finnish-learning user) is using the conversation as context.

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.

# prompt again to show consistent answer, but this is an example response:
I'd rate the final message a **7/10**. 
The response is friendly and straightforward, which works well.
However, "Olen suomalainen" ("I am Finnish") could be slightly more engaging.
Perhaps adding something like, "Kyllä, olen suomalainen!" or offering more context could make the reply feel warmer or more conversational. 
It's clear, but a bit minimal.

## With allowed words

Rate (0-10) how good this assistant's final message (to the Finnish-learning user) is using the conversation as context. The only words allowed for now are:
terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, mukava, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä.

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.


## Whats a better response?

Rate (0-10) how good this assistant's final message (to the Finnish-learning user) is using the conversation as context. The only words allowed for now are:
terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, mukava, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä.
Finally, provide a best alternate response.

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.

## ^ Chat GPT DOES NOT stick to the allowed words
'''

In [25]:
# Start a new conversation that uses the restricted-vocab system prompt.
llm = L.LLM(sys_prompt)
TEMP = 0  # passed as `temperature` to the chat calls

# Ask for a streamed, grammar-constrained reply that stops at the first
# newline, and echo each token as it arrives.
s = llm(
    'Hyvää huomenta!',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for tok in s:
    print(tok, end='')

🙋 Terve, Lucas! Hyvää huomenta.

In [27]:
# Next learner message; the reply is streamed under the vocab grammar and
# printed token by token.
s = llm(
    'Oletko minun ystäväni?',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for tok in s:
    print(tok, end='')

😊 Nimi sinun ystäväsi on Rose.

In [28]:
# Print the conversation so far as formatted by `get_pretty_hist()`
# (the output shows one "role --- content" entry per message).
print(llm.get_pretty_hist())

system --- You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.
Respond to future messages with SINGLE, SHORT sentences and nothing else. Use a lot of emojis. Use newlines to end messages.

This is the set of allowed vocab you can draw from for responses:
{terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, mukava, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä}

IMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it.
__________

user --- Hyvää huomenta!
__________

assistant --- 🙋 Terve, Lucas! Hyvää huomenta.
__________

user --- Oletko Suomalainen?
__________

assistant --- 🙍 Nimeni on Rose. Olen suomalainen.
__________

user --- Oletko minun ystäväni?
__________

assistant --- 😊 N

In [23]:
# Peek at the second-to-last entry of the (private) history list; it is
# displayed as a Msg with role, content and response_format fields.
llm._hist[-2]

Msg(role='user', content='Oletko minun ystäväni?', response_format=None)

In [24]:
### Eval

# A second LLM that grades the latest exchange of the conversation held in `llm`.
grader = L.LLM(
    'You are a Finnish language conversation grader. '
    'Given a response to a prompt, tell me whether the response was: "good", "bad", or "cannot determine". '
    'Grade is for grammatical correctness as well as appropriate usage of capitalization, terminology, mood, etc. '
    'Only output your Grade followed by a newline.'
)

# Send only the message text (`.content`), not the repr of the Msg objects.
# `_hist[-2]` is the latest user message; `_hist[-1]` is presumably the
# assistant's reply to it -- verify against how the LLM class stores history.
grader(
    'Given a prompt:\n'
    f'{llm._hist[-2].content}\n\n'
    '... grade this response:\n'
    f'{llm._hist[-1].content}',
    max_tokens=10
)

' Good. The response is grammatically correct,'

In [26]:
# Learner asks "Oletko Suomalainen?"; stream the grammar-constrained reply
# and print it token by token.
s = llm(
    'Oletko Suomalainen?',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for tok in s:
    print(tok, end='')

🙍 Nimeni on Rose. Olen suomalainen.

In [102]:
# Learner answers with a bare "Ei."; stream the grammar-constrained reply
# and print it token by token.
s = llm(
    'Ei.',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for tok in s:
    print(tok, end='')

mitä? Sinä olet mikä?

In [16]:
# Allow English in the conversation so the learner can ask questions.
# No `grammar` is passed here, so this reply is not limited to the allowed
# vocab; the token budget is also larger than in the Finnish-only cells.
s = llm(
    'Translate what you just said to english?',
    response_format='stream',
    max_tokens=1000,
    temperature=TEMP,
    stop=['\n'],
)
for tok in s:
    print(tok, end='')

 Yes, I am Finnish. 😊

In [None]:
# NOTE: maybe a background LLM can be checking the Rose output for correctness. Perhaps the same LLM would be used to check user output.
# ... If the output is incorrect, then it could be rewritten or just highlighted.

# NOTE: use RAG (retrieval-augmented generation) to give specific answers to questions

In [17]:
# Show the raw prompt string that the (private) `_hist_to_prompt` helper
# builds from the history; the output uses [INST] ... [/INST] markers.
# NOTE(review): the meaning of the `None` argument is not visible here.
llm._hist_to_prompt(None)

"[INST] You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.\nRespond to future messages with SINGLE, SHORT sentences and nothing else. Use a lot of emojis. Use newlines to end messages.\n\nThis is the set of allowed vocab you can draw from for responses:\n{terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä}\n\nIMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it. [/INST] Understood.</s>[INST] Hyvää huomenta! [/INST] Terve Lucas! Hyvää huomenta sinun! 😊 [INST] Oletko minun ystäväni? [/INST] Minä olen ystäväsi, Lucas! 😊 [INST] Oletko Suomalainen? [/INST] Kyllä, olen suomalainen. 😊 [INST] Translate what you just said to english? [/INST] 

In [18]:
# Print the conversation so far as formatted by `get_pretty_hist()`
# (the output shows one "role --- content" entry per message).
print(llm.get_pretty_hist())

system --- You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.
Respond to future messages with SINGLE, SHORT sentences and nothing else. Use a lot of emojis. Use newlines to end messages.

This is the set of allowed vocab you can draw from for responses:
{terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä}

IMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it.
__________

user --- Hyvää huomenta!
__________

assistant --- Terve Lucas! Hyvää huomenta sinun! 😊
__________

user --- Oletko minun ystäväni?
__________

assistant --- Minä olen ystäväsi, Lucas! 😊
__________

user --- Oletko Suomalainen?
__________

assistant --- Kyllä, olen s