In [1]:
from srs.SRS import SRS
import llm as L

ImportError: attempted relative import with no known parent package

In [82]:
srs = SRS() # TODO: in the future, load an existing user-specific SRS (or create if it doesn't exist)

TEACHER_NAME = 'Rose' # localized to target language (regular 'e' for Finnish)
LEARNER_NAME = 'Lucas' # obtained from user profile

# Words the assistant is allowed to use. Everything is stored lowercase;
# the names are lowercased below to match.
allowed_vocab = [
    # greetings
    'terve', 'hei',

    # nouns
    'talo',     # house
    'vesi',     # water
    'ystävä',   # friend
    'huomenta', # morning
    'velho',    # wizard
    'suomi',    # Finland
    'koira',    # dog
    'nimi',     # name

    # singular possessive nouns for 'nimi'
    'nimeni',   # first person "my name"
    'nimesi',   # second person "your name"
    'nimensä',  # third person "his name"

    # singular possessive nouns for 'ystävä'
    'ystäväni',  # first person
    'ystäväsi',  # second person
    'ystävänsä', # third person

    # adjectives
    'vanha',       # old
    'hyvää',       # good
    'suomalainen', # Finnish

    # pronouns, possessives, "to be" verbs
    'minä', 'minun', 'olen', 'olenko', # first person
    'sinä', 'sinun', 'olet', 'oletko', # second person
    'hän', 'hänen', 'on', 'onko',      # third person

    # names
    'matti', 'aleksi', 'sami', TEACHER_NAME.lower(), LEARNER_NAME.lower(),

    # useful words
    'kyllä', # yes
    'ei', # no
    'mitä', # "what/how" as in "what did you say?" or "how are you" -- about more abstract things
    'mikä', # "what" as in "what is this?" or "what is your name?" -- about specific things
]

# Register every allowed word as a card in the SRS.
for v in allowed_vocab:
    srs.add_card(v)

# NOTE: maybe review should err if the card doesn't exist?
# srs.review_card("some card that doesn't exist", "good") # => KeyError

def _gbnf_class_char(ch):
    r'''
    Render a single character for use inside a GBNF `[...]` character class.

    Letters and digits are emitted as-is. Anything else is written as a
    `\UXXXXXXXX` code-point escape (the same escape form `emoji_bnf` uses
    below) so that characters with syntactic meaning inside a class, such as
    `]`, `\`, `^` or `-`, are always read as literals.
    '''
    return ch if ch.isalnum() else f'\\U{ord(ch):08X}'


def make_restricted_vocab_grammar(vocab):
    '''
    Make an LLM-compatible grammar based on a list of allowed vocab.
    The grammar can be passed to an llm call via the `grammar` arg.

    Every word is matched case-insensitively: each character becomes a
    class holding its lower- and upper-case forms, such as `[hH]`. Output
    must start with a vocab word, and every vocab word must be followed by
    at least one separator character.

    vocab: List[str] -- empty strings are ignored
    returns: (compiled grammar, grammar source text)
    raises: ValueError if `vocab` contains no non-empty word
    '''

    # An empty word would become an empty `()` alternative, and an empty
    # vocab would leave the `vocab` rule with nothing to match.
    words = [w for w in vocab if w]
    if not words:
        raise ValueError('vocab must contain at least one non-empty word')

    def case_insensitive_class(c):
        # `c` is already lowercase; pair it with its upper-case form.
        return '[' + ''.join(_gbnf_class_char(x) for x in c + c.upper()) + ']'

    allowed_vocab_bnf = ' | '.join(
        '(' + ''.join(case_insensitive_class(c) for c in w.lower()) + ')'
        for w in words
    )

    # Separators allowed between words: basic punctuation and whitespace.
    non_word_bnf = r'''[.?!,\n\t ]+'''
    # Alternative considered: any run of non-letters.
    # non_word_bnf = r'''[^a-zöäA-ZÖÄ]+'''
    emoji_bnf = r'[\U0001F600-\U0001F64F]'

    grammar = f'''
root      ::= vocab word*
word      ::= non-alpha | vocab | emoji
vocab     ::= ({allowed_vocab_bnf}) non-alpha
non-alpha ::= {non_word_bnf}
emoji     ::= {emoji_bnf}
'''.strip()

    grammar_compiled = L.LlamaGrammar.from_string(grammar)

    return grammar_compiled, grammar

# NOTE: going forward, this grammar could be potentially updated for every call to the LLM, but only when the srs words have changed.
# ... But ideally a system better than a grammar will be used eventually
# The grammar is built from the keys of `srs.output()` (presumably the card words -- confirm against SRS).
grammar_compiled, grammar = make_restricted_vocab_grammar(list(srs.output().keys()))

print(grammar, '\n\n\n--------\n\n\n')

# Use the configured names rather than repeating them as literals, so the
# prompt stays in sync with TEACHER_NAME / LEARNER_NAME.
sys_prompt = (
    f'You are a Finnish teaching assistant named {TEACHER_NAME}. I am a Finnish learner named {LEARNER_NAME}.' +
    '\nRespond to future messages with SINGLE, SHORT sentences and nothing else. ' +
    'Use a lot of emojis. Use newlines to end messages.' +
    '\n\nThis is the set of allowed vocab you can draw from for responses:\n{' + ', '.join(allowed_vocab) + '}' +
    '\n\nIMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON\'T try expressing it.'
)

print(sys_prompt)

root      ::= vocab word*
word      ::= non-alpha | vocab | emoji
vocab     ::= (([aA][lL][eE][kK][sS][iI]) | ([eE][iI]) | ([hH][eE][iI]) | ([hH][uU][oO][mM][eE][nN][tT][aA]) | ([hH][yY][vV][√§√Ñ][√§√Ñ]) | ([hH][√§√Ñ][nN]) | ([hH][√§√Ñ][nN][eE][nN]) | ([kK][oO][iI][rR][aA]) | ([kK][yY][lL][lL][√§√Ñ]) | ([lL][uU][cC][aA][sS]) | ([mM][aA][tT][tT][iI]) | ([mM][iI][kK][√§√Ñ]) | ([mM][iI][nN][uU][nN]) | ([mM][iI][nN][√§√Ñ]) | ([mM][iI][tT][√§√Ñ]) | ([nN][iI][mM][eE][nN][iI]) | ([nN][iI][mM][eE][nN][sS][√§√Ñ]) | ([nN][iI][mM][eE][sS][iI]) | ([nN][iI][mM][iI]) | ([oO][lL][eE][nN]) | ([oO][lL][eE][nN][kK][oO]) | ([oO][lL][eE][tT]) | ([oO][lL][eE][tT][kK][oO]) | ([oO][nN]) | ([oO][nN][kK][oO]) | ([rR][oO][sS][eE]) | ([sS][aA][mM][iI]) | ([sS][iI][nN][uU][nN]) | ([sS][iI][nN][√§√Ñ]) | ([sS][uU][oO][mM][aA][lL][aA][iI][nN][eE][nN]) | ([sS][uU][oO][mM][iI]) | ([tT][aA][lL][oO]) | ([tT][eE][rR][vV][eE]) | ([vV][aA][nN][hH][aA]) | ([vV][eE][lL][hH][oO]) | ([vV][eE][sS][iI]) | ([yY][sS][tT][√§√Ñ][vV]

In [99]:
# Start a conversation seeded with the system prompt.
llm = L.LLM(sys_prompt)
TEMP = 0 # presumably makes decoding deterministic -- TODO confirm against the LLM wrapper

# Stream one grammar-restricted reply. The system prompt asks the model to end
# messages with a newline, so generation stops at the first '\n'.
reply_stream = llm(
    'Hyvää huomenta!',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for token in reply_stream:
    print(token, end='')

Terve Lucas! Hyvää huomenta.

In [100]:
# Next learner turn: stream one grammar-restricted reply, stopping at the first newline.
reply_stream = llm(
    'Oletko minun ystäväni?',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for token in reply_stream:
    print(token, end='')

Minä olen ystäväsi, Lucas. 😊

In [101]:
# Next learner turn: stream one grammar-restricted reply, stopping at the first newline.
reply_stream = llm(
    'Oletko Suomalainen?',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for token in reply_stream:
    print(token, end='')

Kyllä, olen suomalainen. Sinä?

In [102]:
# Next learner turn: stream one grammar-restricted reply, stopping at the first newline.
reply_stream = llm(
    'Ei.',
    response_format='stream',
    max_tokens=100,
    grammar=grammar_compiled,
    temperature=TEMP,
    stop=['\n'],
)
for token in reply_stream:
    print(token, end='')

mitä? Sinä olet mikä?

In [103]:
# Allow English in the conversation so the learner can ask questions.
# No `grammar` argument is passed here, so the reply is presumably not limited
# to the Finnish vocab -- confirm against the LLM wrapper's default.

reply_stream = llm(
    'Translate what you just said to english?',
    response_format='stream',
    max_tokens=1000,
    temperature=TEMP,
    stop=['\n'],
)
for token in reply_stream:
    print(token, end='')

 What are you?

In [None]:
# NOTE: maybe a background LLM can be checking the Rose output for correctness. Perhaps the same LLM would be used to check user output.
# ... If the output is incorrect, then it could be rewritten or just highlighted.

In [104]:
# Debugging aid: display the raw prompt string built from the conversation
# history (the output shows [INST] ... [/INST] markers around each turn).
# `_hist_to_prompt` is a private helper; what the `None` argument selects is
# not visible in this notebook -- TODO confirm.
llm._hist_to_prompt(None)

"[INST] You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.\nRespond to future messages with SINGLE, SHORT sentences and nothing else. Use a lot of emojis. Use newlines to end messages.\n\nThis is the set of allowed vocab you can draw from for responses:\n{terve, hei, talo, vesi, yst√§v√§, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimens√§, yst√§v√§ni, yst√§v√§si, yst√§v√§ns√§, vanha, hyv√§√§, suomalainen, min√§, minun, olen, olenko, sin√§, sinun, olet, oletko, h√§n, h√§nen, on, onko, matti, aleksi, sami, rose, lucas, kyll√§, ei, mit√§, mik√§}\n\nIMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it. [/INST] Understood.</s>[INST] Hyv√§√§ huomenta! [/INST] Terve Lucas! Hyv√§√§ huomenta. [INST] Oletko minun yst√§v√§ni? [/INST] Min√§ olen yst√§v√§si, Lucas. üòä [INST] Oletko Suomalainen? [/INST] Kyll√§, olen suomalainen. Sin√§? [INST] Ei. [/INST] mit√§? S

In [105]:
# Print the conversation so far in a readable form: one "role --- text" entry
# per turn, separated by rules.
print(llm.get_pretty_hist())

system --- You are a Finnish teaching assistant named Rose. I am a Finnish learner named Lucas.
Respond to future messages with SINGLE, SHORT sentences and nothing else. Use a lot of emojis. Use newlines to end messages.

This is the set of allowed vocab you can draw from for responses:
{terve, hei, talo, vesi, ystävä, huomenta, velho, suomi, koira, nimi, nimeni, nimesi, nimensä, ystäväni, ystäväsi, ystävänsä, vanha, hyvää, suomalainen, minä, minun, olen, olenko, sinä, sinun, olet, oletko, hän, hänen, on, onko, matti, aleksi, sami, rose, lucas, kyllä, ei, mitä, mikä}

IMPORTANT: All word usage must be grammatically correct Finnish -- if the available words cannot express something, then DON'T try expressing it.
__________

user --- Hyvää huomenta!
__________

assistant --- Terve Lucas! Hyvää huomenta.
__________

user --- Oletko minun ystäväni?
__________

assistant --- Minä olen ystäväsi, Lucas. 😊
__________

user --- Oletko Suomalainen?
__________

assi