In [None]:
srun --job-name=LLTM -G1 --pty \
    bash -c "source /data/ai_club/team_3_2024-25/team3-env-finetune/bin/activate; \
    hostname; \
    jupyter notebook \
        --ServerApp.root_dir=$(pwd) \
        --ServerApp.password='' \
        --ServerApp.open_browser=False \
        --ServerApp.allow_origin='*' \
        --ServerApp.allow_remote_access=True \
        --ServerApp.port=14321 \
        --ServerApp.ip='*'
"

srun --job-name=LLTM -G2 --nodes 1 --pty \
    bash -c "source /data/ai_club/team_3_2024-25/team3-env-finetune/bin/activate; \
    hostname; \
    jupyter notebook \
        --ServerApp.root_dir=$(pwd) \
        --ServerApp.password='' \
        --ServerApp.open_browser=False \
        --ServerApp.allow_origin='*' \
        --ServerApp.allow_remote_access=True \
        --ServerApp.port=14321 \
        --ServerApp.ip='*'
"

srun --job-name=LLTM -G3 --nodes 1 --partition dgx --pty \
    singularity exec -B/data:/data,/home:/home /data/containers/msoe-tensorflow-24.05-tf2-py3.sif \
    bash -c "source /data/ai_club/team_3_2024-25/team3-env-finetune/bin/activate; \
    hostname; \
    jupyter notebook \
        --ServerApp.root_dir=$(pwd) \
        --ServerApp.password='' \
        --ServerApp.open_browser=False \
        --ServerApp.allow_origin='*' \
        --ServerApp.allow_remote_access=True \
        --ServerApp.port=14321 \
        --ServerApp.ip='*' \
"

In [69]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import torch
import json

---

token logic dev

In [17]:
# ---- token logic dev -- load stuff ----

model_id = "meta-llama/Llama-3.3-70B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # token=TOKEN
)


In [61]:
def tokof(s, check=True):
    toks = tokenizer(s, add_special_tokens=False)['input_ids']
    if check:
        if len(toks) > 1: raise Exception(f'This is more than one tok: {toks}')
        return toks[0]
    return toks

In [62]:
tokof('ter', check=False)

[466]

In [65]:
{
    tokof('T'): {
        tokof('erve'): {},
        tokof('alo'): {}
    },
    tokof('ter'): {
        tokof('ve'): {}
    }
}

{51: {5976: {}, 12812: {}}, 466: {588: {}}}

In [102]:
tokof('huomenta.', False), tokof('huomenta', False)

([17156, 13209, 64, 13], [17156, 13209, 64])

In [118]:
vocab = [
    'terve', 'hei', 'talo', 'vesi', 'ystävä', 'huomenta', 'velho', 'suomi', 'koira', 'nimi', 'nimeni', 'nimesi', 'nimensä',
    'ystäväni', 'ystäväsi', 'ystävänsä', 'vanha', 'hyvää', 'suomalainen', 'mukava', 'minä', 'minun', 'olen', 'olenko', 'sinä', 'sinun', 'olet',
    'oletko', 'hän', 'hänen', 'on', 'onko', 'matti', 'aleksi', 'sami', 'kyllä', 'ei', 'mitä', 'mikä', 'kuka', 'rossi', 'lucas', '.', '!'
]

vocab += [v[0].upper() + v[1:] for v in vocab]
vocab += [(' '+v if v.isalpha() else v) for v in vocab]
# vocab += [v+'.' for v in vocab]

vocab = list(set(vocab))

In [124]:
trie = {}

for v in vocab:
    curr_node = trie

    toks = tokof(v, check=False)

    for tok in toks:
        tok = tokenizer.decode(tok) # FOR VISUALIZING
        if tok not in curr_node:
            curr_node[tok] = {}
        curr_node = curr_node[tok]

    curr_node[None] = {}

In [137]:
given = [' o']
given += ['len']
# given += ['ko']

allowed = trie
for tok in given:
    allowed = allowed[tok] # assume given already has valid seq

# allowed # allowed paths from given
list(allowed.keys()) # list of allowed NOW

[None, 'ko']

In [140]:
# None means, in addition to allowed NOW, also start at beginning

# but (ideally) only those starting with space or punct

[t for t in trie.keys() if t[0] == ' ' or t in ['.', '!']] +\
list(allowed.keys())

[' Hu',
 ' ro',
 ' o',
 ' y',
 ' Y',
 ' Sin',
 ' Su',
 ' On',
 '.',
 ' m',
 ' Sam',
 ' k',
 ' N',
 ' van',
 ' ale',
 ' T',
 ' O',
 ' n',
 ' hy',
 ' Van',
 ' Vel',
 ' on',
 ' ei',
 ' sami',
 ' mik',
 ' sin',
 ' Min',
 ' ter',
 ' su',
 ' matt',
 ' tal',
 ' vel',
 '!',
 ' He',
 ' Ky',
 ' Lucas',
 ' K',
 ' Matt',
 ' h',
 ' E',
 ' hä',
 ' nimi',
 ' v',
 ' hu',
 ' Rossi',
 ' mit',
 ' V',
 ' Hy',
 ' Hä',
 ' Mik',
 ' Muk',
 ' min',
 ' hei',
 ' ko',
 ' Mit',
 ' H',
 ' Ko',
 ' luc',
 ' Ale',
 None,
 'ko']

In [120]:
print(json.dumps(
    trie,
    indent=4,
    ensure_ascii=False
))

{
    " Hu": {
        "oment": {
            "a": {
                "null": 1
            }
        }
    },
    "hu": {
        "oment": {
            "a": {
                "null": 1
            }
        }
    },
    " ro": {
        "ssi": {
            "null": 1
        }
    },
    " o": {
        "let": {
            "null": 1,
            "ko": {
                "null": 1
            }
        },
        "len": {
            "null": 1,
            "ko": {
                "null": 1
            }
        }
    },
    " y": {
        "st": {
            "ä": {
                "vä": {
                    "si": {
                        "null": 1
                    },
                    "ni": {
                        "null": 1
                    },
                    "ns": {
                        "ä": {
                            "null": 1
                        }
                    },
                    "null": 1
                }
            }
        }
    },
    " Y"

</token logic dev>

---

In [None]:
# model_id = "microsoft/Phi-4-mini-instruct" # ms claims it knows finnish, but FAKE!!!
# model_id = "utter-project/EuroLLM-9B-Instruct" # not great
# model_id = "Finnish-NLP/llama-7b-finnish-instruct-v0.2" # let's try doing all processing monolingually
# model_id = "LumiOpen/Poro-34B-chat" # specifically trained in Finnish and English, but kind of big
model_id = "meta-llama/Llama-3.3-70B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    # token=TOKEN
)

bnb = BitsAndBytesConfig(
    load_in_8bit=True,
    # bnb_8bit_use_double_quant=True,
    # bnb_8bit_quant_type="nf8",
    # bnb_8bit_compute_dtype=torch.bfloat16,

    llm_int8_enable_fp32_cpu_offload=True
)

# TOKEN = ''

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    low_cpu_mem_usage=True,
    # attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    # quantization_config=bnb,
    # token=TOKEN
)

In [10]:
# prompt = '''<|alku|> Sinä olet suomen kielen opettaja.
# <|ihminen|> Korjaa tämä lause:
# Tämä on minun koirani, joka on valkoinen
# <|avustaja|> Vastaus: '''

# prompt = '''<|alku|> Sinä olet suomen kielen opettaja.
# <|ihminen|> Mikä on vialla tämän lauseen kanssa?
# Tämä on minun koirasi
# <|avustaja|> '''

prompt = '''<|im_start|>system 
You are a Finnish language teacher.<|im_end|>
<|im_start|>user 
Tell me the most obvious mistake in this sentence (if it has one): "Tämä on minun koirasi"<|im_end|>
<|im_start|>assistant 
The most obvious mistake in the sentence "Tämä on minun koirani" is that it is not a'''

tokenizer.decode(
    model.generate(
        **tokenizer(prompt, return_tensors='pt').to('cuda'),
        max_new_tokens=1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.convert_tokens_to_ids("<|loppu|>"),
    )[0]
)

'<|im_start|>system \nYou are a Finnish language teacher.<|im_end|>\n<|im_start|>user \nTell me the most obvious mistake in this sentence (if it has one): "Tämä on minun koirasi"<|im_end|>\n<|im_start|>assistant \nThe most obvious mistake in the sentence "Tämä on minun koirani" is that it is not a correct'

In [3]:
vocab = [
    'terve', 'hei', 'talo', 'vesi', 'ystävä', 'huomenta', 'velho', 'suomi', 'koira', 'nimi', 'nimeni', 'nimesi', 'nimensä',
    'ystäväni', 'ystäväsi', 'ystävänsä', 'vanha', 'hyvää', 'suomalainen', 'mukava', 'minä', 'minun', 'olen', 'olenko', 'sinä', 'sinun', 'olet',
    'oletko', 'hän', 'hänen', 'on', 'onko', 'matti', 'aleksi', 'sami', 'kyllä', 'ei', 'mitä', 'mikä', 'kuka', 'rossi', 'lucas'
] + ['sinulla']

In [10]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
 
generation_args = {
    "max_new_tokens": 25,
    "return_full_text": False,
    "temperature": 0,
    "do_sample": False
}

pipe(
    [
        # {"role": "system", "content": f"You are a Finnish language teacher.\n\n'IMPORTANT: Your responses must only use words in this allowed vocab: {vocab} and any emoji/punctuation."},
        # {"role": "user", "content": "Use the words in the allowed vocab to make a Finnish sentence asking if I have something (you decide the thing)."}

        # {"role": "system", "content": f"Sinä olet suomen kielen opettaja.\n\n'IMPORTANT: TÄRKEÄÄ: Vastauksesi saa käyttää vain sanoja tästä sallitusta sanastosta: {vocab} sekä mitä tahansa emojeita tai välimerkkejä."},
        # {"role": "user", "content": "Käytä sallittuja sanoja tehdäksesi suomenkielisen kysymyksen, jossa kysyt, onko minulla jokin asia (sinä päätät, mikä se on)."}

        # {"role": "system", "content": f"You are a Finnish language teacher."},
        # {"role": "user", "content": "What are the mistakes in this sentence:\nSyön jäätelö"}
      
        # {"role": "system", "content": f"You are a Finnish language teacher."},
        # {"role": "user", "content": "What are the mistakes in this sentence:\nTämä on minun koirasi"}

        {"role": "system", "content": f"Sinä olet suomen kielen opettaja."},
        # {"role": "user", "content": "Mitkä ovat tämän lauseen virheet, jos niitä on?\nTämä on minun koirasi"}
        # {"role": "user", "content": "Mitkä ovat tämän lauseen virheet, jos niitä on?\nSyön jäätelö"}
        {"role": "user", "content": "Onko tämä lause oikein, muodollista suomea?\nSyön jäätelö"}
    ],
    **generation_args
)

Device set to use cuda:0


[{'generated_text': 'Kyllä, lauseesi on oikein ja se on muodollista suomea.'}]

---

In [1]:
import tiktoken

from openai import OpenAI

client = OpenAI(
    base_url = "http://dh-dgxh100-2.hpc.msoe.edu:8000/v1",
    api_key = "not_used"
)

In [40]:
out = client.chat.completions.create(
    model="meta/llama-3.3-70b-instruct",
    messages=[
        { "role": "system", "content": 'You are an assistant' },
        {
            "role": "user",
            "content": 'Hello. Give me an English sentence. Your response must be JSON mapping "response" to your output.',
        }
    ],
    max_tokens=1024,
    stream=True,
    temperature=0.2,
    # response_format={'type': 'json_object'}
)

for t in out:
    tok = t.choices[0].delta.content
    print(tok, end='')

{"response": "The sun is shining brightly in the clear blue sky today."}

In [5]:
vocab = [
    'terve', 'hei', 'talo', 'vesi', 'ystävä', 'huomenta', 'velho', 'suomi', 'koira', 'nimi', 'nimeni', 'nimesi', 'nimensä',
    'ystäväni', 'ystäväsi', 'ystävänsä', 'vanha', 'hyvää', 'suomalainen', 'mukava', 'minä', 'minun', 'olen', 'olenko', 'sinä', 'sinun', 'olet',
    'oletko', 'hän', 'hänen', 'on', 'onko', 'matti', 'aleksi', 'sami', 'kyllä', 'ei', 'mitä', 'mikä', 'kuka', 'rossi', 'lucas'
] + ['sinulla']

In [4]:
out = client.chat.completions.create(
    model="meta/llama-3.3-70b-instruct",
    messages=[
        { "role": "system", "content": "You are a Finnish teacher." },
        {
            "role": "user",
            "content": "hei! Kuka sina olet",
        #     "content": "What are the mistakes in this sentence:\nSyön jäätelö"
        #     # "content": "What is the most obvious mistake in this sentence (if it has one) \"Tämä on minun koirasi\"?",
        #     # "content": "What is the most obvious mistake in this sentence (if it has one) \"Tämä on minun koirani\"? Respond in JSON {'correct_sentence': str, 'breif explanation': str|'NA'}.",
        },

      
        # {"role": "system", "content": f"You are a Finnish language teacher.\n\n'IMPORTANT: Your responses must only use words in this allowed vocab: {vocab} and any emoji/punctuation."},
        # {"role": "user", "content": "Use the words in the allowed vocab to ask if I have something (you decide the thing)"}  
    ],
    max_tokens=1024,
    stream=True,
    temperature=0,
    # response_format={'type': 'json_object'}
)

# out_str = ''
for t in out:
    tok = t.choices[0].delta.content
    # if not tok: continue
    print(tok, end='')
    # out_str += tok



Hei! Minä olen suomen kielen opettaja. Opetan suomea ulkomaalaisille ja autan heitä oppimaan kaunista suomen kieltä. Miten voinkin auttaa sinua tänään? Haluaisitko harjoitella suomen kieltä tai onko sinulla jotain tiettyä aiheesta, josta haluaisit keskustella?

In [19]:
import json
json.loads(out_str+'}')

JSONDecodeError: Extra data: line 9 column 2 (char 84)

In [9]:
out = client.chat.completions.create(
    model="meta/llama-3.1-70b-instruct",
    messages=[
        { "role": "system", "content": "You are a Finnish teacher." },
        {
            "role": "user",
            "content": "What is the most obvious mistake in this sentence (if it has one) \"Tämä on minun koirani\"?",
        },
    ],
    max_tokens=100,
)

out


ChatCompletion(id='chat-224c2af44cd046ba8a2f4706dc524330', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='In Finnish, the correct sentence would be "Tämä on minun koirani" actually has one mistake that is quite common for non-native speakers.\n\nThe mistake is the use of "minun" instead of "omani". "Minun" is the genitive form of the pronoun "minä", whereas "omani" is the possessive form.\n\nSo, the corrected sentence would be: "Tämä on omani koira."\n\nHowever, it\'s worth noting', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None), stop_reason=None)], created=1742499135, model='meta/llama-3.1-70b-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=48, total_tokens=148, completion_tokens_details=None, prompt_tokens_details=None))

In [10]:
out.choices[0].message.content

'In Finnish, the correct sentence would be "Tämä on minun koirani" actually has one mistake that is quite common for non-native speakers.\n\nThe mistake is the use of "minun" instead of "omani". "Minun" is the genitive form of the pronoun "minä", whereas "omani" is the possessive form.\n\nSo, the corrected sentence would be: "Tämä on omani koira."\n\nHowever, it\'s worth noting'

In [11]:

enc.encode("You are a Finnish teacher.").__len__() + 4 # 4 extra tokens for llama: start_header, end_header, eos, 2 newlines (part of prompt format

NameError: name 'enc' is not defined

In [3]:
from openai import OpenAI
import tiktoken
import getpass
import json
from dataclasses import dataclass

USER = getpass.getuser()

TOKEN_COUNT_PATH = None

client = OpenAI(
    base_url = "http://dh-dgxh100-2.hpc.msoe.edu:8000/v1",
    api_key = "not_used"
)

enc = tiktoken.get_encoding("cl100k_base")

# TODO REMOVE
TOKEN_COUNT_PATH = '/data/ai_club/team_3_2024-25/tokcounts2/'

def _ntoks(text):
    return enc.encode(text).__len__() + 4 # 4 extra tokens for llama: start_header, end_header, eos, 2 newlines (part of prompt format)

def _inc_tok_count(mode, amt):
    if TOKEN_COUNT_PATH is None:
        raise Exception('Set TOKEN_COUNT_PATH before infernece')
    fname = f'{USER}_{mode}.txt'
    try:
        with open(TOKEN_COUNT_PATH+fname, 'r') as f:
            count = int(f.read().strip())
    except FileNotFoundError:
        count = 0
    except ValueError:
        raise Exception(f'Token Count Corrupted: {fname}')
    
    count += amt

    with open(TOKEN_COUNT_PATH+fname, 'w') as f:
        f.write(str(count)+'\n')

@dataclass
class Msg:
    role: str
    content: str
    response_format: list = None

class LLM:
    def __init__(self, sys_prompt:str=None):
        self._hist = []
        self._awaiting_streamed = False

    def _hist_to_prompt(self):
        prompt = []
        tok_count = 0
        for msg in self._hist:
            content = msg.content
            is_last = msg == self._hist[-1]
            if msg.response_format and is_last:
                json_format = {k:'...' for k in msg.response_format}
                content += f'\n\nRespond in this json: {json_format}'
            elif msg.response_format:
                content += '\n\nRespond in JSON.'
            
            tok_count += _ntoks(content)

            prompt.append({
                'role': msg.role,
                'content': content
            })

        return prompt, tok_count

    def _call_default(self, messages, temperature, max_tokens):
        out = client.chat.completions.create(
            model="meta/llama-3.1-70b-instruct",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature
        )
        out = out.choices[0].message.content
        out_toks = _ntoks(out)

        _inc_tok_count('out', out_toks)

        self._hist.append(Msg('assistant', out))

        return out
        
    def _call_stream(self, messages, temperature, max_tokens):
        out = client.chat.completions.create(
            model="meta/llama-3.1-70b-instruct",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True
        )

        self._hist.append(Msg('assistant', ''))
        self._awaiting_streamed = True

        def tok_stream():
            for t in out:
                tok = t.choices[0].delta.content

                if not tok: continue
                _inc_tok_count('out', 1)
                self._hist[-1].content += tok
                yield tok

            _inc_tok_count('out', 4) # 4 exta used in llama prompt format
            self._awaiting_streamed = False

        return tok_stream()

    def _call_fmted(self, messages, temperature, max_tokens, response_format):
        out = client.chat.completions.create(
            model="meta/llama-3.1-70b-instruct",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            response_format={'type': 'json_object'}   
        )
        out = out.choices[0].message.content
        out_toks = _ntoks(out)
        _inc_tok_count('out', out_toks)
        
        try:
            out = json.loads(out)
        except:
            raise Exception(f'Bad JSON output. {out} != {resposne_format}')

        if not all(k in out.keys() for k in response_format):
            raise Exception(f'Missing json keys. {out.keys()} != {response_format}')

        return out

    def __call__(self, prompt, response_format:str|list|None=None, temperature=0, max_tokens=1024):
        if self._awaiting_streamed:
            raise Exception('Cannot start a new message before ending a streamed one.')

        is_resp_fmted = type(response_format) is list

        self._hist.append(Msg('user', prompt))
        if is_resp_fmted:
            self._hist[-1].response_format = response_format

        messages, in_toks = self._hist_to_prompt()
        _inc_tok_count('in', in_toks)

        if response_format is None:
            return self._call_default(messages, temperature, max_tokens)
        elif response_format == 'stream':
            return self._call_stream(messages, temperature, max_tokens)
        elif is_resp_fmted:
            return self._call_fmted(messages, temperature, max_tokens, response_format)
        else:
            raise Exception(f'Unsupported Response Format: {response_format}')


In [4]:
l = LLM()

In [9]:
out = l('who you', response_format='stream')

Exception: Cannot start a new message before ending a streamed one.

In [6]:
out

{'name': 'Assistant', 'job': 'AI Conversational Model'}

In [None]:
out

In [None]:
l._hist

[Msg(role='user', content='who you', response_format=None),
 Msg(role='assistant', content='', response_format=None)]

In [None]:
enc.encode('Hello! It seems like we\'re having a friendly echo. How can I assist you today?').__len__() + 4