In [None]:
python -m vllm.entrypoints.openai.api_server --host 127.0.0.1 --port 8881 --model amew0/Meta-Llama-3-8B-Instruct-v240714045919 --dtype float16 --download-dir /dpc/kunf0097/l3-8b/model & ../ngrok http 8881

In [None]:
qnas = " \n\n### 1\nQ: What is the patient's liver condition based on the provided lab results?\nM: cirrhosis, chronic hepatitis, liver damage, liver failure\nA: chronic hepatitis\n\n### 2\nQ: What is the patient's serum albumin level?\nM: 3.4, 7.54, 11.3, 15.6\nA: 3.4\n\n### 3\nQ: Which of the following is a recommendation for the patient's treatment?\nM: avoid fatty diet, take beta blocker, take more sugar cane juice, consult gastroenterologist\nA: consult gastroenterologist\n\n### 4\nQ: What is the patient's bilirubin level?\nM: 2.38, 4.88, 17.16, 25.8\nA: 17.16\n\n### 5\nQ: What is the patient's creatinine level?\nM: 2.5, 4.88, 7.5, 10.5\nA: 4.88\n"

In [None]:
start_index = qnas.find("### 1")

qna_section = qnas[start_index:].strip()

# Skip the empty string before the first ###
qna_blocks = qna_section.split("### ")[1:]


qnas_formatted = []
for block in qna_blocks:
    lines = block.strip()
    q = lines[lines.find("Q: "):lines.find("A: ")].strip()
    a = lines[lines.find("A: "):].strip()
    
    qnas_formatted.append({"Q": q, "A":a})

In [None]:
qnas_formatted

In [None]:
import torch
torch.cuda.is_available()

In [None]:
import os
import torch
import transformers
import huggingface_hub
import wandb
from scipy.stats import pearsonr
from datetime import datetime
from datasets import load_dataset
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM
from time import time
import gc
import json
import yaml
import argparse
import re
from tqdm import tqdm
import fire
import inspect


logg = lambda x: print(f"------------------------ {x} ---------------------------")


def inspectt(frame):
    logg("")
    args, _, _, values = inspect.getargvalues(frame)
    for arg in args:
        print(f"\t{arg}: {values[arg]}")
    logg("")

def get_prompts_from_template(filepath, name, eval_name):
    default_config = {
        "max_new_tokens": 256,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.9,
    }
    with open(filepath, "r") as f:
        data = yaml.safe_load(f)

    candidate_prompt = data[name]["candidate_prompt"]
    evaluator_prompt = data[eval_name]["evaluator_prompt"]
    candidate_generation_config = data[name].get("candidate_generation_config", default_config)
    evaluator_generation_config = data[eval_name].get(
        "evaluator_generation_config", default_config
    )

    print("candidate_prompt: ", candidate_prompt)
    print("evaluator_prompt: ", evaluator_prompt)
    print("candidate_generation_config: ", candidate_generation_config)
    print("evaluator_generation_config: ", evaluator_generation_config)

    return (
        candidate_prompt,
        evaluator_prompt,
        candidate_generation_config,
        evaluator_generation_config,
    )


def get_tokenizer_and_model(model_name: str, cache_dir: str):
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        cache_dir=f"{cache_dir}/tokenizer",
        pad_token_id=0,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        cache_dir=f"{cache_dir}/model",
        torch_dtype=torch.float16,
        device_map="auto",
        offload_buffers=True,
    )
    return tokenizer, model


def tokenize(prompt, tokenizer):
    tokenized = tokenizer(prompt, return_tensors="pt")
    return tokenized


def generate_and_tokenize_prompt(batch, tokenizer, prompt_template):
    # print(batch)
    prompts = [prompt_template.format(d[0], d[1]) for d in zip(batch["instruction"], batch["input"])]
    print("a", prompts)
    tokenized_prompts = tokenizer(prompts, padding=True, return_tensors="pt")
    print("b", tokenized_prompts)
    return tokenized_prompts


def eval_prompt_tokenizer(generated, output, eval_tokenizer, prompt=None):
    prompt = prompt.format(generated, output)
    tokenized_full_prompt = tokenize(prompt, tokenizer=eval_tokenizer)
    return tokenized_full_prompt


def extract_score(text):
    match = re.search(r"\b\d+\.\d+\b", text)
    return float(match.group(0)) if match else -1.0


def log2json(results, json_result):
    with open(json_result, "w") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

def generate_response(model, tokenizer, input_ids, attention_mask, generation_config):
    # torch.LongTensor(input_ids).to(model.device)
    # torch.LongTensor(attention_mask).to(model.device)
    # try:
        output = model.generate(
            input_ids=torch.stack(input_ids).to(model.device),
            attention_mask=torch.stack(attention_mask).to(model.device),
            eos_token_id=tokenizer.eos_token_id,
            **generation_config,
        )
        print(output[0])
        response_ids = output[0][len(input_ids[0]) :]
        response = tokenizer.decode(response_ids, skip_special_tokens=True)
        return response, output
    # except RuntimeError as e:
    #     if "inf" in str(e) or "nan" in str(e):
    #         print(f"Skipping example due to invalid output: {e}")
    #         return None
    #     else:
    #         raise 



In [None]:
output_dir=f"./out"
cache_dir=f"/dpc/kunf0097/l3-8b"
eval_data_path="./data/1/eval_sample.json"
log_file=None
name="meta-llama/Meta-Llama-3-8B-Instruct"
eval_name="meta-llama/Meta-Llama-3-8B-Instruct"
run_id=datetime.now().strftime("%y%m%d%H%M%S")
log2wandb: bool = True
project="huggingface"
entity="my-ku-org"
evals_per_example=2
batch_size=2

In [None]:
candidate_name="meta-llama/Meta-Llama-3-8B-Instruct"
evaluator_name="meta-llama/Meta-Llama-3-8B-Instruct"

In [None]:
(
    candidate_prompt,
    evaluator_prompt,
    candidate_generation_config,
    evaluator_generation_config,
) = get_prompts_from_template("template.yaml", candidate_name, evaluator_name)


if log2wandb and (project is None or entity is None):
    raise ValueError("Both 'project' and 'entity' must be set if 'log2wandb' is True.")

if log_file is None:
    log_file = f"{output_dir}/results_{name.split('/')[1]}_{run_id}.json"

inspectt(inspect.currentframe())

In [None]:
# evaluator_tokenizer, evaluator_model = get_tokenizer_and_model(
#     model_name=eval_name, cache_dir=cache_dir
# )

candidate_tokenizer, candidate_model = get_tokenizer_and_model(
    model_name=name, cache_dir=cache_dir
)

# candidate_tokenizer = AutoTokenizer.from_pretrained(
#         name,
#         cache_dir=f"{cache_dir}/tokenizer",
#         pad_token_id=0,
#     )



In [None]:
candidate_tokenizer.pad_token = candidate_tokenizer.bos_token 
candidate_tokenizer.padding_side = "left"

In [None]:
data = load_dataset("json", data_files=eval_data_path)
eval_dataset = data["train"].map(
    lambda x: generate_and_tokenize_prompt(x, candidate_tokenizer, candidate_prompt),
    batched=True,  # Process in batches
    batch_size=batch_size
)

In [None]:
batched_eval_dataset  =torch.utils.data.DataLoader(eval_dataset, batch_size=batch_size)

In [None]:
for batch in batched_eval_dataset :
    print(batch)
    break

In [None]:
torch.stack(batch["input_ids"])[:,0]

In [None]:
torch.stack(batch["attention_mask"])[:,1]

In [None]:
print(candidate_tokenizer.decode(torch.stack(batch["input_ids"])[:,1]))

In [None]:
response, output = generate_response(
    candidate_model,
    candidate_tokenizer,
    batch["input_ids"],
    batch["attention_mask"],
    candidate_generation_config,
)

In [None]:
output[0].shape

In [None]:
print(candidate_tokenizer.decode(output[0]))

### enshiallah

In [None]:
import json
with open("out/results_240623023136_240628153415.json", "r") as f:
    results = json.load(f)

In [None]:
import wandb
table = wandb.Table(columns=list(results[0].keys()))
for r in results:
    table.add_data(*r.values())
run =wandb.init(project="huggingface", entity="my-ku-org", name="laaj-llama-3-8b-medical-v240623023136")
wandb.log({"Evaluation Results": table})
wandb.finish()

### Evals **MMLU**

In [None]:
from datasets import load_dataset

In [None]:
data =load_dataset("cais/mmlu", "clinical_knowledge")

In [None]:
eval_dataset = data["dev"]

In [None]:
eval_dataset[0]

### FT

In [1]:
import gc
import inspect
import os
from datetime import datetime
from time import time

import fire
import huggingface_hub
import torch
import transformers
import wandb
from datasets import load_dataset
from dotenv import load_dotenv
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer, SFTConfig

from peft import LoraConfig, PeftModel
from utils.eval_helper import inspectt, logg
from transformers import TrainerCallback
from utils.ft_helper import generate_and_tokenize_prompt

In [5]:
output_dir = f"./out"
cache_dir = f"/dpc/kunf0097/l3-8b"
train_data_path = "./data/medical-36-row.json"
model_name: str = "meta-llama/Meta-Llama-3-8B-Instruct"
model_save_path: str = None
chpt_dir: str = None
run_id = datetime.now().strftime("%y%m%d%H%M%S")
# run_id = "240714045919"
prompt_template=None

if model_save_path is None:
    model_save_path = f"{cache_dir}/model/{model_name}-v{run_id}"
if chpt_dir is None:
    chpt_dir = f"{cache_dir}/chpt/{run_id}"

last_checkpoint = None
if os.path.isdir(chpt_dir):
    checkpoints = [d for d in os.listdir(chpt_dir) if d.startswith("checkpoint-")]
    if checkpoints:
        last_checkpoint = os.path.join(
            chpt_dir, max(checkpoints, key=lambda cp: int(cp.split("-")[-1]))
        )
import yaml
if prompt_template is None:
        with open("tuning.yaml", "r") as f:
            tuning_config = yaml.safe_load(f)
            prompt_template = tuning_config[model_name]["prompt_template"]

In [24]:
prompt_template

'<|start_header_id|>system<|end_header_id|>\n{}<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\nQuestion: {}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n{}<|eot_id|>\n'

In [33]:
def tokenize(prompt, tokenizer, cutoff_len: int = None):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding="max_length",
        return_tensors="pt",
    )

    result["input_ids"] = result["input_ids"].flatten()
    result["attention_mask"] = result["attention_mask"].flatten()

    result["labels"] = result["input_ids"].clone()  # Clone input_ids for labels
    return result


def generate_and_tokenize_promptt(
    data_point, tokenizer, cutoff_len: int = None, prompt_template: str = None
):
    train_on_input = False
    if cutoff_len is None:
        tokenized_full_prompt = tokenize(
            prompt_template.format(
                data_point["instruction"], data_point["input"], data_point["output"]
            ),
            tokenizer=tokenizer,
        )
    else:
        tokenized_full_prompt = tokenize(
            prompt_template.format(
                data_point["instruction"], data_point["input"], data_point["output"]
            ),
            tokenizer=tokenizer,
            cutoff_len=cutoff_len,
        )
        if not train_on_input:
            prompt_template = prompt_template.split(
                "<|start_header_id|>assistant<|end_header_id|>"
            )[0]
            tokenized_user_prompt = tokenize(
                prompt_template.format(data_point["instruction"], data_point["input"]),
                tokenizer=tokenizer,
            )
            user_prompt_len = len(tokenized_user_prompt["input_ids"])
            labels_prefix = torch.full((user_prompt_len,), -100)
            tokenized_full_prompt["labels"] = torch.cat(
                (
                    labels_prefix,
                    torch.tensor(tokenized_full_prompt["labels"][user_prompt_len:]),
                )
            )

    return tokenized_full_prompt

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=f"{cache_dir}/tokenizer")

# # Initialize model
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=True,
# )

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     cache_dir=f"{cache_dir}/model",
#     # quantization_config=bnb_config,
#     torch_dtype=torch.float16,
#     device_map="auto",
#     low_cpu_mem_usage=True,
#     # return_dict=True,
# )

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

0

In [7]:
# Prepare model for LoRA training
peft_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

In [8]:
from datasets import concatenate_datasets

def reorder_dataset(dataset, start_index):
    # Split the dataset into two parts: before and after the start index
    dataset_part1 = dataset.select(range(start_index, len(dataset)))
    dataset_part2 = dataset.select(range(start_index))
    
    # Concatenate the two parts to get the reordered dataset
    reordered_dataset = concatenate_datasets([dataset_part1, dataset_part2])
    return reordered_dataset


In [34]:
per_device_train_batch_size = 2
gradient_accumulation_steps = 1
start_index = 0
if last_checkpoint is not None:
    start_index = (
        int(last_checkpoint.split("-")[-1])
        * per_device_train_batch_size
        * gradient_accumulation_steps
    )

data = load_dataset("json", data_files=train_data_path, split="train")
# data = reorder_dataset(data, start_index)

cutoff_len = 296  # (75% of the data wont be affected)
train_dataset = data.map(lambda x: generate_and_tokenize_promptt(x, tokenizer, cutoff_len, prompt_template))

Map:   0%|          | 0/36 [00:00<?, ? examples/s]

167
105
95
97
98
124
121
113
115
146
127
119
91
131
126
98
235
130
111
109
138
101
133
251
129
108
100
141
98
102
125
125
133
138
155
150


  torch.tensor(tokenized_full_prompt["labels"][user_prompt_len:]),


In [56]:
tokenizer.decode([i for i in train_dataset[0]["labels"] if i != -100])

'<|start_header_id|>assistant<|end_header_id|>\nHi, Thank you for posting your query. The most likely cause for your symptoms is benign paroxysmal positional vertigo (BPPV), a type of peripheral vertigo. In this condition, the most common symptom is dizziness or giddiness, which is made worse with movements. Accompanying nausea and vomiting are common. The condition is due to problem in the ear, and improves in a few days on own. Betahistine tablets would help relieve your symptoms. Doing vestibular rehabilitation or adaptation exercises would prevent the recurrence of these symptoms. An ENT evaluation would also help. I hope it helps.'

In [10]:
# load it from .yaml
train_args = SFTConfig(
    run_name=f"ft-{model_name.split('/')[1]}-{run_id}-v{start_index}",
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,  # only 1 is allowed on the no shuffler [needs revision]
    eval_accumulation_steps=1,  # !very important to send data to cpu
    warmup_steps=1,
    num_train_epochs=3,
    learning_rate=3e-4,
    fp16=False,
    logging_steps=1,
    optim="adamw_torch",
    output_dir=f"{chpt_dir}",
    group_by_length=False,
    dataloader_drop_last=False,
    save_steps=2,
    save_total_limit=3,
    max_seq_length=cutoff_len,
    resume_from_checkpoint=last_checkpoint,
)

In [11]:
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [16]:
from transformers import Trainer
from torch.utils.data import DataLoader, SequentialSampler
import datasets
from transformers.trainer_utils import seed_worker


class SFTTrainerNoShuffle(SFTTrainer):
    def training_step(self, model, inputs):
        if (self.state.global_step % self.args.save_steps) == 0:
            inputs_decoded = tokenizer.decode(inputs["input_ids"][0])
            logger.info(f"{self.state.global_step}: {inputs_decoded}")
        return super().training_step(model, inputs)

    def _get_train_sampler(self):
        return SequentialSampler(self.train_dataset)  # to prevent shuffling

In [18]:
trainer = SFTTrainerNoShuffle(
# trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=train_dataset,
    args=train_args,
    # callbacks=[PrintExampleCallback()]
)

INFO:peft.tuners.tuners_utils:Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!


In [None]:
trainer.train()

### Eval / Chpt

In [26]:
ftmodel = PeftModel.from_pretrained(model, last_checkpoint)
ftmodel = ftmodel.merge_and_unload()

In [10]:
def eval_tokenize(prompt, tokenizer):
    tokenized = tokenizer(prompt, return_tensors="pt")
    return tokenized


def eval_generate_and_tokenize_prompt(data_point, tokenizer, prompt=None):
    prompt = prompt.format(data_point["instruction"], data_point["input"])
    tokenized_full_prompt = eval_tokenize(prompt, tokenizer=tokenizer)
    return tokenized_full_prompt

prompt_template = "<|start_header_id|>system<|end_header_id|> {}<|eot_id|><|start_header_id|>user<|end_header_id|> This is the question: {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
eval_split = "train"
data = load_dataset("json", data_files=train_data_path, split=eval_split)
eval_dataset = data.map(
    lambda x: eval_generate_and_tokenize_prompt(x, tokenizer, prompt_template)
)

Map:   0%|          | 0/36 [00:00<?, ? examples/s]

In [11]:
example = next(iter(eval_dataset))

In [20]:
tokenizer.decode(example["input_ids"][0])

"<|begin_of_text|><|start_header_id|>system<|end_header_id|> If you are a doctor, please answer the medical questions based on the patient's description.<|eot_id|><|start_header_id|>user<|end_header_id|> This is the question: I woke up this morning feeling the whole room is spinning when i was sitting down. I went to the bathroom walking unsteadily, as i tried to focus i feel nauseous. I try to vomit but it wont come out.. After taking panadol and sleep for few hours, i still feel the same.. By the way, if i lay down or sit down, my head do not spin, only when i want to move around then i feel the whole world is spinning.. And it is normal stomach discomfort at the same time? Earlier after i relieved myself, the spinning lessen so i am not sure whether its connected or coincidences.. Thank you doc!<|eot_id|><|start_header_id|>assistant<|end_header_id|>"

In [29]:
from utils.eval_helper import generate_response

response = generate_response(
    ftmodel,
    tokenizer,
    example["input_ids"],
    example["attention_mask"],
    {},
)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


In [28]:
# base
response

' Hello, The symptoms as mentioned in your post can be attributed to a condition known as Benign Paroxysmal Positional Vertigo (BPPV). It is a disorder of the vestibular apparatus in the inner ear, in which episodes of vertigo are triggered by changes in the position of the head. The symptoms you mention in your post can be attributed to BPPV. I would like to suggest you to consult a Neurologist/ ENT Specialist for a complete clinical examination & relevant investigations. Investigations will be needed to confirm the diagnosis. There is a good chance of it getting cured with specific therapy. Till then, you can follow these measures'

In [30]:
# tuned
response

' Hello, I understand your concern. I am Chat Doctor, infectious diseases specialist, answering your query. In my opinion you should go for complete blood count, and you may need the same. The reason for your symptom is the anemia. It makes you feel dizzy. I will suggest you to take iron supplements for the same. You can take it after consulting your doctor. In my opinion you should visit the nearby hospital as soon as possible. I will be happy to answer your further concern, you can ask me on bit.ly/ Chat Doctor.  Thank you. ChatDoctorInfectious diseases specialist.'

In [None]:
load_dotenv()
HF_TOKEN_WRITE = os.environ["HF_TOKEN_WRITE"]
tokenizer.push_to_hub(f"{model_name.split('/')[1]}-v{run_id}_si{start_index}", token=HF_TOKEN_WRITE)
ftmodel.push_to_hub(f"{model_name.split('/')[1]}-v{run_id}_si{start_index}", token=HF_TOKEN_WRITE)

In [None]:
perp