In [21]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

### Preparing the Counsel Chat Dataset

In [22]:
# Hugging Face Hub identifier of the Counsel Chat dataset.
dataset_name = "nbertagnolli/counsel-chat"

# Load with split="all", then shuffle with a fixed seed so the row order
# (and therefore any index-based selection) is the same on every run.
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42)

Repo card metadata block was not found. Setting CardData to empty.


### Inference from an already fine-tuned model

In [23]:
# Identifier of the fine-tuned checkpoint whose tokenizer is loaded here.
model_id = "llama32-sft-fine-tune-counselchat"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Upper bound on sequence length used when truncation is requested.
tokenizer.model_max_length = 2048
# Pad on the left so every prompt in a batch ends at the same position,
# right where generation starts.
tokenizer.padding_side = "left"

In [24]:
# Load the fine-tuned causal LM in bfloat16 and let `device_map="auto"` place it.
# NOTE (from the original author): must be float32 for MacBooks!
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Updating the model config to use the tokenizer's special pad token.
model.config.pad_token_id = tokenizer.pad_token_id

In [51]:
# Wrap the first five shuffled questions as single-turn chat conversations
# (one user message each).
conversations = [
    [{"role": "user", "content": dataset[idx]["questionText"]}]
    for idx in range(5)
]
# Keep the original per-example names so any cell that refers to them still works.
input0, input1, input2, input3, input4 = conversations

# Render each conversation to prompt text that ends with the generation prompt.
texts = tokenizer.apply_chat_template(
    conversations, tokenize=False, add_generation_prompt=True
)

# Chat templates normally emit their own special tokens (see the transformers
# chat-templating docs), so tokenize the rendered text with
# add_special_tokens=False to avoid prepending a second BOS token.
inputs = tokenizer(
    texts,
    padding="longest",
    truncation=True,
    add_special_tokens=False,
    return_tensors="pt",
)
# Move every tensor in the batch onto the model's device.
inputs = {key: val.to(model.device) for key, val in inputs.items()}

# Decoded prompts with special tokens removed.
temp_texts = tokenizer.batch_decode(inputs['input_ids'], skip_special_tokens=True)

In [52]:
# Token ids that stop generation: the tokenizer's EOS token plus the
# "<|eot_id|>" token, looked up by name.
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]

In [53]:
# Sampling (do_sample=True) draws from torch's RNG. Seed it, using the same
# value as the dataset shuffle, so re-running this cell on the same setup
# yields the same completions.
torch.manual_seed(42)

# Nucleus sampling over the batched prompts; generation for a sequence stops
# at any id in `terminators` or after 2048 new tokens.
gen_tokens = model.generate(
    **inputs,
    max_new_tokens=2048,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

In [54]:
# Each generated row starts with the (left-padded) prompt tokens, all of the
# same length. Slice them off at the token level and decode only the new
# tokens: this does not rely on the decoded prompt being an exact character
# prefix of the decoded full sequence.
prompt_len = inputs["input_ids"].shape[1]
gen_text = tokenizer.batch_decode(gen_tokens[:, prompt_len:], skip_special_tokens=True)

In [55]:
# Show the decoded completions (one string per prompt) as the cell output.
gen_text

["I'm glad you're willing to work on your trust issues and make your relationship work. Trust is an ongoing process, and it's normal to have ups and downs. Trust is built and broken over time, and it's a process that requires both partners to be willing to work on it. If you're willing to put in the work, I encourage you to seek counseling or therapy with a professional who can help you both to better understand each other. If you're both willing to put in the work, I would recommend that you work on your communication skills. Being able to communicate openly and honestly about your feelings and concerns is a key part of building trust.",
 "I'm sorry to hear that you are going through this.\xa0 It's not easy to hear that you cannot afford to keep the dog.\xa0 It's also not easy to hear that you and your boyfriend cannot agree on the dog.\xa0 I think it would be helpful to have a conversation about the dog's behavior and how it is affecting you.\xa0 I think you both need to work togethe