In [1]:
# ---------------- Imports ----------------
import os
import json
import sys
import textwrap

import yaml
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel



In [2]:
# ---------------- Run selection ----------------
# Name of the fine-tuned adapter run; joined onto the adapter-models folder
# when the adapter path is built.
adapter_choice = "20251219T2015-deepseek-r1-distill-llama-8b-sft-m3trained"

# Base checkpoint identifier; joined onto the configured models folder.
# Alternatives kept here for quick switching:
#   "meta-llama/Llama-3.1-8B-Instruct"
#   "meta-llama/Llama-3.2-3B-Instruct"
model_choice = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"



In [3]:
# ---------------- Project configuration ----------------
# Parse the shared YAML config; the path is relative to the notebook's
# working directory.
with open("../../config/config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Root folders declared under the "paths" section of the config.
data_path = config["paths"]["proj_store"]
models_folderpath = config["paths"]["models"]

# Base checkpoint: <models folder>/<model_choice>
base_model_path = os.path.join(models_folderpath, model_choice)

# Adapter run: <project store>/experiments/fine-tuning/adapter-models/<adapter_choice>
adapter_model_path = os.path.join(
    data_path,
    "experiments",
    "fine-tuning",
    "adapter-models",
    adapter_choice,
)



In [4]:
# ---------------- Model, tokenizer, pipeline ----------------
# Base checkpoint in half precision; device placement is delegated to
# device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path, dtype=torch.float16, device_map="auto"
)

# Attach the fine-tuned adapter to the base model, then merge the adapter
# into it and keep the merged model.
model = PeftModel.from_pretrained(base_model, adapter_model_path).merge_and_unload()

# The tokenizer is read from the base checkpoint folder (not the adapter folder).
# NOTE(review): trust_remote_code=True permits executing code shipped with the
# checkpoint — confirm it is actually needed for this tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline; each call may produce up to 500 new tokens.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [5]:
# Conversation so far as a list of {"role", "content"} turns (the chat format
# consumed by tokenizer.apply_chat_template): one system instruction, then
# alternating user / assistant turns, ending on a user turn.
messages = [
    {
        "role": "system",
        "content": "Act as an information elicitation agent for journalistic interviews.",
    },
    {
        "role": "user",
        "content": "Oh, we brainstormed about it. We started, as I said, we did a lot of research on, since our early ages, let's say in all our scientific career is based on embodiment, and activities in these areas and slowly we kept wondering more and more, on the role of these areas. And then, the way it goes, we normally speak about a topic and include other people and keep brainstorming, and I think, that's how it goes. At least for us. And in humans, we are also developing similar paradigms, so it just came one plus one equal to two. [laughs]",
    },
    {
        "role": "assistant",
        "content": "What were the roles of other people involved in this study?",
    },
    {
        "role": "user",
        "content": "So, Julen Hernandez-Lallement, the first author of the study, was the one that basically made the vast majority of the experiments. And he was helped by the other two: [Augustine Triumph] Attah: who did part of the other animal groups, and then Cindy [Pinhal] and Efe [Soyman] helped to analyse the data. And basically Valeria and me have been kind of help designed this study initially and helped to analyse the data and wind up the study. So we kind of lead the lab, but it's the other people that really manipulate the rats and do all the hard work.",
    },
    {
        "role": "assistant",
        "content": "What was the timeline of this study?",
    },
    {
        "role": "user",
        "content": "Well, it was long.\n\nYeah, I would say, four years?.",
    },
]



# Render the conversation into a single prompt string (text, not token ids)
# with the assistant generation prompt appended, then strip any "<think>"
# marker (relevant for DeepSeek templates).
# NOTE(review): replace() removes every "<think>" occurrence, including any
# inside message content, and keeps whatever the template emits right after
# the tag — confirm this matches the format used during fine-tuning.
prompt_str = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
).replace("<think>", "")

# Preview only: textwrap.fill re-wraps at 80 columns and collapses whitespace,
# so the printed text is not byte-identical to prompt_str.
print(textwrap.fill(prompt_str, width=80))



<｜begin▁of▁sentence｜>Act as an information elicitation agent for journalistic
interviews.<｜User｜>Oh, we brainstormed about it. We started, as I said, we did a
lot of research on, since our early ages, let's say in all our scientific career
is based on embodiment, and activities in these areas and slowly we kept
wondering more and more, on the role of these areas. And then, the way it goes,
we normally speak about a topic and include other people and keep brainstorming,
and I think, that's how it goes. At least for us. And in humans, we are also
developing similar paradigms, so it just came one plus one equal to two.
[laughs]<｜Assistant｜>What were the roles of other people involved in this
study?<｜end▁of▁sentence｜><｜User｜>So, Julen Hernandez-Lallement, the first author
of the study, was the one that basically made the vast majority of the
experiments. And he was helped by the other two: [Augustine Triumph] Attah: who
did part of the other animal groups, and then Cindy [Pinhal] and Efe [

In [7]:
# Run inference on the rendered prompt.
# `prompt_str` comes from apply_chat_template(tokenize=False), so it already
# starts with the template's begin-of-sentence marker. add_special_tokens=False
# keeps the pipeline's tokenizer from prepending a second BOS token when it
# re-tokenizes the string.
# NOTE(review): if the adapter was fine-tuned on prompts that carried a doubled
# BOS, compare outputs with and without this flag before relying on it.
result = pipe(prompt_str, add_special_tokens=False)

# `generated_text` holds the prompt followed by the model's continuation.
print(result[0]['generated_text'])



<｜begin▁of▁sentence｜>Act as an information elicitation agent for journalistic interviews.<｜User｜>Oh, we brainstormed about it. We started, as I said, we did a lot of research on, since our early ages, let's say in all our scientific career is based on embodiment, and activities in these areas and slowly we kept wondering more and more, on the role of these areas. And then, the way it goes, we normally speak about a topic and include other people and keep brainstorming, and I think, that's how it goes. At least for us. And in humans, we are also developing similar paradigms, so it just came one plus one equal to two. [laughs]<｜Assistant｜>What were the roles of other people involved in this study?<｜end▁of▁sentence｜><｜User｜>So, Julen Hernandez-Lallement, the first author of the study, was the one that basically made the vast majority of the experiments. And he was helped by the other two: [Augustine Triumph] Attah: who did part of the other animal groups, and then Cindy [Pinhal] and Efe [