In [1]:
import typing

import pandas
import transformers

import cltrier_lib

In [2]:
# Hub identifiers of the checkpoints under comparison, keyed by the label used
# for the corresponding pipeline. Insertion order matters: the position of each
# entry is later used as the CUDA device index.
MODELS: typing.Dict[str, str] = dict(
    base="meta-llama/Llama-3.2-3B-Instruct",
    adapter="simon-muenker/Llama-3.2-3B-Instruct-OSN-posts",
)

# Political leanings substituted into the system prompt's `{leaning}` placeholder.
LEANINGS: typing.List[str] = [
    "left",
    "neutral",
    "right",
]

In [3]:
# System prompt template; `{leaning}` is filled in per request via
# `INSTRUCTION.format_content(leaning=...)`.
INSTRUCTION = cltrier_lib.inference.schemas.Message(
    role="system",
    content=(
        "You are a social media user with a political {leaning} leaning. "
        "Post a Tweet about the following topic:"
    ),
)

In [4]:
# German-language topic prompts. Each sample is a comma-separated keyword list
# that is sent to the models as the user message.
SAMPLES: typing.List[str] = [
    ", ".join(keywords)
    for keywords in (
        ("FDP", "Kernkraftwerke", "Ostwestfalen-Lippe"),
        ("Energie", "Umwelt", "Sicherheit"),
        ("Autofreie Stadt", "Umweltverbesserung", "Stadtkonzept"),
        ("Sozialtourismus", "Flüchtlinge", "Führungsanspruch"),
        ("Tempolimit",),
    )
]

In [5]:
# Build one text-generation pipeline per entry in MODELS. The i-th entry is
# placed on device "cuda:i", so this cell needs one CUDA device per model.
pipelines: typing.Dict[str, transformers.Pipeline] = dict(
    (
        name,
        transformers.pipeline(
            task="text-generation",
            model=checkpoint,
            device=f"cuda:{gpu_index}",
        ),
    )
    for gpu_index, (name, checkpoint) in enumerate(MODELS.items())
)

# Explicitly load the adapter checkpoint into the model behind the "adapter"
# pipeline.
# NOTE(review): presumably the pipeline factory alone does not apply the
# adapter weights, which is why this extra call exists — confirm.
adapter_checkpoint = MODELS["adapter"]
pipelines["adapter"].model.load_adapter(adapter_checkpoint)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/9.19M [00:00<?, ?B/s]

In [6]:
# Generate one reply for every (topic, leaning, model) combination and collect
# the results as flat records, ready to be turned into a DataFrame.
responses = []

for idx, post in enumerate(SAMPLES):
    for leaning in LEANINGS:
        for model, pipeline in pipelines.items():
            # System message carries the leaning, user message carries the topic.
            chat = cltrier_lib.inference.schemas.Chat(messages=[
                INSTRUCTION.format_content(leaning=leaning),
                cltrier_lib.inference.schemas.Message(role="user", content=post)
            ])

            # Render the chat to a plain prompt string and generate only the
            # continuation (return_full_text=False drops the prompt).
            generated = pipeline(
                pipeline.tokenizer.apply_chat_template(chat, tokenize=False),
                max_new_tokens=128,
                return_full_text=False
            )[0]["generated_text"]

            # The reply is the second blank-line-separated segment of the output.
            # NOTE(review): the template is rendered without
            # `add_generation_prompt`, so the first segment is presumably the
            # assistant header the model writes itself — confirm.
            # A generation without any blank line used to raise IndexError and
            # abort the whole sweep; fall back to the full text instead.
            segments = generated.split("\n\n")
            reply = segments[1] if len(segments) > 1 else generated

            responses.append(
                dict(
                    id=idx,
                    model=model,
                    leaning=leaning,
                    post=post,
                    reply=reply
                )
            )
        

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [7]:
# Never truncate cell contents so the generated replies are fully readable.
pandas.options.display.max_colwidth = None

# One row per (sample id, leaning, model); left-align the text for readability.
pandas.DataFrame(data=responses).set_index(
    keys=["id", "leaning", "model"]
).style.set_properties(
    **{"text-align": "left"}
)

KeyError: "None of ['leaning'] are in the columns"