Add the project root to Python's import path so local modules (src/...) can be imported easily.

In [None]:
import sys
from pathlib import Path

# Treat the parent of the current working directory as the project root
# (assumes the notebook is launched from a direct subfolder of the project -- TODO confirm).
project_path = Path.cwd().parent

# Only register the root once: re-running this cell would otherwise keep
# appending duplicate entries to sys.path for the lifetime of the kernel.
project_root = str(project_path.resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

Import the SODA dataset loader and the Hugging Face Transformers `pipeline` helper used for inference.

In [None]:
from transformers import pipeline
from src.dataset.load_data_soda import SODADataLoader

Set the name of the fine-tuned model to load from Hugging Face.

In [None]:
# Hugging Face model identifier; passed further down as both the `model` and the
# `tokenizer` argument of the text2text-generation pipeline.
HF_MODEL_NAME = "abirmondalind/story2dialogue-SODA-BERT"

Create the SODA dataset loader with simple filtering options and retrieve the dataset object.

In [None]:
# Loader configuration kept in one place so the options are easy to scan and tweak.
# The exact meaning of each flag is defined by SODADataLoader (not shown here).
soda_loader_options = {
    "data_types": ['test'],
    "samples_per_split": 50,
    "min_story_length": 20,
    "max_story_length": 250,
    "join_dialogue_and_speakers": True,
    "add_characters_in_narrative": True,
    "add_turns_count_in_narrative": True,
}

soda_dataset_obj = SODADataLoader(**soda_loader_options)

# Dataset object exposed by the loader; later cells index it by split name.
soda_ds = soda_dataset_obj.dataset

Create text2text generation pipeline

In [None]:
# Build the text2text-generation pipeline; the same Hugging Face identifier is
# used for both the model and its tokenizer.
generator = pipeline(task="text2text-generation", model=HF_MODEL_NAME, tokenizer=HF_MODEL_NAME)

Generate a dialogue from one test narrative and compare it with the actual (reference) dialogue.

In [None]:
# Index of the test example to inspect.
i = 10
sample = soda_ds['test'][i]
text = sample['narrative']
actual_dialogue = sample['dialogue']

# Beam-search decoding settings forwarded to the pipeline call.
# min_length and no_repeat_ngram_size are deliberately left unset.
generation_kwargs = {
    "max_new_tokens": 1024,
    "num_beams": 8,
    "length_penalty": 1.2,
    "early_stopping": True,
}
output = generator(text, **generation_kwargs)
generated_dialogue = output[0]['generated_text']

# Each section is a heading line followed by its body; the first two sections
# are followed by two blank lines to separate them from the next one.
print("Narrative:", text, sep="\n", end="\n\n\n")
print("Generated Dialogue:", generated_dialogue, sep="\n", end="\n\n\n")
print("Actual Dialogue:", actual_dialogue, sep="\n")