In [19]:
# in story_dialogue.ipynb

%pip install google-genai pillow  # run once in the notebook


Note: you may need to restart the kernel to use updated packages.


In [None]:
from google import genai
from google.genai import types
import os

API_KEY = os.environ.get("GEMINI_API_KEY", "<PUT_YOUR_KEY_HERE>")
client = genai.Client(api_key=API_KEY)

story_chat = client.chats.create(
    model="gemini-2.5-flash",
    config=types.GenerateContentConfig(
        system_instruction=(
            "You are Furhat, a friendly speaking robot and interactive storyteller. "
            "You always speak directly to the user as 'you'. "
            "Your answers must be medium short, 4-6 sentences, suitable for being spoken aloud."
        )
    )
)

def generate_scene_text_llm(story, scene, emotion: str, user_mood: str) -> str:
    """
    Use LLM to generate narration for ONE scene.
    Reads scene['llmHints'] and injects them into the prompt.
    """
    hints = scene.get("llmHints", {})
    general_hint = hints.get("general", "")
    emotion_hint = hints.get(emotion, "")

    prompt = f"""
Story id: {story['id']}
Story name: {story['name']}
Genre: {story['genre']}
Target moods: {', '.join(story.get('targetMoods', []))}

Current scene id: {scene['id']}
Scene type: {scene.get('type', 'normal')}
Scene description: {scene['description']}

User mood label: {user_mood}
Detected emotion label: {emotion}

General scene hint: {general_hint}
Emotion-specific hint: {emotion_hint}

Write the narration for THIS scene only.

Requirements:
- 4 to 6 sentences.
- Match the emotional tone to the detected emotion = {emotion}.
- Do NOT mention the option texts or describe future choices explicitly.
- Do NOT jump to future scenes.
- Talk directly to the user as "you".
"""
    resp = story_chat.send_message(prompt)
    return resp.text.strip()


In [22]:
from pathlib import Path
import json
from furhat_remote_api import FurhatRemoteAPI
furhat = FurhatRemoteAPI("localhost")

In [23]:
furhat.set_face(character='Titan', mask="Adult")
furhat.set_voice(name='Joanna')

{'message': 'Successfully changed Furhat voice', 'success': True}

In [24]:
DATA_PATH = Path("../data/processed/LLM")

In [25]:
def get_scene(story, scene_id):
    for scene in story["scenes"]:
        if scene["id"] == scene_id:
            return scene
    return None

def choose_template(scene, emotion):
    """Pick template for emotion, fall back to neutral or description."""
    return scene["templates"].get(
        emotion,
        scene["templates"].get("neutral", scene["description"])
    )

def next_scene(scene, chosen_option_id):
    for opt in scene.get("options", []):
        if opt["id"] == chosen_option_id:
            return opt["nextScene"]
    return None


In [26]:
def load_all_stories():
    stories = {}
    for path in DATA_PATH.glob("*.json"):
        story_id = path.stem
        with open(path, "r") as f:
            stories[story_id] = json.load(f)
    return stories

STORIES = load_all_stories()
list(STORIES.keys())


['LLM_as', 'LLM_lk', 'LLM_sa']

In [27]:
def get_all_emotions(stories):
    emos = set()
    for story in stories.values():
        for scene in story["scenes"]:
            emos.update(scene.get("templates", {}).keys())
    return sorted(emos)

EMOTIONS = get_all_emotions(STORIES)
EMOTIONS

[]

In [29]:
def get_scene(story, scene_id):
    for scene in story["scenes"]:
        if scene["id"] == scene_id:
            return scene
    return None

def choose_template(scene, emotion):
    """Pick template for emotion, fall back to neutral or description."""
    return scene["templates"].get(
        emotion,
        scene["templates"].get("neutral", scene["description"])
    )

def next_scene(scene, chosen_option_id):
    for opt in scene.get("options", []):
        if opt["id"] == chosen_option_id:
            return opt["nextScene"]
    return None

In [30]:
def select_story(mood: str, emotion: str, stories=STORIES):
    """
    Choose the story whose targetMoods best match mood/emotion.
    """
    best_id = None
    best_score = -1

    for sid, story in stories.items():
        target = set(story.get("targetMoods", []))
        score = 0
        if mood in target:
            score += 2
        if emotion in target:
            score += 1
        if score > best_score:
            best_score = score
            best_id = sid

    if best_id is None:
        best_id = next(iter(stories.keys()))
    return best_id

In [31]:
KNOWN_MOODS = [
    "tired", "comfort-seeking", "sad",
    "excited", "happy", "energized",
    "neutral", "curious"
] # fix to match emotions?

def detect_mood_from_text(text: str) -> str:
    t = text.lower()
    if "comfort" in t:
        return "comfort-seeking"
    for m in KNOWN_MOODS:
        if m in t:
            return m
    # fallback
    return "neutral"


def detect_emotion_from_text(text: str) -> str:
    """
    Placeholder until the webcam model is integrated.
    Maps some common words to your emotion labels.
    """
    t = text.lower()
    if any(w in t for w in ["happy", "great", "good", "nice"]):
        return "happy"
    if any(w in t for w in ["sad", "down", "bad"]):
        return "sad"
    if any(w in t for w in ["angry", "mad", "annoyed"]):
        return "angry"
    if any(w in t for w in ["scared", "afraid", "fear", "nervous"]):
        return "fear"
    if any(w in t for w in ["disgust", "gross"]):
        return "disgust"
    if any(w in t for w in ["surprised", "wow"]):
        return "surprised"
    return "neutral"

In [32]:
def listen_text(language: str = "en-US") -> str:
    """Listen once and return lowercase text, or '' if nothing."""
    response = furhat.listen(language=language)
    if response and getattr(response, "message", None):
        return response.message.strip()
    return ""

In [33]:
def choose_option_from_speech(scene, text: str):
    """
    Try to map user's utterance to an option:
    - number words (one, two, three...)
    - digits (1, 2, 3)
    - keywords from the option text
    Returns option object or None.
    """
    t = text.lower()

    options = scene.get("options", [])
    if not options:
        return None

    # digit index
    for i, opt in enumerate(options, start=1):
        if str(i) in t:
            return opt

    # word -> index
    words_to_num = {
        "one": 1, "first": 1,
        "two": 2, "second": 2,
        "three": 3, "third": 3
    }
    for w, num in words_to_num.items():
        if w in t and 1 <= num <= len(options):
            return options[num - 1]

    # keyword match: if option text words appear
    for opt in options:
        key = opt["text"].split()[0].lower()  # very simple
        if key in t:
            return opt

    # fallback: first option
    return options[0]

In [34]:
def run_story_session():
    # --- Greeting & mood ---
    furhat.say(text="Hello! I am your interactive storyteller.", blocking=True)
    furhat.say(text="How are you feeling right now?", blocking=True)
    mood_text = listen_text()
    mood = detect_mood_from_text(mood_text)

    # TEMP: ask for emotion verbally (until webcam integration)
    furhat.say(
        text="If I looked at your face, would you say you feel happy, sad, angry, afraid, disgusted, surprised, or neutral?",
        blocking=True
    )
    emo_text = listen_text()
    emotion = detect_emotion_from_text(emo_text)
    if emotion not in EMOTIONS:
        emotion = "neutral"

    # --- Select story based on mood/emotion ---
    story_id = select_story(mood, emotion, STORIES)
    story = STORIES[story_id]
    furhat.say(text=f"I will tell you a story called {story['name']}.", blocking=True)

    # intro = first scene in JSON
    scene_id = story["scenes"][0]["id"]

    while True:
        scene = get_scene(story, scene_id)

        try:
            text_to_say = generate_scene_text_llm(story, scene, emotion, mood)
        except Exception as e:
            print("LLM error:", e)
            # fallback to old rule-based template
            text_to_say = scene.get("description", "I cannot think of anything to say right now.")

        furhat.say(text=text_to_say, blocking=True)


        # no options â†’ story end
        if not scene.get("options"):
            furhat.say(text="That was the end of this story.", blocking=True)
            break

        # read options
        furhat.say(text="What would you like to do next?", blocking=True)
        for idx, opt in enumerate(scene["options"], start=1):
            furhat.say(text=f"Option {idx}: {opt['text']}", blocking=True)

        answer = listen_text()

        # global intents
        if "quit" in answer or "stop" in answer:
            furhat.say(text="Okay, I will stop the story here.", blocking=True)
            break

        if "change" in answer or "another" in answer:
            new_story_id = select_story(mood, emotion, STORIES)
            if new_story_id != story_id:
                story_id = new_story_id
                story = STORIES[story_id]
                scene_id = story["scenes"][0]["id"]
                furhat.say(text=f"Let's try another story: {story['name']}.", blocking=True)
                continue
            else:
                furhat.say(text="We will stay with the same story for now.", blocking=True)

        if "repeat" in answer:
            furhat.say(text="Let me repeat that part.", blocking=True)
            continue

        # choose option based on speech
        opt = choose_option_from_speech(scene, answer)
        scene_id = opt["nextScene"]

In [35]:
run_story_session()

LLM error: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'API key not valid. Please pass a valid API key.', 'status': 'INVALID_ARGUMENT', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'API_KEY_INVALID', 'domain': 'googleapis.com', 'metadata': {'service': 'generativelanguage.googleapis.com'}}, {'@type': 'type.googleapis.com/google.rpc.LocalizedMessage', 'locale': 'en-US', 'message': 'API key not valid. Please pass a valid API key.'}]}}
LLM error: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'API key not valid. Please pass a valid API key.', 'status': 'INVALID_ARGUMENT', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'API_KEY_INVALID', 'domain': 'googleapis.com', 'metadata': {'service': 'generativelanguage.googleapis.com'}}, {'@type': 'type.googleapis.com/google.rpc.LocalizedMessage', 'locale': 'en-US', 'message': 'API key not valid. Please pass a valid API key.'}]}}
LLM error: 400 INVALID_ARGUMENT. {'error