# Speech Module Demo Notebook

This notebook mirrors milestone 2. It demonstrates the rule-based parser, the filler injector, the prosody controller, and an integrated pipeline that runs on the current stubs (the ChatTTS backend is still a placeholder).

In [None]:
# Notebook setup and imports
import os
import json
from pprint import pprint

from src.speech import (
    PerformanceMarkerParser,
    FillerInjector,
    EmotionController,
    ProsodyController,
    SpeechSynthesizer,
    SynthesisRequest,
)
from src.speech import types as speech_types


: 

## Note on `__init__.py` formatting
The speech package `__init__.py` has been rewritten to export clean symbols and avoid earlier mixed-language formatting. No action needed in this notebook; we focus on parser/filler/prosody behavior.

In [None]:
# Rule-based parser demo: parse a marker-annotated script and dump every segment.
# `script` and `parsed` are reused by later cells.
script = "今晚聊聊期末周(*tone:吐槽*), 老师总爱说(*stress:复习*)，然后(*pause:0.5*)考试(*role:punch*)？"
parser = PerformanceMarkerParser()
parsed = parser.parse(script)

for idx, segment in enumerate(parsed.segments):
    # Header line: position, role and tone hint of the segment.
    print(f"Segment {idx}: role={segment.role}, tone={segment.tone_hint}")
    print("  text:", segment.text)
    # Pauses are shown as (reason, duration_s) pairs.
    print(
        "  pauses_before:",
        [(pause.reason, pause.duration_s) for pause in segment.pauses_before],
    )
    print(
        "  pauses_after:",
        [(pause.reason, pause.duration_s) for pause in segment.pauses_after],
    )
    # Stresses are shown as (token, strength) pairs.
    print(
        "  stresses:",
        [(stress.token, stress.strength) for stress in segment.stresses],
    )


In [None]:
# Filler injector demo: run the injector (max_fillers=2, deterministic=True)
# over the parsed segments and print the resulting text of each segment.
# `injector` and `filler_plan` are reused by later cells.
injector = FillerInjector(max_fillers=2)
filler_plan = injector.inject(parsed.segments, deterministic=True)

for idx, segment in enumerate(filler_plan.segments):
    print(f"Segment {idx} (role={segment.role}): {segment.text}")


In [None]:
# Prosody plan demo: derive an emotion plan from the parsed script, then build
# a prosody plan from the filler-injected segments and print each instruction.
emotion_controller = EmotionController()
emotion_plan = emotion_controller.plan(parsed, preferred="吐槽")
prosody_controller = ProsodyController()
prosody_plan = prosody_controller.build_plan(filler_plan.segments, emotion_plan)

for idx, instruction in enumerate(prosody_plan.instructions):
    print(f"Instruction {idx}:")
    # Scalar fields are printed as-is, one per line, in a fixed order.
    for field in ("text", "speed_scale", "f0_shift_semitones", "energy_scale"):
        print(f"  {field}:", getattr(instruction, field))
    # Pauses are shown as (reason, duration_s) pairs.
    print(
        "  pauses_before:",
        [(pause.reason, pause.duration_s) for pause in instruction.pauses_before],
    )
    print(
        "  pauses_after:",
        [(pause.reason, pause.duration_s) for pause in instruction.pauses_after],
    )
    # Stresses are shown as (token, strength) pairs.
    print(
        "  stresses:",
        [(stress.token, stress.strength) for stress in instruction.stresses],
    )


In [None]:
# Integrated pipeline demo: send the same script through SpeechSynthesizer in
# one call (non-streaming, evaluation requested) and summarise what comes back.
synth = SpeechSynthesizer()
req = SynthesisRequest(
    script=script,
    stream=False,
    evaluate=True,
    emotion_profile="吐槽",
)
result, report = synth.synthesize(req)

print("Audio bytes length:", len(result.audio))
print("Sample rate:", result.sample_rate)
# The report is only printed when it is truthy.
if report:
    print("Eval report notes:", report.notes)


In [None]:
# Lightweight asserts: sanity checks over the objects built in the cells above.
# Every assert carries a message so a failure under Restart & Run All explains
# itself instead of raising a bare AssertionError.

# Parser produces at least one punch segment.
parsed_roles = [seg.role for seg in parsed.segments]
assert any(role == "punch" for role in parsed_roles), (
    f"expected at least one 'punch' segment, got roles: {parsed_roles}"
)

# Filler injector does not exceed max_fillers.
# NOTE(review): this counts segments whose text *starts with* a known filler;
# fillers placed elsewhere in a segment would not be counted. Confirm this
# matches how FillerInjector inserts fillers.
filler_prefixes = tuple(injector.fillers)  # built once; str.startswith accepts a tuple
filler_led_count = sum(
    1 for seg in filler_plan.segments if seg.text.startswith(filler_prefixes)
)
assert filler_led_count <= injector.max_fillers, (
    f"{filler_led_count} segments start with a filler, "
    f"but max_fillers is {injector.max_fillers}"
)

# Prosody plan aligns with segments: one instruction per filler-plan segment.
instruction_count = len(prosody_plan.instructions)
segment_count = len(filler_plan.segments)
assert instruction_count == segment_count, (
    f"prosody plan has {instruction_count} instructions "
    f"for {segment_count} segments"
)

print("Lightweight checks passed.")
