In [11]:
import re

from dataclasses import dataclass
from freespeech.types import Character, Language
from freespeech.lib import text, speech


@dataclass(frozen=True)
class Interval:
    speech_ms: int
    pause_ms: int
    speech_rate: float
    character: Character
    chunks: list[str | int]


def merge(a: Interval, b: Interval) -> Interval:
    speech_ms = a.speech_ms + b.speech_ms
    speech_rate = (a.speech_rate * a.speech_ms + b.speech_rate * b.speech_ms) / speech_ms
    pause_ms = a.pause_ms + b.pause_ms

    if a.character != b.character:
        raise ValueError(f"character in a and b should be the same")

    return Interval(
        pause_ms=pause_ms,
        speech_ms=speech_ms,
        speech_rate=speech_rate,
        chunks=a.chunks + b.chunks,
        character=a.character
    )


def adjust(a: Interval, base_rate: float) -> Interval:
    speech_ms = min(
            a.speech_ms + a.pause_ms,  # can't be longer than total duration
            a.speech_ms * (a.speech_rate / base_rate)
        )
    pause_ms = (a.speech_ms + a.pause_ms) - speech_ms
    speech_rate = a.speech_rate * (a.speech_ms / speech_ms)

    return Interval(
        speech_ms=round(speech_ms),
        pause_ms=round(pause_ms),
        speech_rate=speech_rate,
        character=a.character,
        chunks=[
            chunk if isinstance(chunk, str) else round(chunk * (pause_ms / a.pause_ms))
            for chunk in a.chunks
        ]
    )


def chunk_paragraph(s: str, lang: Language) -> list[str | int]:
    sentences = text.sentences(s, lang=lang)
    outline = sum([list(reversed(speech.extract_pauses(sentence))) for sentence in sentences], [])
    outline = [item.strip() if isinstance(item, str) else item for item in outline]
    return [item for item in outline if item]


In [17]:
chunk_paragraph("#1.0# Hello world! #1.0#", lang="en-US")

[1500, 'Hello  world!', 1000]

In [30]:
from itertools import permutations


def all_merges(intervals: list[Interval], n: int) -> list[tuple[bool]]:
    return sum((
        list(set(permutations([True] * i + [False] * (len(intervals) - i - 1))))
        for i in range(n)), [])

In [31]:
def merge_intervals(intervals: list[Interval], flags: tuple[bool]) -> list[Interval]:
    intervals = intervals.copy()
    acc = [intervals.pop(0)]

    for interval, should_merge in zip(intervals, flags):
        if not should_merge:
            acc += [interval]
        else:
            acc += [merge(interval, acc.pop())]

    return acc

In [32]:
solutions = [merge_intervals(intervals, flags) for flags in all_merges(intervals, len(intervals))]

In [33]:
solutions

[[Interval(speech_ms=10, pause_ms=0, speech_rate=1.0),
  Interval(speech_ms=10, pause_ms=0, speech_rate=2.0),
  Interval(speech_ms=20, pause_ms=10, speech_rate=1.0),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=20, pause_ms=0, speech_rate=1.5),
  Interval(speech_ms=20, pause_ms=10, speech_rate=1.0),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=10, pause_ms=0, speech_rate=1.0),
  Interval(speech_ms=10, pause_ms=0, speech_rate=2.0),
  Interval(speech_ms=40, pause_ms=10, speech_rate=1.5)],
 [Interval(speech_ms=10, pause_ms=0, speech_rate=1.0),
  Interval(speech_ms=30, pause_ms=10, speech_rate=1.3333333333333333),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=20, pause_ms=0, speech_rate=1.5),
  Interval(speech_ms=40, pause_ms=10, speech_rate=1.5)],
 [Interval(speech_ms=40, pause_ms=10, speech_rate=1.25),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=10, pause_ms=0, 

In [35]:
[[adjust(interval, base_rate=1.3) for interval in intervals] for intervals in solutions]

[[Interval(speech_ms=8, pause_ms=2, speech_rate=1.3),
  Interval(speech_ms=10, pause_ms=0, speech_rate=2.0),
  Interval(speech_ms=15, pause_ms=15, speech_rate=1.3),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=20, pause_ms=0, speech_rate=1.5),
  Interval(speech_ms=15, pause_ms=15, speech_rate=1.3),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=8, pause_ms=2, speech_rate=1.3),
  Interval(speech_ms=10, pause_ms=0, speech_rate=2.0),
  Interval(speech_ms=46, pause_ms=4, speech_rate=1.3000000000000003)],
 [Interval(speech_ms=8, pause_ms=2, speech_rate=1.3),
  Interval(speech_ms=31, pause_ms=9, speech_rate=1.3),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=20, pause_ms=0, speech_rate=1.5),
  Interval(speech_ms=46, pause_ms=4, speech_rate=1.3000000000000003)],
 [Interval(speech_ms=38, pause_ms=12, speech_rate=1.3),
  Interval(speech_ms=20, pause_ms=0, speech_rate=2.0)],
 [Interval(speech_ms=8, pause

In [24]:
merge(
    Interval(10, 0, 1.0),
    Interval(10, 10, 2.0),
)

Interval(speech_ms=20, pause_ms=10, speech_rate=1.5)

In [18]:
adjust_speech_rate(Interval(5, 10, 2.0), 1.0)

Interval(signal_ms=10, silence_ms=5, speech_rate=1.0)