In [1]:
import os, sys

import json

import numpy as np

from treeseg import TreeSeg


In [2]:
# nest_asyncio patches asyncio so that an already-running event loop (the
# notebook kernel runs one) can be re-entered.
# NOTE(review): presumably TreeSeg drives its embedding requests through
# asyncio and needs this inside Jupyter — confirm.
import nest_asyncio
nest_asyncio.apply()

In [3]:
# Sample meeting transcript, in spoken order. Every entry is a dict with a
# "speaker" key (who is talking) and a "composite" key (the utterance text).
transcript = [
    {"speaker": speaker, "composite": text}
    for speaker, text in (
        ("Alice", "Okay team, let's kick off the weekly sync. First agenda item is the Q3 roadmap planning."),
        ("Bob", "Right. I've drafted the initial proposal based on the feedback from the product team."),
        ("Alice", "Great. Can you share the highlights? We need to finalize the key initiatives this week."),
        ("Bob", "Sure. The main pillars are customer acquisition, platform stability, and launching the new mobile feature."),
        ("Charlie", "On platform stability, I wanted to raise an issue regarding the recent deployment."),
        ("Charlie", "We saw a spike in error rates after the update went live Tuesday."),
        ("Alice", "Okay, thanks Charlie. Let's make that the next discussion point after Bob finishes the roadmap overview."),
        ("Bob", "Okay, back to the roadmap. For customer acquisition, we're planning two major campaigns..."),
        # ... more utterances
    )
]

In [4]:
import os

# HTTP headers passed to TreeSeg through the config's "EMBEDDINGS_HEADERS" key.
# The bearer token comes from the OPENAI_API_KEY environment variable; when the
# variable is unset the header value is just "Bearer " with no token.
EMBEDDINGS_HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY', '')}",
}


In [5]:
# Settings handed to TreeSeg. The Ollama backend is selected here; the trailing
# comments give the OpenAI alternatives.
config = dict(
    MIN_SEGMENT_SIZE=2,
    LAMBDA_BALANCE=0,
    UTTERANCE_EXPANSION_WIDTH=2,
    EMBEDDINGS_HEADERS=EMBEDDINGS_HEADERS,
    EMBEDDINGS_TYPE="ollama",  # or "openai"
    EMBEDDINGS_MODEL="nomic-embed-text",  # or "text-embedding-ada-002" for openai
    # Taken from the OLLAMA_HOST environment variable; an empty string when unset.
    EMBEDDINGS_ENDPOINT=os.getenv("OLLAMA_HOST", ""),  # "https://api.openai.com/v1/embeddings"
)


In [6]:
# Build the segmenter from the transcript and the settings above. In the
# recorded run this step logs the extraction of one embedding per entry.
segmenter = TreeSeg(
    entries=transcript,
    configs=config,
)

[2m2025-04-18 07:51:08[0m [[32m[1minfo     [0m] [1mProcess blocks embeddings     [0m
[2m2025-04-18 07:51:08[0m [[32m[1minfo     [0m] [1mExtracting 8 embeddings in 1 batches.[0m
[2m2025-04-18 07:51:09[0m [[32m[1minfo     [0m] [1mI have collected 8/8 embeddings.[0m


In [7]:
# Echo the entries held by the segmenter; in the recorded run they match the
# input transcript dicts.
print(segmenter.entries)

[{'speaker': 'Alice', 'composite': "Okay team, let's kick off the weekly sync. First agenda item is the Q3 roadmap planning."}, {'speaker': 'Bob', 'composite': "Right. I've drafted the initial proposal based on the feedback from the product team."}, {'speaker': 'Alice', 'composite': 'Great. Can you share the highlights? We need to finalize the key initiatives this week.'}, {'speaker': 'Bob', 'composite': 'Sure. The main pillars are customer acquisition, platform stability, and launching the new mobile feature.'}, {'speaker': 'Charlie', 'composite': 'On platform stability, I wanted to raise an issue regarding the recent deployment.'}, {'speaker': 'Charlie', 'composite': 'We saw a spike in error rates after the update went live Tuesday.'}, {'speaker': 'Alice', 'composite': "Okay, thanks Charlie. Let's make that the next discussion point after Bob finishes the roadmap overview."}, {'speaker': 'Bob', 'composite': "Okay, back to the roadmap. For customer acquisition, we're planning two majo

In [8]:
# Run the segmentation and keep the result for display.
# NOTE(review): 3 is presumably the requested number of segments (the recorded
# run logs two loss-reducing splits) — confirm against the TreeSeg API.
segments = segmenter.segment_meeting(3)

[2m2025-04-18 07:51:17[0m [[32m[1minfo     [0m] [1mLoss reduction: 1.8871057994539147=>0.9288287069882675 | 103.17048614624179%[0m
[2m2025-04-18 07:51:17[0m [[32m[1minfo     [0m] [1mLoss reduction: 0.9288287069882675=>0.48232831871665927 | 92.57187914232794%[0m


In [None]:
# Dump the segmenter's `blocks` attribute for inspection.
# NOTE(review): this cell has no execution count or output in the saved run.
print(segmenter.blocks)


In [9]:
# Show the segmentation result; the recorded run prints one integer per
# transcript entry (8 values for 8 utterances).
# NOTE(review): the 1s look like segment-boundary markers — confirm against
# the TreeSeg documentation.
print(segments)


[0, 0, 0, 1, 0, 1, 0, 0]
