In [1]:
import json
from pathlib import Path

from gliner import GLiNER
from pydantic import BaseModel

from chunking_utils import get_chunks
from llm_utils import ask_llm
from metadata_utils import get_meta
from nlp_utils import get_entities, get_tags, get_relevant_chunks
from transcript_utils import srt_to_text


class Entity(BaseModel):
    """Validated record for one labelled text span and its score.

    NOTE(review): the field names look like they mirror the per-entity
    dictionaries produced by GLiNER predictions — confirm before relying on
    that. The model is not referenced by the other cells visible here.
    """

    # Span boundaries. Presumably character offsets into the source text;
    # TODO confirm the unit and whether `end` is inclusive or exclusive.
    start: int
    end: int
    # Surface text of the span.
    text: str
    # Label assigned to the span.
    label: str
    # Score attached to the span; the value range is not shown in this file.
    score: float


# Load the pretrained GLiNER checkpoint once, at module level; this same
# `model` object is handed to get_entities() and get_tags() further down.
# NOTE(review): max_length=768 presumably caps the input length the model
# processes per call — confirm against the GLiNER documentation.
model = GLiNER.from_pretrained("urchade/gliner_base", max_length=768)

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]



In [None]:
# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being buried
# inside the comprehension or the loop body.
TRANSCRIPT_DIR = Path("../files/rotl")
EPISODE_FILTER = "398"  # substring of the file name that selects the episode
TARGET_ENTITY = "Eleanor"  # only this entity is summarised below
LLM_MODEL = "qwen2.5:14b"

# Transcripts to process, in sorted (deterministic) order. The extension is
# compared via `suffix` so that names which merely contain ".srt" somewhere
# (for example "episode.srt.bak") are not picked up by accident.
files = [
    file
    for file in sorted(TRANSCRIPT_DIR.iterdir())
    if file.suffix == ".srt" and EPISODE_FILTER in file.name
]

for file in files:
    # Per-episode pipeline: metadata -> plain text -> chunks -> entities.
    file_name, episode_number, episode_date, episode_title = get_meta(file)
    transcript = srt_to_text(file)
    chunks = get_chunks(transcript)
    results = get_entities(chunks, model)

    for entity, data in results.items():
        # Guard clause: skip everything except the entity under study.
        if entity != TARGET_ENTITY:
            continue

        labels = data["labels"]
        indexes = data["indexes"]
        relevant_chunks = get_relevant_chunks(chunks, indexes)

        # Ask the LLM about the entity, using only the chunks selected for it
        # as context.
        context = "\n".join(relevant_chunks)
        question = f"What do John and Merlin say about {entity}?"
        answer = ask_llm(f"{context}\n\n{question}", model=LLM_MODEL, tokens=500)

        # Tag the answer; the hosts' names are passed as stopwords.
        tags = get_tags(answer, model, stopwords=["john", "merlin"])

        # Human-readable summary plus a structured record for the entity.
        # Neither is consumed further down in this cell.
        info = f"{entity}\n\n{', '.join(labels)}\n\n{', '.join(tags)}\n\n{answer}\n\n{indexes}"

        # Named `record_id` rather than `id`: assigning to `id` at cell level
        # would replace the builtin for every later cell in the kernel.
        record_id = f"{entity}_rotl_{episode_number}"
        metadata = {
            "chunks": indexes,
            "show": "Roderick on the Line",
            "episode": episode_number,
            "title": episode_title,
            "subject": entity,
            "category": labels,
            "tags": tags,
        }

        print(answer)

In the conversation, John mentions Eleanor as his old landlord's wife who didn't want the cats in the house. Specifically, when John describes the situation with the old man feeding the cats, he says:
"John: And she was like, no, it's crazy.
John: There are cats everywhere."
This indicates that Eleanor was against her husband continuing to feed the cats and eventually forbade him from doing so. When the cats left, the rats moved in, leading to a new problem for the old man who then started feeding the rats. This shows that Eleanor played a significant role in the changes that occurred after the cats were no longer fed.


In [None]:
"""
Alyeska

Location

Alaska, Alyeska, paper towels, ski resort

Here's what John and Merlin say about Alyeska:
* **Alyeska is the name of a ski resort where John grew up.**  He says it's a name you might want to give a dog or child, but not in Girdwood, Alaska. 
* **In Girdwood, many dogs are named Alyeska and many are named Max** (after Mount Max's Mountain). John finds this strange and doesn't understand the trend.
* **John's cat is named Alyeska.** He says that if you named your dog Alyeska in Girdwood, they would bury you in a peat bog. 
Let me know if you have any other questions about their conversation!

metadata: {
            "chunks":[5, 6, 7],
            "show":"Roderick on the Line",
            "episode":"389",
            "title":"The New March",
            "subject":"Alyeska",
            "Category":["Location","Person"],
            "tags":[
                    "Alaska", "Alyeska", "Girdwood", "Max"
                    ]
            }

id: rotl_398_Alyeska
"""