# In [3]:
import re


def clean_response(text):
    """Tidy a model reply for writing to disk.

    Runs of consecutive newline characters are collapsed into a single
    newline, and every line containing the phrase "Let me know if you" is
    dropped. The remaining lines are returned joined by single newlines.
    """
    collapsed = re.sub(r"\n+", "\n", text)
    kept = []
    for row in collapsed.splitlines():
        if "Let me know if you" in row:
            # Skip the assistant's closing boilerplate line.
            continue
        kept.append(row)
    return "\n".join(kept)

# In [4]:
from pathlib import Path
import sys

from ollama import Client

# Put the parent of the current working directory at the front of sys.path.
parent_dir = str(Path().resolve().parents[0])
sys.path.insert(0, parent_dir)

# Episode identifiers that can be used to restrict a run to a subset of the
# transcripts (see the note on `files` below). Not applied by default.
episodes = [
    "030",
    "051",
    "061",
    "069",
    "098",
    "118",
    "186",
    "240",
    "298",
    "398",
    "405",
    "430",
]

# Directory that receives one Markdown file per transcript.
out_dir = Path("../files/chat/roadwork")
# parents=True so that a missing intermediate directory does not raise
# FileNotFoundError.
out_dir.mkdir(parents=True, exist_ok=True)

# Directory holding the input transcripts.
files_dir = Path("../files/text/roadwork")

# All regular files in the transcript directory, in sorted order.
# Subdirectories are skipped because they cannot be read as text.
# To process only the episodes listed above, extend the condition with
# `and any(episode in str(file) for episode in episodes)`.
files = sorted(file for file in files_dir.iterdir() if file.is_file())

client = Client(host="https://mlkyway.anselbrandt.net/ollama")

# Each question is sent together with the full transcript text.
questions = [
    "What are the names mentioned in this conversation and who are they?",
    "What placenames are mentioned in this conversation?",
    "What musicians, bands or songs are mentioned in this conversation?",
    "What books or novels are mentioned in this conversation and who are the authors of those books?",
    "What are the distinct segments of conversation?",
]

# Model names passed to the chat endpoint.
models = [
    "gemma2:27b",
]

# For every transcript, ask each model every question, clean the answers
# and write them, separated by blank lines, to a Markdown file in out_dir.
for file in files:
    # read_text opens and closes the file itself; the encoding is explicit
    # so the result does not depend on the platform's locale default.
    text = file.read_text(encoding="utf-8")
    for model in models:
        responses = []
        for question in questions:
            # The transcript comes first, followed by the question.
            prompt = f"{text}\n\n{question}"
            response = client.chat(
                model=model,
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    },
                ],
            )
            content = response["message"]["content"]
            responses.append(clean_response(content))
        metadata = "\n\n".join(responses)
        # NOTE: the output name does not include the model, so with more
        # than one entry in `models` each model overwrites the previous
        # model's file for this transcript.
        file_name = file.stem + ".md"
        out_path = out_dir / file_name
        out_path.write_text(metadata, encoding="utf-8")