In [1]:
import json
from pathlib import Path

from cdp_backend.pipeline.transcript_model import SectionAnnotation, Transcript, TranscriptAnnotations

###############################################################################
# Parameters

# Set ANNOTATION_TARGET to the transcript / metadata prefix you want to annotate
ANNOTATION_TARGET = "000ab33594c1"

###############################################################################
# Globals

# The unannotated dataset directory must already exist (strict=True raises if
# it does not); the annotated output directory is created when missing.
UNANNOTATED_DATASET = Path("unannotated").resolve(strict=True)
ANNOTATED_DATASET = Path("annotated").resolve()
ANNOTATED_DATASET.mkdir(exist_ok=True)

# Read the JSON inputs explicitly as UTF-8 so that loading does not depend on
# the platform's default text encoding.
TARGET_METADATA_PATH = UNANNOTATED_DATASET / f"{ANNOTATION_TARGET}-metadata.json"
with open(TARGET_METADATA_PATH, "r", encoding="utf-8") as open_target_metadata_file:
    TARGET_METADATA = json.load(open_target_metadata_file)

# The metadata names the transcript file that belongs to this target.
UNANNOTATED_TRANSCRIPT_PATH = UNANNOTATED_DATASET / TARGET_METADATA["transcript_name"]
with open(UNANNOTATED_TRANSCRIPT_PATH, "r", encoding="utf-8") as open_unannotated_transcript_file:
    UNANNOTATED_TRANSCRIPT = Transcript.from_json(open_unannotated_transcript_file.read())

In [2]:
from typing import List, NamedTuple, Optional

# A minutes item that should receive a section annotation:
#   index       -- position of the item within the ordered minutes items
#   name        -- name of the referenced minutes item
#   description -- description of the referenced minutes item, if it has one
SectionTarget = NamedTuple(
    "SectionTarget",
    [
        ("index", int),
        ("name", str),
        ("description", Optional[str]),
    ],
)

# Order the minutes items by the "index" stored in the metadata, then number
# the resulting targets by their position in that ordering.
ordered_minutes_items = sorted(
    TARGET_METADATA["event_minutes_items"],
    key=lambda minutes_item: minutes_item["index"],
)
section_targets: List[SectionTarget] = [
    SectionTarget(
        index=position,
        name=minutes_item["minutes_item_ref"]["name"],
        description=minutes_item["minutes_item_ref"]["description"],
    )
    for position, minutes_item in enumerate(ordered_minutes_items)
]

# Keep the targets ordered by their index, then display them as the cell output.
section_targets.sort(key=lambda target: target.index)
section_targets

[SectionTarget(index=0, name='Call To Order', description=None),
 SectionTarget(index=1, name='Approval of the Agenda', description=None),
 SectionTarget(index=2, name='Public Comment', description=None),
 SectionTarget(index=3, name='CB 120148', description='AN ORDINANCE relating to parking enforcement; amending Ordinance 126237, which adopted the 2021 Budget; transferring positions out of the Seattle Police Department; and ratifying and confirming certain prior acts.'),
 SectionTarget(index=4, name='Inf 1861', description='Community Safety Capacity Building RFP'),
 SectionTarget(index=5, name='Inf 1862', description='Seattle Police Department (SPD) Quarterly Finance and Staffing Report'),
 SectionTarget(index=6, name='Adjournment', description=None)]

In [3]:
# Add your annotations here
#
# Each SectionAnnotation marks one minutes item by the indices of its first and
# last sentence in the transcript. Names and descriptions are copied verbatim
# from the section targets listed for this meeting's metadata.

UNANNOTATED_TRANSCRIPT.annotations = {}
UNANNOTATED_TRANSCRIPT.annotations[TranscriptAnnotations.sections.name] = [
    SectionAnnotation(
        # Spelled "Call To Order" to match the minutes item name in the metadata.
        name="Call To Order",
        start_sentence_index=0,
        end_sentence_index=15,
        generator="Jackson Maxfield Brown",
    ),
    SectionAnnotation(
        name="Approval of the Agenda",
        start_sentence_index=16,
        end_sentence_index=23,
        generator="Jackson Maxfield Brown",
    ),
    SectionAnnotation(
        name="Public Comment",
        start_sentence_index=24,
        end_sentence_index=548,
        generator="Jackson Maxfield Brown",
    ),
    SectionAnnotation(
        name="CB 120148",
        start_sentence_index=549,
        end_sentence_index=662,
        generator="Jackson Maxfield Brown",
        description="AN ORDINANCE relating to parking enforcement; amending Ordinance 126237, which adopted the 2021 Budget; transferring positions out of the Seattle Police Department; and ratifying and confirming certain prior acts.",
    ),
    SectionAnnotation(
        name="Inf 1861",
        start_sentence_index=663,
        end_sentence_index=876,
        generator="Jackson Maxfield Brown",
        description="Community Safety Capacity Building RFP",
    ),
    SectionAnnotation(
        name="Inf 1862",
        # NOTE(review): the previous section ends at sentence 876 and this one
        # starts at 878, so sentence 877 is not covered by any section --
        # confirm whether that gap is intentional.
        start_sentence_index=878,
        end_sentence_index=1795,
        generator="Jackson Maxfield Brown",
        description="Seattle Police Department (SPD) Quarterly Finance and Staffing Report",
    ),
    SectionAnnotation(
        name="Adjournment",
        start_sentence_index=1796,
        end_sentence_index=1798,
        generator="Jackson Maxfield Brown",
    ),
]

In [4]:
# Sanity check sections: for every annotated section, show up to the first 100
# characters of its first and last sentence so the boundaries can be eyeballed.
transcript_sentences = UNANNOTATED_TRANSCRIPT.sentences
for section in UNANNOTATED_TRANSCRIPT.annotations[TranscriptAnnotations.sections.name]:
    print(section.name)
    opening_text = transcript_sentences[section.start_sentence_index].text
    print("start --", opening_text[:100])
    closing_text = transcript_sentences[section.end_sentence_index].text
    print("end   --", closing_text[:100])
    print("-" * 80)

Call to Order
start -- Meeting of the public safety and human services committee will come to order.
end   -- Thank you so much.
--------------------------------------------------------------------------------
Approval of the Agenda
start -- On today's committee agenda, we will hear three items.
end   -- [inaudible] hearing no objection, today's agenda [inaudible]. I will moderate the public comment per
--------------------------------------------------------------------------------
Public Comment
start -- Each Speaker will be given--well there are 39 speakers.
end   -- Thank you, everybody who was able to join us.
--------------------------------------------------------------------------------
CB 120148
start -- Will the clerk please read the first agenda item into the record?
end   -- Thank you so much, the bill will move forward on the August 16th meeting.
--------------------------------------------------------------------------------
Inf 1861
start -- Will the clerk please read in

In [5]:
# Store to annotated dataset dir
# The annotated copy keeps the file name recorded in the target metadata.
transcript_file_name = TARGET_METADATA["transcript_name"]
ANNOTATED_TRANSCRIPT_PATH = ANNOTATED_DATASET / transcript_file_name
with ANNOTATED_TRANSCRIPT_PATH.open("w") as annotated_transcript_file:
    annotated_transcript_file.write(UNANNOTATED_TRANSCRIPT.to_json())