In [10]:
import json
from pathlib import Path

from cdp_backend.database import models as db_models
from cdp_backend.pipeline.transcript_model import SectionAnnotation, Transcript, TranscriptAnnotations
import fireo
from gcsfs import GCSFileSystem
from google.auth.credentials import AnonymousCredentials
from google.cloud.firestore import Client

# Connect to the database.
# AnonymousCredentials is used, so no Google account is attached to the client.
fireo.connection(client=Client(
    project="cdp-seattle-staging-dbengvtn",
    credentials=AnonymousCredentials(),
))

# Fetch at most MAX_EVENTS events; fewer come back when the collection is smaller,
# so the length is displayed to show how many were actually returned.
MAX_EVENTS = 50
events = list(db_models.Event.collection.fetch(MAX_EVENTS))
len(events)

37

In [13]:
# Walk every fetched event, list the sessions that reference it, and look up the
# transcript with the highest confidence for each of those sessions.
for event in events:
    # Sessions whose `event_ref` points back at this event
    session_query = db_models.Session.collection.filter("event_ref", "==", event.key)
    sessions = list(session_query.fetch())
    for session in sessions:
        print(session)
        # "-confidence" orders descending, so the single document returned by
        # `.get()` should be the most confident transcript for this session.
        transcript_query = (
            db_models.Transcript.collection
            .order("-confidence")
            .filter("session_ref", "==", session.key)
        )
        transcript = transcript_query.get()
        print(transcript)

<cdp_backend.database.models.Session object at 0x7f91584072e0>
<cdp_backend.database.models.Transcript object at 0x7f9130327580>
<cdp_backend.database.models.Session object at 0x7f91127a7100>
<cdp_backend.database.models.Transcript object at 0x7f9112858bb0>
<cdp_backend.database.models.Session object at 0x7f9112858790>
<cdp_backend.database.models.Transcript object at 0x7f9112858460>
<cdp_backend.database.models.Session object at 0x7f91126ed9d0>
<cdp_backend.database.models.Transcript object at 0x7f9112858520>
<cdp_backend.database.models.Session object at 0x7f91126f7250>
<cdp_backend.database.models.Transcript object at 0x7f91126ed0a0>
<cdp_backend.database.models.Session object at 0x7f91126f7790>
<cdp_backend.database.models.Transcript object at 0x7f91126f7580>
<cdp_backend.database.models.Session object at 0x7f91126f7a00>
<cdp_backend.database.models.Transcript object at 0x7f9130361ee0>
<cdp_backend.database.models.Session object at 0x7f91126f73a0>
<cdp_backend.database.models.Trans

In [2]:
from typing import List, NamedTuple, Optional

class SectionTarget(NamedTuple):
    """One minutes item that a transcript section is expected to line up with."""

    # Position of the item within the ordered list of targets
    index: int
    # Name of the minutes item
    name: str
    # Description of the minutes item; None when the item has none
    description: Optional[str]

# Build one SectionTarget per event minutes item. Items are ordered by their stored
# "index" and then renumbered by position (0..n-1), so the resulting list is already
# sorted by `SectionTarget.index` and needs no second sort.
ordered_minutes_items = sorted(
    TARGET_METADATA["event_minutes_items"],
    key=lambda item: item["index"],
)
section_targets: List[SectionTarget] = [
    SectionTarget(
        index=position,
        name=item["minutes_item_ref"]["name"],
        description=item["minutes_item_ref"]["description"],
    )
    for position, item in enumerate(ordered_minutes_items)
]
section_targets

[SectionTarget(index=0, name='Call To Order', description=None),
 SectionTarget(index=1, name='Approval of the Agenda', description=None),
 SectionTarget(index=2, name="Chair's Report", description=None),
 SectionTarget(index=3, name='Public Comment', description=None),
 SectionTarget(index=4, name='Inf 1843', description='Seattle Public Library 2020 Levy Report'),
 SectionTarget(index=5, name='Inf 1844', description='Seattle Public Library 2020 RSJI Report'),
 SectionTarget(index=6, name='Inf 1845', description='Seattle Center 2020 RSJI Report'),
 SectionTarget(index=7, name='Inf 1846', description='Seattle Parks and Recreation 2020 RSJI Report'),
 SectionTarget(index=8, name='Adjournment', description=None)]

In [3]:
# Add your annotations here
#
# Each section covers sentences [start_sentence_index, stop_sentence_index) of the
# transcript (the sanity check slices with exactly these bounds), so every section's
# start must equal the previous section's stop to leave no gap or overlap.

# Recorded as the `generator` of every section below.
ANNOTATOR = "Jackson Maxfield Brown"

UNANNOTATED_TRANSCRIPT.annotations = TranscriptAnnotations(sections=[
    # NOTE(review): the section targets list this item as 'Call To Order' (capital "To");
    # confirm whether annotation names are expected to match the targets exactly.
    SectionAnnotation(
        name="Call to Order",
        start_sentence_index=0,
        stop_sentence_index=8,
        generator=ANNOTATOR,
    ),
    SectionAnnotation(
        name="Approval of the Agenda",
        start_sentence_index=8,
        stop_sentence_index=10,
        generator=ANNOTATOR,
    ),
    SectionAnnotation(
        name="Chair's Report",
        start_sentence_index=10,
        stop_sentence_index=22,
        generator=ANNOTATOR,
    ),
    SectionAnnotation(
        name="Public Comment",
        start_sentence_index=22,
        stop_sentence_index=60,
        generator=ANNOTATOR,
    ),
    SectionAnnotation(
        name="Inf 1843",
        start_sentence_index=60,
        stop_sentence_index=168,
        generator=ANNOTATOR,
        description="Seattle Public Library 2020 Levy Report",
    ),
    SectionAnnotation(
        name="Inf 1844",
        start_sentence_index=168,
        stop_sentence_index=325,
        generator=ANNOTATOR,
        description="Seattle Public Library 2020 RSJI Report",
    ),
    SectionAnnotation(
        name="Inf 1845",
        start_sentence_index=325,
        stop_sentence_index=436,
        generator=ANNOTATOR,
        description="Seattle Center 2020 RSJI Report",
    ),
    SectionAnnotation(
        name="Inf 1846",
        start_sentence_index=436,
        stop_sentence_index=584,
        generator=ANNOTATOR,
        description="Seattle Parks and Recreation 2020 RSJI Report",
    ),
    SectionAnnotation(
        name="Adjournment",
        start_sentence_index=584,
        # The final section runs through the last sentence of the transcript
        stop_sentence_index=len(UNANNOTATED_TRANSCRIPT.sentences),
        generator=ANNOTATOR,
    ),
])

In [4]:
# Sanity check sections: print the first and last sentence of every annotated section
# so the chosen boundaries can be compared against the transcript by eye.
for section_anno in UNANNOTATED_TRANSCRIPT.annotations.sections:
    print(section_anno.name)
    section = UNANNOTATED_TRANSCRIPT.sentences[
        section_anno.start_sentence_index:section_anno.stop_sentence_index
    ]
    if len(section) > 0:
        # Only the first 100 characters of each sentence are shown to keep output short
        print("start --", section[0].text[:100])
        print("end   --", section[-1].text[:100])
    else:
        # The bounds select no sentences (e.g. start >= stop, or start past the end).
        # Report it here rather than failing with an IndexError on section[0].
        print(
            "!! empty section: sentences "
            f"[{section_anno.start_sentence_index}:{section_anno.stop_sentence_index}] "
            "select nothing"
        )
    print("-" * 80)

Call to Order
start -- I am Council member Juarez, chair of the committee, will the clerk please call the role.
end   -- Council member Mosqueda is excused.
--------------------------------------------------------------------------------
Approval of the Agenda
start -- Hearing no objection.
end   -- The agenda is adopted.
--------------------------------------------------------------------------------
Chair's Report
start -- Let's move on to the chair's report.
end   -- At this time we will open the remote public comment period.
--------------------------------------------------------------------------------
Public Comment
start -- It remains the strong intent of the Seattle city council have public comment regularly included on m
end   -- Moving on to items of business.
--------------------------------------------------------------------------------
Inf 1843
start -- Nagine can you read item one in the record.
end   -- Any questions for my colleagues? seeing no questions, is there any

In [5]:
# Store to annotated dataset dir
ANNOTATED_TRANSCRIPT_PATH = ANNOTATED_DATASET / TARGET_METADATA["transcript_name"]

# Serialize before opening the file: mode "w" truncates on open, so a failure inside
# to_json() would otherwise leave an empty file where the transcript should be.
annotated_transcript_json = UNANNOTATED_TRANSCRIPT.to_json()

# Explicit UTF-8 so the written bytes do not depend on the platform's default encoding
with open(ANNOTATED_TRANSCRIPT_PATH, "w", encoding="utf-8") as annotated_transcript_file:
    annotated_transcript_file.write(annotated_transcript_json)