/**
 * Looks up the names of the House and Senate chairs of a committee.
 *
 * Reads the committee document at
 * `generalCourts/{generalCourtNumber}/committees/{committeeCode}`, validates it
 * with `Committee.check`, and returns the `name` of every roster member whose
 * `id` equals the `MemberCode` of the House or Senate chairperson. Names are
 * returned in roster order.
 *
 * This is a best-effort lookup: a missing document, a document that fails
 * validation, or a failed read all yield an empty list (failures are logged
 * with `console.warn`) so that callers are never interrupted by it.
 *
 * @param generalCourtNumber General court the committee belongs to.
 * @param committeeCode Document id of the committee within that court.
 * @returns Names of the chairs found in the roster; possibly empty.
 */
const loadCommitteeChairNames = async (
  generalCourtNumber: number,
  committeeCode: string
): Promise<string[]> => {
  try {
    const committeeSnap = await db
      .collection(`generalCourts/${generalCourtNumber}/committees`)
      .doc(committeeCode)
      .get()

    if (!committeeSnap.exists) return []

    const { members, content } = Committee.check(committeeSnap.data())

    // Either chair may be absent; only keep codes that are actually set.
    const chairCodes = new Set<string>()
    for (const chair of [content.HouseChairperson, content.SenateChairperson]) {
      if (chair?.MemberCode) chairCodes.add(chair.MemberCode)
    }
    if (chairCodes.size === 0) return []

    return (members ?? [])
      .filter(member => chairCodes.has(member.id))
      .map(member => member.name)
  } catch (error) {
    console.warn(
      `Failed to load committee chairs for ${committeeCode} (${generalCourtNumber}):`,
      error
    )
    return []
  }
}
import { DateTime } from "luxon"
import { db } from "../firebase"
import { createSearchIndexer } from "../search"
import { Hearing } from "../events/types"
import { timeZone } from "../malegislature"

/** Shape of one document in the `hearings` search collection. */
type HearingSearchRecord = {
  id: string
  eventId: number
  title: string
  description?: string
  startsAt: number
  month: string
  year: number
  committeeCode?: string
  committeeName?: string
  locationName?: string
  locationCity?: string
  committeeChairs: string[]
  agendaTopics: string[]
  billNumbers: string[]
  billSlugs: string[]
  hasVideo: boolean
}

/**
 * Search indexer for hearings.
 *
 * Sources documents from the `events` collection whose `type` is `"hearing"`
 * and is triggered on `events/{eventId}`. Because that trigger path also
 * matches non-hearing events, `filter` restricts indexing to hearings.
 */
export const {
  syncToSearchIndex: syncHearingToSearchIndex,
  upgradeSearchIndex: upgradeHearingSearchIndex
} = createSearchIndexer({
  sourceCollection: db.collection("events").where("type", "==", "hearing"),
  documentTrigger: "events/{eventId}",
  alias: "hearings",
  idField: "id",
  filter: data => data.type === "hearing",
  schema: {
    fields: [
      { name: "eventId", type: "int32", facet: false },
      { name: "title", type: "string", facet: false },
      { name: "description", type: "string", facet: false, optional: true },
      { name: "startsAt", type: "int64", facet: false },
      { name: "month", type: "string", facet: true },
      { name: "year", type: "int32", facet: true },
      { name: "committeeCode", type: "string", facet: true, optional: true },
      { name: "committeeName", type: "string", facet: true, optional: true },
      { name: "locationName", type: "string", facet: false, optional: true },
      { name: "locationCity", type: "string", facet: false, optional: true },
      {
        name: "committeeChairs",
        type: "string[]",
        facet: true
      },
      { name: "agendaTopics", type: "string[]", facet: false },
      { name: "billNumbers", type: "string[]", facet: false },
      { name: "billSlugs", type: "string[]", facet: false },
      { name: "hasVideo", type: "bool", facet: true }
    ],
    default_sorting_field: "startsAt"
  },
  /**
   * Converts a hearing event document into its search record.
   *
   * @throws if the document does not validate as a `Hearing`.
   */
  convert: (data): HearingSearchRecord => {
    // `committeeChairs` is required by the Hearing runtype, but the scraper
    // leaves it undefined when the hearing host has no committee code or
    // general court number. Default it to an empty list before validating so
    // those hearings are still indexed instead of failing conversion.
    const {
      content,
      startsAt: startsAtTimestamp,
      id,
      videoURL,
      committeeChairs
    } = Hearing.check({ committeeChairs: [], ...data })

    const startsAt = startsAtTimestamp.toMillis()
    const schedule = DateTime.fromMillis(startsAt, { zone: timeZone })

    // After validation every agenda has a DocumentsInAgenda array, so no
    // optional chaining is needed here.
    const bills = content.HearingAgendas.flatMap(({ DocumentsInAgenda }) =>
      DocumentsInAgenda.map(doc => ({
        number: doc.BillNumber,
        slug: `${doc.GeneralCourtNumber}/${doc.BillNumber}`
      }))
    )

    const committeeName = content.Name

    return {
      id,
      eventId: content.EventId,
      // `Name` is validated as a string, so `??` would never fall back;
      // `||` also covers an empty name.
      title: committeeName || `Hearing ${content.EventId}`,
      description: content.Description,
      startsAt,
      month: schedule.toFormat("LLLL"),
      year: schedule.year,
      committeeCode: content.HearingHost.CommitteeCode,
      committeeName,
      locationName: content.Location.LocationName,
      locationCity: content.Location.City,
      committeeChairs,
      agendaTopics: content.HearingAgendas.map(agenda => agenda.Topic),
      billNumbers: bills.map(bill => bill.number),
      billSlugs: bills.map(bill => bill.slug),
      hasVideo: Boolean(videoURL)
    }
  }
})
/**
 * Mirrors one Firestore document change into the search collection.
 *
 * - If the new snapshot is missing or rejected by the configured filter, the
 *   document is deleted from the index, but only when the previous snapshot
 *   existed and passed the filter.
 * - Otherwise the converted new snapshot is upserted when there is no
 *   eligible previous snapshot or when the converted data differs.
 *
 * @param change Before/after snapshots of the document that triggered the sync.
 */
async syncDocument(change: Change<DocumentSnapshot>) {
  const beforeData = change.before.exists ? change.before.data() : undefined
  const afterData = change.after.exists ? change.after.data() : undefined

  // Previous data, kept only when it was eligible for indexing.
  const previous =
    beforeData && this.passesFilter(beforeData) ? beforeData : undefined

  // if no data or doesn't match filter, delete from index
  if (!afterData || !this.passesFilter(afterData)) {
    if (previous) {
      const { id } = this.config.convert(previous)
      await (await this.getCollection()).documents().delete(id)
    }
    return
  }

  const after = this.config.convert(afterData)

  // Upsert unless the previous snapshot converts to exactly the same record.
  let changed = true
  if (previous) {
    try {
      changed = !isEqual(this.config.convert(previous), after)
    } catch (error) {
      // A previous snapshot that no longer converts must not block indexing
      // the valid new snapshot; treat it as changed and upsert.
      console.warn(
        "Failed to convert previous snapshot; re-indexing document",
        error
      )
    }
  }

  if (changed) {
    await (await this.getCollection()).documents().upsert(after)
  }
}
a/functions/src/search/config.ts +++ b/functions/src/search/config.ts @@ -11,6 +11,7 @@ export type CollectionConfig = { readonly documentTrigger: string readonly idField: string readonly convert: (data: DocumentData) => T + readonly filter?: (data: DocumentData) => boolean } const registered: CollectionConfig[] = []