Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion components/db/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ type SpecialEvent = BaseEvent & {
}
type SpecialEventContent = BaseContent

/**
 * A hearing event: the `BaseEvent` fields plus the hearing discriminator,
 * its content, and the hosting committee's chairs.
 */
type Hearing = BaseEvent & {
  type: "hearing"
  content: HearingContent
  /** Chairs of the committee hosting the hearing. */
  committeeChairs: string[]
}
type HearingContent = BaseContent & {
Description: string
Name: string
Expand Down
44 changes: 44 additions & 0 deletions functions/src/events/scrapeEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { sha256 } from "js-sha256"
import { isValidVideoUrl, withinCutoff } from "./helpers"
import ffmpeg from "fluent-ffmpeg"
import fs from "fs"
import { Committee } from "../committees/types"
abstract class EventScraper<ListItem, Event extends BaseEvent> {
private schedule
private timeout
Expand Down Expand Up @@ -297,6 +298,37 @@ const shouldScrapeVideo = async (EventId: number) => {
return false
}

/**
 * Resolves the names of a committee's House and Senate chairpersons.
 *
 * Reads the committee document under
 * `generalCourts/{generalCourtNumber}/committees`, validates it with the
 * `Committee` runtype, and returns the `name` of every member whose `id`
 * equals one of the chairpersons' `MemberCode`s.
 *
 * Best effort: resolves to an empty array when the document does not exist,
 * and logs a warning and resolves to an empty array when anything throws.
 */
const loadCommitteeChairNames = async (
  generalCourtNumber: number,
  committeeCode: string
) => {
  try {
    const committeesPath = `generalCourts/${generalCourtNumber}/committees`
    const snapshot = await db
      .collection(committeesPath)
      .doc(committeeCode)
      .get()

    if (!snapshot.exists) {
      const none: string[] = []
      return none
    }

    const committee = Committee.check(snapshot.data())

    // Collect whichever chairperson codes are present (either may be absent).
    const chairCodes = new Set<string>()
    for (const chairperson of [
      committee.content.HouseChairperson,
      committee.content.SenateChairperson
    ]) {
      const code = chairperson?.MemberCode
      if (code) chairCodes.add(code)
    }

    // Keep roster order; emit a name only for members that are chairs.
    const roster = committee.members ?? []
    return roster.flatMap(member =>
      chairCodes.has(member.id) ? [member.name] : []
    )
  } catch (error) {
    console.warn(
      `Failed to load committee chairs for ${committeeCode} (${generalCourtNumber}):`,
      error
    )
    const none: string[] = []
    return none
  }
}

class HearingScraper extends EventScraper<HearingListItem, Hearing> {
constructor() {
super("every 60 minutes", 480, "4GB")
Expand All @@ -313,6 +345,15 @@ class HearingScraper extends EventScraper<HearingListItem, Hearing> {

console.log("content in getEvent()", content)

const host = content.HearingHost
const committeeChairs =
host?.CommitteeCode && host?.GeneralCourtNumber
? await loadCommitteeChairNames(
host.GeneralCourtNumber,
host.CommitteeCode
)
: undefined

if (await shouldScrapeVideo(EventId)) {
try {
const maybeVideoUrl = await getHearingVideoUrl(EventId)
Expand All @@ -338,6 +379,7 @@ class HearingScraper extends EventScraper<HearingListItem, Hearing> {
...this.timestamps(content),
videoURL: maybeVideoUrl,
videoFetchedAt: Timestamp.now(),
committeeChairs,
videoTranscriptionId: transcriptId // using the assembly Id as our transcriptionId
} as Hearing
}
Expand All @@ -347,6 +389,7 @@ class HearingScraper extends EventScraper<HearingListItem, Hearing> {
id: `hearing-${EventId}`,
type: "hearing",
content,
committeeChairs,
...this.timestamps(content)
} as Hearing
}
Expand All @@ -355,6 +398,7 @@ class HearingScraper extends EventScraper<HearingListItem, Hearing> {
id: `hearing-${EventId}`,
type: "hearing",
content,
committeeChairs,
...this.timestamps(content)
} as Hearing
}
Expand Down
42 changes: 33 additions & 9 deletions functions/src/events/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,43 @@ export type Session = Static<typeof Session>
/** Runtype for session events: `BaseEvent` extended so that `type` must match `L("session")`. */
export const Session = BaseEvent.extend({
type: L("session")
})

/** Static type derived from the `HearingLocation` runtype. */
export type HearingLocation = Static<typeof HearingLocation>
/**
 * Runtype for the address of a hearing's location.
 * Every field is a plain `String` except `AddressLine2`, which is wrapped in `Nullable`.
 */
export const HearingLocation = Record({
AddressLine1: String,
AddressLine2: Nullable(String),
City: String,
LocationName: String,
State: String,
ZipCode: String
})
export type HearingContent = Static<typeof HearingContent>
export const HearingContent = BaseEventContent.extend({
RescheduledHearing: Nullable(
Record({
EventDate: String,
StartTime: String
})
),
Description: String,
Name: String,
Status: String,
HearingHost: Record({
CommitteeCode: String,
GeneralCourtNumber: Number
}),
Location: HearingLocation,
HearingAgendas: Array(
Record({
DocumentsInAgenda: Array(
Record({ BillNumber: String, GeneralCourtNumber: Number })
Record({
BillNumber: String,
GeneralCourtNumber: Number,
PrimarySponsor: Nullable(Record({ Id: String })),
Title: String
})
),
StartTime: String,
EndTime: String,
Topic: String
})
),
RescheduledHearing: Nullable(
Record({
EventDate: String,
StartTime: String
})
)
Expand All @@ -80,7 +103,8 @@ export const Hearing = BaseEvent.extend({
content: HearingContent,
videoURL: Optional(String),
videoTranscriptionId: Optional(String),
videoFetchedAt: Optional(InstanceOf(Timestamp))
videoFetchedAt: Optional(InstanceOf(Timestamp)),
committeeChairs: Array(String)
})

export type Event = Static<typeof Event>
Expand Down
95 changes: 95 additions & 0 deletions functions/src/hearings/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { DateTime } from "luxon"
import { db } from "../firebase"
import { createSearchIndexer } from "../search"
import { Hearing } from "../events/types"
import { timeZone } from "../malegislature"

/**
 * Flattened shape of a hearing as stored in the "hearings" search collection.
 * Produced by the indexer's `convert` callback from a validated `Hearing` event.
 */
type HearingSearchRecord = {
// Copied from the hearing event's `id`.
id: string
// Copied from the hearing content's `EventId`.
eventId: number
// Taken from the hearing content's `Name`.
title: string
description?: string
// Start time in epoch milliseconds (`Timestamp.toMillis()`); the default sorting field.
startsAt: number
// `startsAt` formatted with luxon's "LLLL" token in the `timeZone` zone.
month: string
year: number
committeeCode?: string
committeeName?: string
locationName?: string
locationCity?: string
committeeChairs: string[]
// The `Topic` of each hearing agenda.
agendaTopics: string[]
// The `BillNumber` of each document across all agendas.
billNumbers: string[]
// `${GeneralCourtNumber}/${BillNumber}` for each document across all agendas.
billSlugs: string[]
// True when the hearing has a truthy `videoURL`.
hasVideo: boolean
}

export const {
syncToSearchIndex: syncHearingToSearchIndex,
upgradeSearchIndex: upgradeHearingSearchIndex
} = createSearchIndexer<HearingSearchRecord>({
sourceCollection: db.collection("events").where("type", "==", "hearing"),
documentTrigger: "events/{eventId}",
alias: "hearings",
idField: "id",
filter: data => data.type === "hearing",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only process events whose `type` is `hearing`.

schema: {
fields: [
{ name: "eventId", type: "int32", facet: false },
{ name: "title", type: "string", facet: false },
{ name: "description", type: "string", facet: false, optional: true },
{ name: "startsAt", type: "int64", facet: false },
{ name: "month", type: "string", facet: true },
{ name: "year", type: "int32", facet: true },
{ name: "committeeCode", type: "string", facet: true, optional: true },
{ name: "committeeName", type: "string", facet: true, optional: true },
{ name: "locationName", type: "string", facet: false, optional: true },
{ name: "locationCity", type: "string", facet: false, optional: true },
{
name: "committeeChairs",
type: "string[]",
facet: true
},
{ name: "agendaTopics", type: "string[]", facet: false },
{ name: "billNumbers", type: "string[]", facet: false },
{ name: "billSlugs", type: "string[]", facet: false },
{ name: "hasVideo", type: "bool", facet: true }
],
default_sorting_field: "startsAt"
},
convert: data => {
const {
content,
startsAt: startsAtTimestamp,
id,
videoURL,
committeeChairs
} = Hearing.check(data)
const startsAt = startsAtTimestamp.toMillis()
const schedule = DateTime.fromMillis(startsAt, { zone: timeZone })
const bills = content.HearingAgendas?.flatMap(({ DocumentsInAgenda }) =>
DocumentsInAgenda.map(doc => ({
number: doc.BillNumber,
slug: `${doc.GeneralCourtNumber}/${doc.BillNumber}`
Comment on lines +71 to +72
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Saving both the bill number and the slug (`{GeneralCourtNumber}/{BillNumber}`) should make a future browse-hearings page more ergonomic and performant.

}))
)
const committeeName = content.Name
return {
id: id,
eventId: content.EventId,
title: committeeName ?? `Hearing ${content.EventId}`,
description: content.Description,
startsAt,
month: schedule.toFormat("LLLL"),
year: schedule.year,
committeeCode: content.HearingHost?.CommitteeCode,
committeeName,
locationName: content.Location?.LocationName,
locationCity: content.Location?.City,
committeeChairs,
agendaTopics: content.HearingAgendas.map(agenda => agenda.Topic),
billNumbers: bills.map(bill => bill.number),
billSlugs: bills.map(bill => bill.slug),
hasVideo: Boolean(videoURL)
}
}
})
4 changes: 4 additions & 0 deletions functions/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ export {
updateCommitteeRosters
} from "./committees"
export { scrapeHearings, scrapeSessions, scrapeSpecialEvents } from "./events"
export {
syncHearingToSearchIndex,
upgradeHearingSearchIndex
} from "./hearings/search"
export {
createMemberSearchIndex,
fetchMemberBatch,
Expand Down
50 changes: 36 additions & 14 deletions functions/src/search/SearchIndexer.ts
Copy link
Collaborator Author

@jicruz96 jicruz96 Oct 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We add the `passesFilter()` helper and refactor `syncDocument()` so that events we don't want in the search index are filtered out.

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import hash from "object-hash"
import Collection from "typesense/lib/Typesense/Collection"
import { ImportResponse } from "typesense/lib/Typesense/Documents"
import { ImportError, ObjectNotFound } from "typesense/lib/Typesense/Errors"
import { db, DocumentSnapshot, QuerySnapshot } from "../firebase"
import { db, DocumentData, DocumentSnapshot, QuerySnapshot } from "../firebase"
import { createClient } from "./client"
import { CollectionConfig } from "./config"
import { z } from "zod"
Expand All @@ -30,6 +30,17 @@ export class SearchIndexer {
this.collectionName = `${config.alias}_${schemaHash}`
}

/**
 * Decides whether a source document belongs in this search collection.
 *
 * Missing data never passes. With no `filter` configured every document
 * passes. A filter that throws is logged and treated as a rejection.
 */
private passesFilter(data: DocumentData | undefined) {
  let accepted = false
  if (data) {
    if (this.config.filter) {
      try {
        accepted = this.config.filter(data)
      } catch (error) {
        console.error("Filter function threw", error)
      }
    } else {
      accepted = true
    }
  }
  return accepted
}

static upgradePath = (alias: string) => `/search/upgrade-${alias}`

async scheduleUpgradeIfNeeded(backfillConfig: unknown) {
Expand All @@ -56,18 +67,27 @@ export class SearchIndexer {
}

/**
 * Mirrors a single Firestore document change into the search collection.
 *
 * - If the document was deleted, or its new data does not pass the configured
 *   filter, the previously indexed record (if the old data passed the filter)
 *   is deleted and nothing else happens.
 * - Otherwise the new data is converted and upserted when there was no
 *   indexable previous version or the converted record changed.
 *
 * @param change before/after snapshots of the written document
 */
async syncDocument(change: Change<DocumentSnapshot>) {
  const beforeData = change.before.exists ? change.before.data() : undefined
  const afterData = change.after.exists ? change.after.data() : undefined

  // No data, or data that doesn't match the filter: remove from the index.
  if (!afterData || !this.passesFilter(afterData)) {
    if (beforeData && this.passesFilter(beforeData)) {
      const { id } = this.config.convert(beforeData)
      await (await this.getCollection()).documents().delete(id)
    }
    return
  }

  const after = this.config.convert(afterData)

  // Only skip the write when the previous version was indexable and converts
  // to an identical record. `convert` may throw on a stale `before` snapshot
  // (e.g. one that fails the converter's validation); that must not block
  // indexing a valid `after`, so it is treated as "changed".
  let unchanged = false
  if (beforeData && this.passesFilter(beforeData)) {
    try {
      unchanged = isEqual(this.config.convert(beforeData), after)
    } catch (error) {
      console.warn(
        "Could not convert previous document version; re-indexing",
        error
      )
    }
  }

  if (!unchanged) {
    await (await this.getCollection()).documents().upsert(after)
  }
}

Expand Down Expand Up @@ -107,7 +127,9 @@ export class SearchIndexer {

const docs = batch.reduce((acc, d) => {
try {
const doc = convert(d.data())
const data = d.data()
if (!this.passesFilter(data)) return acc
const doc = convert(data)
acc.push(doc)
} catch (error: any) {
console.error(`Failed to convert document: ${error.message}`)
Expand Down
1 change: 1 addition & 0 deletions functions/src/search/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export type CollectionConfig<T extends BaseRecord = BaseRecord> = {
readonly documentTrigger: string
readonly idField: string
readonly convert: (data: DocumentData) => T
readonly filter?: (data: DocumentData) => boolean
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `events/{eventId}` document trigger fires for every event type, not just hearings, so we add an optional `filter` to the search indexer config and only process hearing events.

}

const registered: CollectionConfig[] = []
Expand Down