In [1]:
!which python

/Users/jensen/Developer/better-sg/SchemesSG_v3/dataset_worfklow/.venv/bin/python


In [2]:
import os

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pandas as pd

In [3]:
# connect to firestore
cred = credentials.Certificate("../creds.json")
app = firebase_admin.initialize_app(cred)
db = firestore.client()

# Helper Functions

In [4]:
def search_and_display_results(query, top_k=10):
    """
        Search for schemes using semantic similarity and display
    results with similarity scores.

        Args:
            query (str): The search query
            top_k (int): Number of top results to return (default: 10)

        Returns:
            pd.DataFrame: DataFrame with scheme information and
    similarity scores
    """
    # Perform semantic search
    result = collection.query(query_texts=[query], n_results=top_k)

    # Create a tuple of IDs and distances from the search results
    ids = result["ids"][0]
    distances = result["distances"][0]

    # Create tuples and convert to DataFrame
    id_distance_tuples = list(zip(ids, distances))
    search_results_df = pd.DataFrame(
        id_distance_tuples, columns=["scheme_id", "similarity_distance"]
    )

    # Merge with schemes_df to get full information
    results_with_schemes = search_results_df.merge(
        schemes_df[["scheme_id", "scheme", "agency", "description"]],
        on="scheme_id",
        how="left",
    )

    # Sort by similarity score (highest first)
    results_with_schemes = results_with_schemes.sort_values(
        "similarity_distance", ascending=True
    )

    return results_with_schemes[
        ["scheme_id", "scheme", "agency", "description", "similarity_distance"]
    ]

def build_desc_booster(row):
    """
    Build a concatenated description string from selected non-null columns of a DataFrame row.

    The function checks for the presence of values in the following columns (in order):
    - 'scheme'
    - 'agency'
    - 'description'
    - 'search_booster'
    - "who_is_it_for"
    - 'what_it_gives'
    - 'scheme_type'

    Any non-null values are converted to strings and appended into a list, which is then joined
    into a single space-separated string.

    Args:
        row (pandas.Series): A row from a pandas DataFrame containing the expected columns.

    Returns:
        str: A space-separated string of all non-null column values.
    """
    components = []

    # Check each column and add non-null values
    if pd.notna(row['scheme']):
        components.append(str(row['scheme']))
    if pd.notna(row['agency']):
        components.append(str(row['agency']))
    if pd.notna(row['description']):
        components.append(str(row['description']))
    if pd.notna(row['search_booster']):
        components.append(str(row['search_booster']))
    if pd.notna(row["who_is_it_for"]):
        components.append(str(row["who_is_it_for"]))
    if pd.notna(row['what_it_gives']):
        components.append(str(row['what_it_gives']))
    if pd.notna(row['scheme_type']):
        components.append(str(row['scheme_type']))

    # Join all non-null components with spaces
    return ' '.join(components)


In [5]:
# get all existing documents from firestore
docs = db.collection("schemes").stream()
schemes_df = pd.DataFrame([{**scheme.to_dict(), "scheme_id": scheme.id} for scheme in docs])

In [6]:
schemes_df.head()

Unnamed: 0,phone,eligibility,address,summary,llm_description,scraped_text,planning_area,service_area,scheme_type,search_booster,description,what_it_gives,email,agency,scheme,link,image,who_is_it_for,how_to_apply,scheme_id
0,,,,,,Information & Advice\nInformation & Advice\nPe...,No Location,,,,Persons interested in seeking information rega...,,,PAVE,Information On Family Violence and Application...,https://pave.org.sg/our-services/#single-1,https://pave.org.sg/wp-content/uploads/2023/08...,,,00uFr8EP5kJsqgh7G33h
1,,,,,,ERROR: HTTP Error: 404,No Location,,,,The Ministry of Health provides subsidies for ...,,,Ministry of Health (MOH),Subsidies for Government-Funded Intermediate a...,https://www.moh.gov.sg/seeking-healthcare/find...,https://chidnast.sirv.com/SchemesSG/MOH.jpg,,,01hIjKbB93gr09lMfey7
2,,,,,,Programmes & Services – Ainsociety\nOUR PROGRA...,No Location,,,,Our financial assistance schemes help to allev...,,,Ain Society,Financial Assistance,https://ainsociety.org.sg/programmes-services/,https://chidnast.sirv.com/SchemesSG/ain.jpg,,,0AzNhubZMY9kejE9L2vl
3,,- Resident of South West District\n- Applicant...,,One-time $800 aid for South West caregivers.,This scheme provides a one-time $800 assistanc...,"interim one-time assistance of $800, to help c...",No Location,South West District,"Caregiver Support, Financial Assistance, Low I...","caregiver burden, daily needs financial assist...",The South West Caregiver Support Fund is an in...,Financial assistance (general),South_West_Assistance@pa.gov.sg,South West District CDC,South West Caregiver Support Fund,https://southwest.cdc.gov.sg/what-we-do/for-ca...,https://chidnast.sirv.com/SchemesSG/swcdc.jpg,"Caregivers, Low income families",Visit your nearest Community Club/Centre in th...,0IfTwD0f8BbxmpxapZQ2
4,,,,Compassionate Community movement for palliativ...,The Compassionate Community movement in pallia...,The Compassionate Community movement in pallia...,No Location,No Service Boundaries,"End-of-Life/Palliative Care,Community Funding","end of life care,community project funding,ber...",An umbrella body representing organisations th...,"End-of-life care,Support groups,Community proj...",,Singapore Hospice Council,Singapore Hospice Council,https://singaporehospice.org.sg/,https://chidnast.sirv.com/SchemesSG/shc.jpg,"Facing end of life,General public",,0NNEdNcQOpP8J2Y7ogsI


In [7]:
schemes_df.loc[4]

phone                                                           None
eligibility                                                     None
address                                                         None
summary            Compassionate Community movement for palliativ...
llm_description    The Compassionate Community movement in pallia...
scraped_text       The Compassionate Community movement in pallia...
planning_area                                            No Location
service_area                                   No Service Boundaries
scheme_type            End-of-Life/Palliative Care,Community Funding
search_booster     end of life care,community project funding,ber...
description        An umbrella body representing organisations th...
what_it_gives      End-of-life care,Support groups,Community proj...
email                                                           None
agency                                     Singapore Hospice Council
scheme                            

In [8]:
# check if there are any empty descriptions or empty string
empty_descriptions = schemes_df[(schemes_df["description"].isna()) | (schemes_df['description'] == "")]
print(f'Number of empty descriptions: {len(empty_descriptions)}')

Number of empty descriptions: 5


In [9]:
# dropping those empty descriptions for now
empty_description_schemes = schemes_df[(schemes_df["description"].isna()) | (schemes_df['description'] == "")]
print(empty_description_schemes[["scheme_id", "scheme"]])

# filtering out those empty schemes for now
schemes_df = schemes_df[~((schemes_df["description"].isna()) | (schemes_df['description'] == ""))]
schemes_df.shape

                scheme_id                                             scheme
69   BcXpy7bOUjDyOrkB3WmU                    Care Corner Student Care Centre
157  Q49szphEOJPmsqT2DXP5                 Care Corner Family Service Centres
171  S6easrpcSTJOmXhCvG9F  Appropriate Adult Scheme for Young Suspects (A...
341  r0cZr6LdA4Ha2abPr4aG                      Care Corner Families For Life
396  yZtyMYNs7xsVu4ilHOmM  Care Corner’s Learning and Special Needs Inter...


(400, 20)

# Define Vectorstore

In [10]:
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

In [11]:
client = chromadb.PersistentClient(path="./chroma")

In [12]:
collection = client.get_or_create_collection(
    name="schemes_collection",
    embedding_function=OpenAIEmbeddingFunction(
        api_key=os.getenv("OPENAI_API_KEY"),
        model_name="text-embedding-3-small"
    ),
    configuration={"hnsw": {"space": "cosine"}}
)

# Baseline: Generate Document Embeddings

In this section, we will generate document embeddings for the schemes in the database.

The objective of this section is to just create a baseline with pure document embeddings, without any additional features or modifications.


In [13]:
scheme_ids = schemes_df["scheme_id"].values.tolist()
descriptions = schemes_df["description"].values.tolist()

In [14]:
collection.add(
    ids=scheme_ids,
    documents=descriptions
)

## Test query: Baseline


In [15]:
user_query = "I am pregnant teen suffering from depression and family abuse"
results = search_and_display_results(user_query, top_k=10)

In [16]:
results

Unnamed: 0,scheme_id,scheme,agency,description,similarity_distance
0,tvAS8wXjoyGDECZ26hrO,Casework,Serangoon Moral Family Service Centre,Casework management for those facing financial...,0.599593
1,lYdVGUhRZWRv0J9znzAJ,HCSA Dayspring Residential Treatment Centre,HCSA Community Services,Therapeutic Group Home service model for teena...,0.618966
2,Q4TFBLQp9llg9iLGzsjM,Various services,Big Love Child Protection Specialist Centre,Casework management; child protection; home-ba...,0.627686
3,arLWZDYgtpuPEkc1oABi,Project Athena,Singapore Indian Development Association (SINDA),Project Athena empowers single Indian mothers ...,0.631473
4,uSUMLDFP4g8fw8IiKPZB,Safe Place,Lakeside Family Services,"Safe Place provides timely, non-judgmental and...",0.63154
5,LZw0ROOAWW55iMB1Y36U,Various services,Clarity Singapore Limited,Provides rehabilitation and emotional support ...,0.632879
6,arquJpPvLNWA17FoQyWj,swiTCH-UP!,Care Corner Singapore,"Adolescence is a time of critical transition, ...",0.637325
7,kDWqW9oRJ1Fn2Kucoydo,Services for individuals,NuLife Care & Counselling Services,"Suicide intervention, emotional support, mid-c...",0.642161
8,xTIZtGulw0qT2mlbde47,Pertapis Centre for Women and Girls,Pertapis,Provides residential care for young women vict...,0.652988
9,Ex9uSGi0HnPPdDr4QPud,Counselling,En Community Services Society,"As part of the multi-disciplinary team, we aim...",0.653164


In [17]:
for res in results.itertuples(index=False):
    print(f"Scheme: {res.scheme}")
    print(f"Agency: {res.agency}")
    print(f"Description: {res.description}\n")


Scheme: Casework
Agency: Serangoon Moral Family Service Centre
Description: Casework management for those facing financial difficulties, marital and couple issues, parenting and family issues, or have mental health needs

Scheme: HCSA Dayspring Residential Treatment Centre
Agency: HCSA Community Services
Description: Therapeutic Group Home service model for teenage girls who have suffered the complex trauma of physical, sexual or emotional abuse. The model consists of two evidence-based practices namely Trauma Systems Therapy (TST) and Residential Management System (RMS). Our residential care is designed to be like a family, and each resident will be matched with a care team upon admission, comprising a clinical psychologist, a case worker and youth mentor. Our residents have regular counselling sessions to help them develop an individualised treatment plan, and their parents or guardians also have mandatory family sessions to meet family treatment goals.

Scheme: Various services
Agen

# Experiment 1: Description Booster

This section, we re-implement the description booster from initial indexing setup to see how it affects the search results.


In [19]:
client.delete_collection("schemes_collection")

In [20]:
collection = client.get_or_create_collection(
    name="schemes_collection",
    embedding_function=OpenAIEmbeddingFunction(
        api_key=os.getenv("OPENAI_API_KEY"),
        model_name="text-embedding-3-small"
    ),
    configuration={"hnsw": {"space": "cosine"}}
)

In [21]:
schemes_df["description_booster"] = schemes_df.apply(build_desc_booster, axis=1)

In [22]:
print("Sample description booster value:")
print(schemes_df["description_booster"].iloc[5])

Sample description booster value:
Compassion Fund Compassion Fund Ltd Assist students whose families are in crisis, meaning the breadwinner inflicted by illness/met an accident that resulted in a loss of income, and/or death. financial assistance, daily needs financial assistance, urgent financial help, loss of breadwinner, family bereavement, crisis support, low income, family support, comcare, financial hardship Families, Low income families, Facing financial hardship Financial assistance (general), Financial assistance for daily living expenses Financial Assistance, Family, Low Income


In [23]:
scheme_ids = schemes_df["scheme_id"].values.tolist()
description_boosters = schemes_df["description_booster"].values.tolist()


In [24]:
collection.add(
    ids=scheme_ids,
    documents=description_boosters,
)

## Test query: Description Booster

In [25]:
user_query = "I am pregnant teen suffering from depression and family abuse"
results = search_and_display_results(user_query, top_k=10)

In [26]:
results

Unnamed: 0,scheme_id,scheme,agency,description,similarity_distance
0,bb7tTlzWsVhmVxJwDo5l,Pregnancy Crisis and Support,Pregnancy Crisis and Support,"For emotional support, guidance, help and refe...",0.492948
1,l8CmX6ZKXxQi1V8nFDZ4,Babes - A Helping Hand for Pregnant Teens,Babes - A Helping Hand for Pregnant Teens,Staff will discuss the various options availab...,0.507965
2,lYdVGUhRZWRv0J9znzAJ,HCSA Dayspring Residential Treatment Centre,HCSA Community Services,Therapeutic Group Home service model for teena...,0.60824
3,uSUMLDFP4g8fw8IiKPZB,Safe Place,Lakeside Family Services,"Safe Place provides timely, non-judgmental and...",0.608714
4,NT7TklPzpmwQR86kyKV2,Information on Abortion,Association of Women for Action and Research (...,If you are thinking about ending an unwanted p...,0.635671
5,bBGZTpmeqTElDANYxZRl,HIV+ Pregnant Mothers' Fund,Action for AIDS (AFA),"To eliminate mother-to-child HIV transmission,...",0.646851
6,LfvRlIiX4ojxnn3fmiwP,SAFE Programme (Stop Abuse in Families),Trans Family Services,TRANS SAFE Centre is a Family Violence Special...,0.647422
7,rCWgF4B65MCBsvt7JHSI,Transnational Family Support Centre (Project F...,Fei Yue Community Services,Project FAMILY works together with community p...,0.647929
8,bVT2Ce8BDafBuF3soDi6,Young Marriages,INSPIRASI PPIS,PPIS serves Malay/Muslim clients who face mari...,0.65262
9,xh2aWlu7L7mcZEFhJ3KM,Divorce Transition Support,Care Corner Singapore,"Divorce can be a difficult process, resulting ...",0.653156


In [27]:
for res in results.itertuples(index=False):
    print(f"Scheme: {res.scheme}")
    print(f"Agency: {res.agency}")
    print(f"Description: {res.description}\n")

Scheme: Pregnancy Crisis and Support
Agency: Pregnancy Crisis and Support
Description: For emotional support, guidance, help and referrals that could facilitate a decision, whatever that decision may be.

Scheme: Babes - A Helping Hand for Pregnant Teens
Agency: Babes - A Helping Hand for Pregnant Teens
Description: Staff will discuss the various options available in order for client to make an informed decision. The caseworker will journey with the girl to ensure that she is well supported by her family, friends and wider community in her decision and is able to carry out her decision as best as possible. She will also be linked to formal and informal community resources.

Scheme: HCSA Dayspring Residential Treatment Centre
Agency: HCSA Community Services
Description: Therapeutic Group Home service model for teenage girls who have suffered the complex trauma of physical, sexual or emotional abuse. The model consists of two evidence-based practices namely Trauma Systems Therapy (TST) a

# Experiment 2: BM25 

This section, we will explore the performance with an additional BM25 component. We will use simple balance weights for vector and BM25 scores.

In [52]:
from langchain.docstore.document import Document
from langchain_community.retrievers import BM25Retriever

In [55]:
bm25_docs = [Document(page_content=d, metadata={"id": i}) for i, d in zip(scheme_ids, description_boosters)]
bm25_retriever = BM25Retriever.from_documents(bm25_docs)

In [70]:
# Perform semantic search
# top_k * 2 to account for non-overlaps from vector + bm25
vec_result = collection.query(query_texts=[user_query], n_results=10 * 2)

# Create a tuple of IDs and distances from the search results
vec_ids = vec_result["ids"][0]
vec_distances = vec_result["distances"][0]
vec_distances = [max(0, 1 - dist) for dist in vec_distances]
min_val = min(vec_distances)
max_val = max(vec_distances)
if max_val > min_val:
    vec_normalized_scores = [(x - min_val) / (max_val - min_val) for x in vec_distances]
else:
    # edge case: all values are the same
    vec_normalized_scores = [1.0 for _ in vec_distances]

# Create tuples and convert to DataFrame
vec_id_distance_tuples = list(zip(vec_ids, vec_normalized_scores))
vec_search_results_df = pd.DataFrame(
    vec_id_distance_tuples, columns=["scheme_id", "vec_similarity_distance"]
)

bm25_retriever.k = len(description_boosters)
bm25_result = bm25_retriever.invoke(user_query.lower())
bm25_id_score_tuples = []
for i, doc in enumerate(bm25_result):
    bm25_score = 1.0 - (i / len(bm25_result))
    bm25_id_score_tuples.append([doc.metadata["id"], bm25_score])

bm25_search_results_df = pd.DataFrame(
    bm25_id_score_tuples, columns=["scheme_id", "bm25_score"]
)

# Merge with schemes_df to get full information
results_with_schemes = (
    vec_search_results_df
    .merge(
        schemes_df[["scheme_id", "scheme", "agency", "description"]],
        on="scheme_id",
        how="left",
    )
    .merge(
        bm25_search_results_df,                  # add BM25 results
        on="scheme_id",           # or use left_on/right_on if column names differ
        how="left",
    )
)

results_with_schemes["combined_scores"] = (
    results_with_schemes["vec_similarity_distance"] * 0.5 + results_with_schemes["bm25_score"] * 0.5
)

# Sort by similarity score (highest first)
results_with_schemes = results_with_schemes.sort_values(
    "combined_scores", ascending=False
)

results = results_with_schemes[
    ["scheme_id", "scheme", "agency", "description", "combined_scores"]
].head(10).reset_index(drop=True)


In [71]:
results

Unnamed: 0,scheme_id,scheme,agency,description,combined_scores
0,l8CmX6ZKXxQi1V8nFDZ4,Babes - A Helping Hand for Pregnant Teens,Babes - A Helping Hand for Pregnant Teens,Staff will discuss the various options availab...,0.732189
1,bb7tTlzWsVhmVxJwDo5l,Pregnancy Crisis and Support,Pregnancy Crisis and Support,"For emotional support, guidance, help and refe...",0.67875
2,lYdVGUhRZWRv0J9znzAJ,HCSA Dayspring Residential Treatment Centre,HCSA Community Services,Therapeutic Group Home service model for teena...,0.668816
3,uSUMLDFP4g8fw8IiKPZB,Safe Place,Lakeside Family Services,"Safe Place provides timely, non-judgmental and...",0.666214
4,LfvRlIiX4ojxnn3fmiwP,SAFE Programme (Stop Abuse in Families),Trans Family Services,TRANS SAFE Centre is a Family Violence Special...,0.554614
5,bVT2Ce8BDafBuF3soDi6,Young Marriages,INSPIRASI PPIS,PPIS serves Malay/Muslim clients who face mari...,0.512292
6,95vRCYE0v2rkS3tolyzy,Care Corner Youth Rehabilitation,Care Corner Singapore,Transitions during the adolescent phase can be...,0.480126
7,u3DWJt5hXjR03US6HFeA,Family Violence Protection,Care Corner Singapore,Family violence is not only a family problem b...,0.4775
8,tvAS8wXjoyGDECZ26hrO,Casework,Serangoon Moral Family Service Centre,Casework management for those facing financial...,0.474797
9,T0AqOKPg5OwEArDPvLcQ,Trauma Access,Touch Community Services,"We offer a safe, supervised environment where ...",0.456346


In [72]:
for res in results.itertuples(index=False):
    print(f"Scheme: {res.scheme}")
    print(f"Agency: {res.agency}")
    print(f"Description: {res.description}\n")

Scheme: Babes - A Helping Hand for Pregnant Teens
Agency: Babes - A Helping Hand for Pregnant Teens
Description: Staff will discuss the various options available in order for client to make an informed decision. The caseworker will journey with the girl to ensure that she is well supported by her family, friends and wider community in her decision and is able to carry out her decision as best as possible. She will also be linked to formal and informal community resources.

Scheme: Pregnancy Crisis and Support
Agency: Pregnancy Crisis and Support
Description: For emotional support, guidance, help and referrals that could facilitate a decision, whatever that decision may be.

Scheme: HCSA Dayspring Residential Treatment Centre
Agency: HCSA Community Services
Description: Therapeutic Group Home service model for teenage girls who have suffered the complex trauma of physical, sexual or emotional abuse. The model consists of two evidence-based practices namely Trauma Systems Therapy (TST) a

# Experiment 3: Metadata Filtering

In [91]:
from enum import Enum
from pydantic import BaseModel, Field
from openai import OpenAI
from typing import List

In [86]:
class TargetAudience(Enum):
    # Individuals & Families
    GENERAL_PUBLIC = "general_public"
    FAMILIES = "families"
    CHILDREN = "children"
    YOUTH = "youth"
    YOUTH_AT_RISK = "youth_at_risk"
    YOUNG_ADULTS = "young_adults"
    STUDENTS = "students"
    PARENTS = "parents"
    GRANDPARENTS = "grandparents"
    CAREGIVERS = "caregivers"
    COUPLES = "couples"
    MARRIED_COUPLES = "married_couples"
    SOON_TO_WED_COUPLES = "soon_to_wed_couples"
    DIVORCING_FAMILIES = "divorcing_families"
    SINGLE_PARENTS = "single_parents"

    # Elderly & Special Needs
    ELDERLY = "elderly"
    ELDERLY_WITH_DEMENTIA = "elderly_with_dementia"
    ELDERLY_WITH_DISABILITIES = "elderly_with_disabilities"
    ELDERLY_WITH_MOBILITY_ISSUES = "elderly_with_mobility_issues"
    ELDERLY_DISABLED = "elderly_disabled"
    PERSONS_WITH_DISABILITIES = "persons_with_disabilities"
    PERSONS_WITH_INTELLECTUAL_DISABILITIES = "persons_with_intellectual_disabilities"
    PERSONS_WITH_SPECIAL_NEEDS = "persons_with_special_needs"
    PERSONS_WITH_DEMENTIA = "persons_with_dementia"

    # Vulnerable Groups
    LOW_INCOME = "low_income"
    LOW_INCOME_FAMILIES = "low_income_families"
    LOW_INCOME_ELDERLY = "low_income_elderly"
    HOMELESS = "homeless"
    NEED_HOUSING_SHELTER = "need_housing_shelter"
    NEED_FOOD_SUPPORT = "need_food_support"
    NEED_MORTGAGE_SUPPORT = "need_mortgage_support"
    LOSS_OF_BREADWINNER = "loss_of_breadwinner"

    # Health & Mental Wellbeing
    FACING_END_OF_LIFE = "facing_end_of_life"
    PERSONS_WITH_CHRONIC_ILLNESSES = "persons_with_chronic_illnesses"
    PERSONS_WITH_MENTAL_HEALTH_ISSUES = "persons_with_mental_health_issues"
    INDIVIDUALS_FACING_MENTAL_CHALLENGES = "individuals_facing_mental_challenges"
    INDIVIDUALS_STRUGGLING_WITH_LOSS = "individuals_struggling_with_loss"
    INDIVIDUALS_STRUGGLING_WITH_SUICIDAL_THOUGHTS = "individuals_struggling_with_suicidal_thoughts"
    INDIVIDUALS_BEREAVED_BY_SUICIDE = "individuals_bereaved_by_suicide"

    # Safety & Protection
    VICTIMS_OF_ABUSE = "victims_of_abuse"
    VICTIMS_OF_ABUSE_OR_HARASSMENT = "victims_of_abuse_or_harassment"
    WITNESSES_OF_ABUSE = "witnesses_of_abuse"
    PERSONS_FACING_VIOLENCE_OR_ABUSE = "persons_facing_violence_or_abuse"
    PERPETRATORS_OF_DOMESTIC_VIOLENCE = "perpetrators_of_domestic_violence"
    PERSONS_WHO_ABUSE = "persons_who_abuse"

    # Employment & Economic
    UNEMPLOYED = "unemployed"
    JOB_SEEKERS = "job_seekers"
    RETRENCHED = "retrenched"
    EMPLOYERS = "employers"
    PROFESSIONALS = "professionals"
    SOCIAL_SERVICE_PROFESSIONALS = "social_service_professionals"
    SOCIAL_SERVICE_AGENCIES = "social_service_agencies"

    # Communities & Identity
    INDIAN_COMMUNITY = "indian_community"
    MALAY_MUSLIM_COMMUNITY = "malay_muslim_community"
    MUSLIM_COMMUNITY = "muslim_community"
    CHINESE_COMMUNITY = "chinese_community"
    EURASIANS = "eurasians"
    TRANSGENDER_WOMEN = "transgender_women"
    TRANSACTIONAL_FAMILIES = "transnational_families"
    FOREIGN_SPOUSES = "foreign_spouses"

    # Migrant & Foreign Workers
    MIGRANT_WORKERS = "migrant_workers"
    MIGRANT_WORKERS_CMP = "migrant_workers_cmp"
    FOREIGN_DOMESTIC_WORKERS = "foreign_domestic_workers"
    MIGRANT_DOMESTIC_WORKERS = "migrant_domestic_workers"
    SPECIAL_PASS_HOLDERS = "special_pass_holders"
    FAMILIES_OF_INMATES = "families_of_inmates"
    INMATES = "inmates"
    EX_OFFENDERS = "ex_offenders"

    # Addiction & Recovery
    PERSONS_WITH_ADDICTIONS = "persons_with_addictions"
    DRUG_ADDICTION = "drug_addiction"
    GAMBLING_ADDICTION = "gambling_addiction"
    SPENDING_ADDICTION = "spending_addiction"
    SEX_ADDICTION = "sex_addiction"
    PORNOGRAPHY_ADDICTION = "pornography_addiction"
    LOVE_ADDICTION = "love_addiction"

    # Education & Institutions
    EDUCATORS = "educators"
    SCHOOLS = "schools"

    # Miscellaneous
    FRIENDS = "friends"
    VOLUNTEERS = "volunteers"
    AGENCIES_AFFECTED_BY_SUICIDE = "agencies_affected_by_suicide"
    PERSONS_IN_CRISIS = "persons_in_crisis"

In [117]:
class SchemeAudience(BaseModel):
    target: List[TargetAudience] = Field(description="List of potential targets that can be a benenfiacary of the scheme.")


In [118]:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [126]:
example_scheme = schemes_df.loc[
    schemes_df["scheme"] == "Safe Place"
].iloc[0]
print(example_scheme)


phone                                                          6265-6522
eligibility            All mothers with unsupported pregnancies, rega...
address                21 Yung Ho Road #03-01, The Agape, Singapore 6...
summary                  Support for women with unsupported pregnancies.
llm_description        Safe Place by Lakeside empowers women and fami...
scraped_text           ERROR: Unexpected Scraping Error: name 'HEADER...
planning_area                                                JURONG WEST
service_area                                       No Service Boundaries
scheme_type            Women, Single Parents, Family, Housing/Shelter...
search_booster         pregnant individuals in distress, unsupported ...
description            Safe Place provides timely, non-judgmental and...
what_it_gives          Casework, Counselling, Emotional care, Referra...
email                                              lfstj@lakeside.org.sg
agency                                          Lak

In [127]:
target_audience_matching_prompt = f"""
<role>
You are an expert target audience extractor for financial/social schemes.
</role>

<tasks>
Given a information for a financial/social scheme, extract a list of target audience enums.
</tasks>

<scheme_name>
{example_scheme.scheme}
</scheme_name>

<scheme_eligibility>
{example_scheme.eligibility}
</scheme_eligibility>

<scheme_description>
{example_scheme.description}
</scheme_description>

<who_is_it_for>
{example_scheme.who_is_it_for}
</who_is_it_for>
"""

print(target_audience_matching_prompt)



<role>
You are an expert target audience extractor for financial/social schemes.
</role>

<tasks>
Given a information for a financial/social scheme, extract a list of target audience enums.
</tasks>

<scheme_name>
Safe Place
</scheme_name>

<scheme_eligibility>
All mothers with unsupported pregnancies, regardless of marital status, age, income, race, or religion. Eligibility for the Baby Safe scheme may apply to mothers in need of baby essentials.
</scheme_eligibility>

<scheme_description>
Safe Place provides timely, non-judgmental and holistic help to women facing unsupported pregnancies. Through our caring staff and volunteers, every woman will receive individual and customised help for her journey through pregnancy and beyond, regardless of age, race, religion or marital status.
</scheme_description>

<who_is_it_for>
Women, Pregnant individuals in distress, Single parents, Families
</who_is_it_for>



In [133]:
response = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Given the following financial/social scheme, extract a list of target audience enums"},
        {"role": "user", "content": target_audience_matching_prompt},
    ],
    response_format=SchemeAudience
)


In [135]:
target = SchemeAudience.model_validate_json(response.choices[0].message.content)
print(target.target)

[<TargetAudience.GENERAL_PUBLIC: 'general_public'>, <TargetAudience.SINGLE_PARENTS: 'single_parents'>, <TargetAudience.FAMILIES: 'families'>, <TargetAudience.PERSONS_IN_CRISIS: 'persons_in_crisis'>]


In [136]:
example_scheme_2 = schemes_df.loc[
    schemes_df["scheme"] == "Family Violence Protection"
].iloc[0]
print(example_scheme_2)

phone                                  [6476 1482, 6250 6813, 6353 1180]
eligibility            Victims of family violence, individuals using ...
address                [7A Commonwealth Avenue #01-672 Singapore 1410...
summary                Integrated support for those affected by famil...
llm_description        Care Corner Project StART provides integrated ...
scraped_text           ERROR: Unexpected Scraping Error: name 'HEADER...
planning_area                         [QUEENSTOWN, WOODLANDS, TOA PAYOH]
service_area                                       No Service Boundaries
scheme_type            Abuse/Family Violence, Protection from Violenc...
search_booster         family violence, domestic violence, protection...
description            Family violence is not only a family problem b...
what_it_gives          Counselling, Casework, Emotional care, Helplin...
email                  [projectstart@carecorner.org.sg, ccs@carecorne...
agency                                             

In [138]:
target_audience_matching_prompt_2 = f"""
<role>
You are an expert target audience extractor for financial/social schemes.
</role>

<tasks>
Given a information for a financial/social scheme, extract a list of target audience enums.
</tasks>

<scheme_name>
{example_scheme_2.scheme}
</scheme_name>

<scheme_eligibility>
{example_scheme_2.eligibility}
</scheme_eligibility>

<scheme_description>
{example_scheme_2.description}
</scheme_description>

<who_is_it_for>
{example_scheme_2.who_is_it_for}
</who_is_it_for>
"""

print(target_audience_matching_prompt_2)


<role>
You are an expert target audience extractor for financial/social schemes.
</role>

<tasks>
Given a information for a financial/social scheme, extract a list of target audience enums.
</tasks>

<scheme_name>
Family Violence Protection
</scheme_name>

<scheme_eligibility>
Victims of family violence, individuals using violence, and vulnerable family members (including children and elderly) who have witnessed abuse. PPO applicants must be family members (spouse, ex-spouse, child, parent, parent-in-law, sibling, or other relatives as defined by the court). Applicants under 21 require a parent, guardian, or appointed social worker to apply on their behalf.
</scheme_eligibility>

<scheme_description>
Family violence is not only a family problem but a societal issue. As a Protection Specialist Centre, Care Corner Project StART provides integrated services for individuals and families affected by the issues of family violence.**By promoting safe interactions** and **healthy relationship

In [139]:
response_2 = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Given the following financial/social scheme, extract a list of target audience enums"},
        {"role": "user", "content": target_audience_matching_prompt_2},
    ],
    response_format=SchemeAudience
)


In [140]:
target_2 = SchemeAudience.model_validate_json(response_2.choices[0].message.content)
print(target_2.target)

[<TargetAudience.VICTIMS_OF_ABUSE_OR_HARASSMENT: 'victims_of_abuse_or_harassment'>, <TargetAudience.FAMILIES: 'families'>, <TargetAudience.CHILDREN: 'children'>, <TargetAudience.ELDERLY: 'elderly'>, <TargetAudience.INDIVIDUALS_STRUGGLING_WITH_LOSS: 'individuals_struggling_with_loss'>, <TargetAudience.WITNESSES_OF_ABUSE: 'witnesses_of_abuse'>, <TargetAudience.PERSONS_FACING_VIOLENCE_OR_ABUSE: 'persons_facing_violence_or_abuse'>, <TargetAudience.VICTIMS_OF_ABUSE: 'victims_of_abuse'>, <TargetAudience.VICTIMS_OF_ABUSE_OR_HARASSMENT: 'victims_of_abuse_or_harassment'>]
