In [1]:
from dotenv import load_dotenv; load_dotenv()
from humaidclf import (
    load_tsv, plan_run_dirs, LABELS, SYSTEM_PROMPT, make_user_message,
    sync_test_sample, build_requests_jsonl_S,
    upload_file_for_batch, create_batch, wait_for_batch, download_file_content,
    parse_outputs_S_to_df, macro_f1, analyze_and_export_mistakes
)

# Define ZERO-SHOT rules (try multiple strings to A/B)
# RULES_DF: baseline label definitions, one bullet per class label.
# The heading of the section that runs it describes this text as the rules
# from the Imran paper (not verifiable from this notebook alone).
# The trailing "..." on several bullets is part of the prompt text itself.
RULES_DF = """
- caution_and_advice: Reports of warnings issued or lifted, guidance and tips related to the disaster.
- sympathy_and_support: Tweets with prayers, thoughts, and emotional support.
- requests_or_urgent_needs: Reports of urgent needs or supplies such as food, water, clothing, money,...
- displaced_people_and_evacuations: People who have relocated due to the crisis, even for a short time...
- injured_or_dead_people: Reports of injured or dead people due to the disaster.
- missing_or_found_people: Reports of missing or found people due to the disaster.
- infrastructure_and_utility_damage: Reports of any type of damage to infrastructure such as buildings, houses,...
- rescue_volunteering_or_donation_effort: Reports of any type of rescue, volunteering, or donation efforts...
- other_relevant_information: on-topic but none of the above
- not_humanitarian: If the tweet does not convey humanitarian aid-related information.
"""

# RULES_1: first A/B variant. As written here its text appears identical to
# RULES_DF; it is kept as a separate name so it can be edited independently.
RULES_1 = """
- caution_and_advice: Reports of warnings issued or lifted, guidance and tips related to the disaster.
- sympathy_and_support: Tweets with prayers, thoughts, and emotional support.
- requests_or_urgent_needs: Reports of urgent needs or supplies such as food, water, clothing, money,...
- displaced_people_and_evacuations: People who have relocated due to the crisis, even for a short time...
- injured_or_dead_people: Reports of injured or dead people due to the disaster.
- missing_or_found_people: Reports of missing or found people due to the disaster.
- infrastructure_and_utility_damage: Reports of any type of damage to infrastructure such as buildings, houses,...
- rescue_volunteering_or_donation_effort: Reports of any type of rescue, volunteering, or donation efforts...
- other_relevant_information: on-topic but none of the above
- not_humanitarian: If the tweet does not convey humanitarian aid-related information.
"""

# RULES_2: rewritten definitions with explicit ASKING-vs-OFFERING precedence
# and "do NOT use this" exclusions, followed by an "Important" note.
# NOTE(review): the final line misspells "disaster" as "diaster", and it is
# stricter than the not_humanitarian bullet above it (which also covers
# unclear/no disaster context). The text is left untouched because it is the
# prompt itself; editing it would no longer match any run made with it.
RULES_2 = """
- requests_or_urgent_needs: ASKING for help/supplies/services (need/please help/send/urgent/SOS). If both ask and offer words appear, ASKING wins.
- rescue_volunteering_or_donation_effort: OFFERING help, organizing rescues, donation drives, fundraisers, volunteering sign-ups.
- caution_and_advice: Warnings, instructions, actionable tips (evacuate/avoid/boil water). If only prayers/solidarity words, do NOT use this.
- displaced_people_and_evacuations: Evacuation orders, relocations, sheltering, families displaced.
- injured_or_dead_people: Injuries, casualties, fatalities.
- missing_or_found_people: People reported missing OR confirmed found/located/reunited. If not explicit, do NOT use this.
- infrastructure_and_utility_damage: Physical damage or outages to roads, bridges, buildings, power, water, comms, caused by the disaster. If disaster context is unclear, prefer not_humanitarian or other_relevant_information.
- sympathy_and_support: Prayers, thoughts, condolences, “stay strong”, morale support ONLY (no requests, offers, warnings).
- other_relevant_information: On-topic situation info that fits none of the above (e.g., event stats, forecasts, timelines) AND is clearly disaster-related.
- not_humanitarian: Unrelated to disasters/aid or unclear/no disaster context.
Important:
Only choose not_humanitarian if the tweet is totally irrelevant to diaster.
"""

# RULES_3: the first eight bullets appear identical to RULES_DF; only the
# last two are reworded. other_relevant_information is spelled out as the
# on-topic catch-all, and not_humanitarian is restricted to tweets unrelated
# to disasters.
RULES_3 = """
- caution_and_advice: Reports of warnings issued or lifted, guidance and tips related to the disaster.
- sympathy_and_support: Tweets with prayers, thoughts, and emotional support.
- requests_or_urgent_needs: Reports of urgent needs or supplies such as food, water, clothing, money,...
- displaced_people_and_evacuations: People who have relocated due to the crisis, even for a short time...
- injured_or_dead_people: Reports of injured or dead people due to the disaster.
- missing_or_found_people: Reports of missing or found people due to the disaster.
- infrastructure_and_utility_damage: Reports of any type of damage to infrastructure such as buildings, houses,...
- rescue_volunteering_or_donation_effort: Reports of any type of rescue, volunteering, or donation efforts...
- other_relevant_information: On-topic info about the disaster (facts, stats, timelines, situational updates...) that fits none of the above.
- not_humanitarian: Use ONLY when the tweet is unrelated to disasters.
"""

# RULES_4: structured prompt with four parts — a goal statement, general
# principles for choosing the primary intent, per-class definitions, and an
# output reminder to return exactly one label.
RULES_4 = """
Goal: Choose exactly one label that best reflects the tweet’s primary intent or information.

GENERAL PRINCIPLES
- Primary intent = the main speech act or information the author conveys (who is doing what, for whom, right now).
- If parts of a tweet point to multiple classes, pick the class with the strongest, most central signal (imperatives, first-person claims, logistics, explicit status), not simply the first match.
- If signals are truly balanced, pick the class that would be most actionable for responders.

CLASS DEFINITIONS
- caution_and_advice: Warnings/instructions/tips about the disaster (EVACUATE, AVOID, BOIL WATER). NOT prayers/support.
- sympathy_and_support: Prayers/thoughts/condolences/morale support ONLY. If mainly praising volunteers (no call to action), keep here.
- requests_or_urgent_needs: ASKING for help/supplies/services (NEED, PLEASE HELP, URGENT, SOS). IF both ASK and OFFER appear → ASK WINS.
- displaced_people_and_evacuations: Evacuation orders, relocation, sheltering, DISPLACED families/people.
- injured_or_dead_people: Injuries, casualties, fatalities.
- missing_or_found_people: People reported MISSING or FOUND/REUNITED (must be explicit).
- infrastructure_and_utility_damage: Physical damage/outages to roads/bridges/buildings/power/water/comms CAUSED BY THE DISASTER (explicit or strongly implied: due to/after storm/quake/wildfire).
- rescue_volunteering_or_donation_effort: OFFERING help, organizing rescues, donation/fundraiser drives, volunteer sign-ups, distribution events.
- other_relevant_information: On-topic disaster info (facts, stats, timelines, situational updates, official agency/community updates) that matches NONE of the above. Treat as ON-TOPIC if event name/hashtag/location+disaster term or official update is present.
- not_humanitarian: Use ONLY when unrelated to disasters/aid OR disaster context is absent/unclear. If on-topic cues exist but no specific class fits → choose other_relevant_information.

OUTPUT
- Return exactly one label that matches the primary intent.
"""

# RULES_100: the ten class bullets appear identical to RULES_2, but the
# closing "Important" note is replaced by four numbered tie-breakers.
# No cell in the visible part of this notebook passes RULES_100 to a run.
RULES_100 = """
- requests_or_urgent_needs: ASKING for help/supplies/services (need/please help/send/urgent/SOS). If both ask and offer words appear, ASKING wins.
- rescue_volunteering_or_donation_effort: OFFERING help, organizing rescues, donation drives, fundraisers, volunteering sign-ups.
- caution_and_advice: Warnings, instructions, actionable tips (evacuate/avoid/boil water). If only prayers/solidarity words, do NOT use this.
- displaced_people_and_evacuations: Evacuation orders, relocations, sheltering, families displaced.
- injured_or_dead_people: Injuries, casualties, fatalities.
- missing_or_found_people: People reported missing OR confirmed found/located/reunited. If not explicit, do NOT use this.
- infrastructure_and_utility_damage: Physical damage or outages to roads, bridges, buildings, power, water, comms, caused by the disaster. If disaster context is unclear, prefer not_humanitarian or other_relevant_information.
- sympathy_and_support: Prayers, thoughts, condolences, “stay strong”, morale support ONLY (no requests, offers, warnings).
- other_relevant_information: On-topic situation info that fits none of the above (e.g., event stats, forecasts, timelines) AND is clearly disaster-related.
- not_humanitarian: Unrelated to disasters/aid or unclear/no disaster context.
Tie-breakers:
1) ASKING vs OFFERING → ASKING wins (requests_or_urgent_needs).
2) People vs infrastructure → if injuries/casualties/missing are present, choose the people class.
3) Sympathy vs caution → only actionable verbs → caution_and_advice; otherwise sympathy_and_support.
4) Infra damage needs disaster context; otherwise not_humanitarian.
"""




# california_wildfires_2018 with RULES 1

In [2]:
# Step 0: load the TSV for this event's train split.
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_train.tsv"
df = load_tsv(dataset_path)

# Step 1 (optional): synchronous zero-shot dry run on a small sample (n=20).
_ = sync_test_sample(
    df,
    n=20,
    rules=RULES_1,
    model="gpt-4o-mini",
    temperature=0.0,
    seed=42,
)

# Step 2: plan the run directory, then write the batch request JSONL.
plan = plan_run_dirs(
    dataset_path,
    out_root="runs",
    model="gpt-4o-mini",
    tag="modeS-RULES1",
)
# Kept as the cell's last expression so its return value is displayed.
build_requests_jsonl_S(
    df,
    plan["requests_jsonl"],
    rules=RULES_1,
    model=plan["model"],
    temperature=0.0,
)

Macro-F1 (tiny sample): 0.419047619047619


WindowsPath('runs/california_wildfires_2018/train/gpt-4o-mini/20251018-192331-modeS-RULES1/requests.jsonl')

In [4]:
# Step 3: upload the request file, create a batch on the chat-completions
# endpoint, and block in wait_for_batch (poll_secs=300) until it returns.
fid = upload_file_for_batch(str(plan["requests_jsonl"]))
bid = create_batch(
    fid,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
info = wait_for_batch(bid, poll_secs=300)

[batch batch_68f44bae2e888190960ec64456b9c2d2] status = validating
[batch batch_68f44bae2e888190960ec64456b9c2d2] status = in_progress
[batch batch_68f44bae2e888190960ec64456b9c2d2] status = in_progress
[batch batch_68f44bae2e888190960ec64456b9c2d2] status = in_progress
[batch batch_68f44bae2e888190960ec64456b9c2d2] status = finalizing
[batch batch_68f44bae2e888190960ec64456b9c2d2] status = completed


In [5]:
# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

Macro-F1: 0.6164874649237985


In [7]:
# Step 5 (optional): error analysis for one finished run.
from humaidclf import analyze_and_export_mistakes
from pathlib import Path

# Run folder to analyse; it must already contain predictions.csv.
base = Path(r"runs/california_wildfires_2018/train/gpt-4o-mini/20251018-192331-modeS-RULES1/")

# Inputs and outputs, all derived from the run folder.
analysis = base / "analysis"            # analysis subfolder
pred_csv = base / "predictions.csv"     # classification result (input)
mistakes = analysis / "mistakes.csv"    # created/overwritten, per the original note
chartsdir = analysis / "charts"         # created if missing, per the original note

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    charts_dir=chartsdir,
    out_mistakes_csv_path=mistakes,
    pred_csv_path=pred_csv,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 5163,
 'num_correct': 3635,
 'num_incorrect': 1528,
 'accuracy': np.float64(0.7040480340887081),
 'macro_f1': 0.6164874649237986,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'injured_or_dead_people',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}

# california_wildfires_2018 with RULES 2

In [2]:
# Step 0: load the TSV for this event's train split.
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_train.tsv"
df = load_tsv(dataset_path)

# Step 1 (optional): synchronous zero-shot dry run on a small sample (n=20).
_ = sync_test_sample(
    df,
    n=20,
    rules=RULES_2,
    model="gpt-4o-mini",
    temperature=0.0,
    seed=42,
)

# Step 2: plan the run directory, then write the batch request JSONL.
plan = plan_run_dirs(
    dataset_path,
    out_root="runs",
    model="gpt-4o-mini",
    tag="modeS-RULES2",
)
# Kept as the cell's last expression so its return value is displayed.
build_requests_jsonl_S(
    df,
    plan["requests_jsonl"],
    rules=RULES_2,
    model=plan["model"],
    temperature=0.0,
)

Macro-F1 (tiny sample): 0.36666666666666664


WindowsPath('runs/california_wildfires_2018/train/gpt-4o-mini/20251018-212251-modeS-RULES2/requests.jsonl')

In [3]:
# Step 3: upload the request file, create a batch on the chat-completions
# endpoint, and block in wait_for_batch (poll_secs=300) until it returns.
fid = upload_file_for_batch(str(plan["requests_jsonl"]))
bid = create_batch(
    fid,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
info = wait_for_batch(bid, poll_secs=300)

[batch batch_68f467c64d0081909ac9eba8c8554269] status = validating
[batch batch_68f467c64d0081909ac9eba8c8554269] status = in_progress
[batch batch_68f467c64d0081909ac9eba8c8554269] status = in_progress
[batch batch_68f467c64d0081909ac9eba8c8554269] status = in_progress
[batch batch_68f467c64d0081909ac9eba8c8554269] status = in_progress
[batch batch_68f467c64d0081909ac9eba8c8554269] status = finalizing
[batch batch_68f467c64d0081909ac9eba8c8554269] status = completed


In [4]:
# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

Macro-F1: 0.6154190540025045


In [5]:
# Step 5 (optional): error analysis for one finished run.
from humaidclf import analyze_and_export_mistakes
from pathlib import Path

# Run folder to analyse; it must already contain predictions.csv.
base = Path(r"runs/california_wildfires_2018/train/gpt-4o-mini/20251018-212251-modeS-RULES2/")

# Inputs and outputs, all derived from the run folder.
analysis = base / "analysis"            # analysis subfolder
pred_csv = base / "predictions.csv"     # classification result (input)
mistakes = analysis / "mistakes.csv"    # created/overwritten, per the original note
chartsdir = analysis / "charts"         # created if missing, per the original note

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    charts_dir=chartsdir,
    out_mistakes_csv_path=mistakes,
    pred_csv_path=pred_csv,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 5163,
 'num_correct': 3630,
 'num_incorrect': 1533,
 'accuracy': np.float64(0.7030796048808832),
 'macro_f1': 0.6154190540025046,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'injured_or_dead_people',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}

# california_wildfires_2018 with RULES 3

In [3]:
# Step 0: load the TSV for this event's train split.
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_train.tsv"
df = load_tsv(dataset_path)

# Step 1 (optional): synchronous zero-shot dry run on a small sample (n=20).
_ = sync_test_sample(
    df,
    n=20,
    rules=RULES_3,
    model="gpt-4o-mini",
    temperature=0.0,
    seed=42,
)

# Step 2: plan the run directory, then write the batch request JSONL.
plan = plan_run_dirs(
    dataset_path,
    out_root="runs",
    model="gpt-4o-mini",
    tag="modeS-RULES3",
)
# Kept as the cell's last expression so its return value is displayed.
build_requests_jsonl_S(
    df,
    plan["requests_jsonl"],
    rules=RULES_3,
    model=plan["model"],
    temperature=0.0,
)

Macro-F1 (tiny sample): 0.5868893011750155


WindowsPath('runs/california_wildfires_2018/train/gpt-4o-mini/20251018-220846-modeS-RULES3/requests.jsonl')

In [4]:
# Step 3: upload the request file, create a batch on the chat-completions
# endpoint, and block in wait_for_batch (poll_secs=300) until it returns.
fid = upload_file_for_batch(str(plan["requests_jsonl"]))
bid = create_batch(
    fid,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
info = wait_for_batch(bid, poll_secs=300)

[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = validating
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = in_progress
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = in_progress
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = in_progress
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = in_progress
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = finalizing
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = finalizing
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = finalizing
[batch batch_68f47284f4b48190bf65c0b3d88a8453] status = completed


In [5]:
# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

Macro-F1: 0.6189517882011742


In [6]:
# Step 5 (optional): error analysis for one finished run.
from humaidclf import analyze_and_export_mistakes
from pathlib import Path

# Run folder to analyse; it must already contain predictions.csv.
base = Path(r"runs/california_wildfires_2018/train/gpt-4o-mini/20251018-220846-modeS-RULES3/")

# Inputs and outputs, all derived from the run folder.
analysis = base / "analysis"            # analysis subfolder
pred_csv = base / "predictions.csv"     # classification result (input)
mistakes = analysis / "mistakes.csv"    # created/overwritten, per the original note
chartsdir = analysis / "charts"         # created if missing, per the original note

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    charts_dir=chartsdir,
    out_mistakes_csv_path=mistakes,
    pred_csv_path=pred_csv,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 5163,
 'num_correct': 3635,
 'num_incorrect': 1528,
 'accuracy': np.float64(0.7040480340887081),
 'macro_f1': 0.6189517882011742,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'injured_or_dead_people',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}

# california_wildfires_2018 with RULES 4

In [2]:
# Step 0: load the TSV for this event's train split.
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_train.tsv"
df = load_tsv(dataset_path)

# Step 1: synchronous zero-shot dry run on a small sample (n=20).
_ = sync_test_sample(
    df,
    n=20,
    rules=RULES_4,
    model="gpt-4o-mini",
    temperature=0.0,
    seed=42,
)

# Step 2: plan the run directory, then write the batch request JSONL.
plan = plan_run_dirs(
    dataset_path,
    out_root="runs",
    model="gpt-4o-mini",
    tag="modeS-RULES4",
)
# Kept as the cell's last expression so its return value is displayed.
build_requests_jsonl_S(
    df,
    plan["requests_jsonl"],
    rules=RULES_4,
    model=plan["model"],
    temperature=0.0,
)

Macro-F1 (tiny sample): 0.4571428571428572


WindowsPath('runs/california_wildfires_2018/train/gpt-4o-mini/20251018-231232-modeS-RULES4/requests.jsonl')

In [3]:
# 3) Submit batch and wait
fid  = upload_file_for_batch(str(plan["requests_jsonl"]))
bid  = create_batch(fid, endpoint="/v1/chat/completions", completion_window="24h")
info = wait_for_batch(bid, poll_secs=300)

# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch {bid} has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

[batch batch_68f4815ca2c08190935b02f20318b32a] status = validating
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = in_progress
[batch batch_68f4815ca2c08190935b02f20318b32a] status = finalizing
[batch batch_68f4815ca2c08190935b02f20318b32a] status = finalizing
[batch batch_68f4815ca2c08190935b02f20318b32a] status = completed
Macro-F1: 0.6142976145425036


In [4]:
# Step 5 (optional): error analysis for one finished run.
from humaidclf import analyze_and_export_mistakes
from pathlib import Path

# Run folder to analyse; it must already contain predictions.csv.
base = Path(r"runs/california_wildfires_2018/train/gpt-4o-mini/20251018-231232-modeS-RULES4/")

# Inputs and outputs, all derived from the run folder.
analysis = base / "analysis"            # analysis subfolder
pred_csv = base / "predictions.csv"     # classification result (input)
mistakes = analysis / "mistakes.csv"    # created/overwritten, per the original note
chartsdir = analysis / "charts"         # created if missing, per the original note

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    charts_dir=chartsdir,
    out_mistakes_csv_path=mistakes,
    pred_csv_path=pred_csv,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 5163,
 'num_correct': 3560,
 'num_incorrect': 1603,
 'accuracy': np.float64(0.6895215959713344),
 'macro_f1': 0.6142976145425038,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'injured_or_dead_people',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}

# california_wildfires_2018 with RULES DF (exactly the same as the rules in the Imran paper)

In [8]:
# 0) Load data
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_train.tsv"
df = load_tsv(dataset_path)

# 1) Dry-run zero-shot
# Use the same rule text as the batch built below, so the tiny-sample score
# previews the prompt that is actually submitted (this previously passed
# RULES_1 while the batch used RULES_DF).
_ = sync_test_sample(df, n=20, rules=RULES_DF, model="gpt-4o-mini", temperature=0.0, seed=42)

# 2) Plan dir + build JSONL
plan = plan_run_dirs(dataset_path, out_root="runs", model="gpt-4o-mini", tag="modeS-RULESDF")
# Kept as the cell's last expression so its return value is displayed.
build_requests_jsonl_S(df, plan["requests_jsonl"], rules=RULES_DF, model=plan["model"], temperature=0.0)

Macro-F1 (tiny sample): 0.45188145188145185


WindowsPath('runs/california_wildfires_2018/train/gpt-4o-mini/20251018-132622-modeS-RULES/requests.jsonl')

In [None]:
# 3) Submit batch and wait
fid  = upload_file_for_batch(str(plan["requests_jsonl"]))
bid  = create_batch(fid, endpoint="/v1/chat/completions", completion_window="24h")
info = wait_for_batch(bid, poll_secs=300)

# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch {bid} has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

In [10]:
# 5) Optional: error analysis
from pathlib import Path
from humaidclf import analyze_and_export_mistakes

# Point to the RUN folder (the one that contains predictions.csv).
# Forward slashes are used so the path resolves on Windows and POSIX alike;
# the previous backslash form only worked on Windows.
# NOTE(review): the recorded output of the build cell shows this timestamp
# with the tag "modeS-RULES" rather than "modeS-RULESDF" — confirm the folder
# name on disk.
base = Path("runs/california_wildfires_2018/train/gpt-4o-mini/20251018-132622-modeS-RULESDF")

pred_csv   = base / "predictions.csv"         # classification result
analysis   = base / "analysis"                # analysis subfolder
mistakes   = analysis / "mistakes.csv"        # will be created/overwritten
chartsdir  = analysis / "charts"              # will be created if missing

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    pred_csv_path=pred_csv,
    out_mistakes_csv_path=mistakes,
    charts_dir=chartsdir,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 5163,
 'num_correct': 3661,
 'num_incorrect': 1502,
 'accuracy': np.float64(0.7090838659693977),
 'macro_f1': 0.6022003333962889,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'injured_or_dead_people',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}

# canada_wildfires_2016

In [None]:
# 0) Load data
dataset_path = "Dataset/HumAID/canada_wildfires_2016/canada_wildfires_2016_train.tsv"
df = load_tsv(dataset_path)

# 1) Dry-run zero-shot
_ = sync_test_sample(df, n=20, rules=RULES_1, model="gpt-4o-mini", temperature=0.0, seed=42)

# 2) Plan dir + build JSONL
plan = plan_run_dirs(dataset_path, out_root="runs", model="gpt-4o-mini", tag="modeS-RULES1")
build_requests_jsonl_S(df, plan["requests_jsonl"], rules=RULES_1, model=plan["model"], temperature=0.0)

# 3) Submit batch and wait
fid  = upload_file_for_batch(str(plan["requests_jsonl"]))
bid  = create_batch(fid, endpoint="/v1/chat/completions", completion_window="24h")
info = wait_for_batch(bid, poll_secs=300)

# 4) Download + parse + save
# Fail early with a readable message when the batch ended without an output
# file (e.g. it did not complete successfully), instead of handing an empty
# id to the download helper.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(f"Batch {bid} has no output file to download; batch info: {info!r}")

# Fetch the raw batch output, align it with the input frame, and persist it.
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

In [5]:
# 5) Optional: error analysis
from pathlib import Path
from humaidclf import analyze_and_export_mistakes

# Point to the RUN folder (the one that contains predictions.csv).
# Forward slashes are used so the path resolves on Windows and POSIX alike;
# the previous backslash form only worked on Windows.
base = Path("runs/canada_wildfires_2016/train/gpt-4o-mini/20251017-234853-modeS-RULES1")

pred_csv   = base / "predictions.csv"         # classification result
analysis   = base / "analysis"                # analysis subfolder
mistakes   = analysis / "mistakes.csv"        # will be created/overwritten
chartsdir  = analysis / "charts"              # will be created if missing

mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    pred_csv_path=pred_csv,
    out_mistakes_csv_path=mistakes,
    charts_dir=chartsdir,
)

# Bare expression so the notebook displays the summary.
summary

{'num_total_with_truth': 1569,
 'num_correct': 1227,
 'num_incorrect': 342,
 'accuracy': np.float64(0.7820267686424475),
 'macro_f1': 0.5945937678829314,
 'labels': ['caution_and_advice',
  'displaced_people_and_evacuations',
  'infrastructure_and_utility_damage',
  'missing_or_found_people',
  'not_humanitarian',
  'other_relevant_information',
  'requests_or_urgent_needs',
  'rescue_volunteering_or_donation_effort',
  'sympathy_and_support']}