In [None]:
from dotenv import load_dotenv; load_dotenv()
from humaidclf import (
    load_tsv, plan_run_dirs, LABELS, SYSTEM_PROMPT, make_user_message,
    sync_test_sample, build_requests_jsonl_S,
    upload_file_for_batch, create_batch, wait_for_batch, download_file_content,
    parse_outputs_S_to_df, macro_f1, analyze_and_export_mistakes
)

# Zero-shot rule sets: each RULES_* constant is an alternative rules string
# that can be swapped into the same pipeline for A/B comparison.
# RULES_1 is the terse variant: one short line per label, followed by a
# single tie-break hint. Lines are joined with "\n" (no trailing newline).
RULES_1 = "\n".join(
    [
        "- requests_or_urgent_needs: asking for help/supplies/SOS",
        "- rescue_volunteering_or_donation_effort: offering help, donation, organizing aid",
        "- caution_and_advice: warnings/instructions/tips",
        "- displaced_people_and_evacuations: evacuations, relocation, shelters",
        "- injured_or_dead_people: injuries, casualties, fatalities",
        "- missing_or_found_people: missing or found persons",
        "- infrastructure_and_utility_damage: damage/outages to roads/bridges/power/water/buildings",
        "- sympathy_and_support: prayers/condolences, no actionable info",
        "- other_relevant_information: on-topic but none of the above",
        "- not_humanitarian: unrelated to disasters/aid",
        "Tie-break: prefer actionable class when in doubt.",
    ]
)

# RULES_2 is the verbose variant: every label carries explicit inclusion /
# exclusion guidance, followed by four numbered tie-breakers.
# Lines are joined with "\n" (no trailing newline).
RULES_2 = "\n".join(
    [
        # --- per-label definitions ---
        "- requests_or_urgent_needs: ASKING for help/supplies/services (need/please help/send/urgent/SOS). If both ask and offer words appear, ASKING wins.",
        "- rescue_volunteering_or_donation_effort: OFFERING help, organizing rescues, donation drives, fundraisers, volunteering sign-ups.",
        "- caution_and_advice: Warnings, instructions, actionable tips (evacuate/avoid/boil water). If only prayers/solidarity words, do NOT use this.",
        "- displaced_people_and_evacuations: Evacuation orders, relocations, sheltering, families displaced.",
        "- injured_or_dead_people: Injuries, casualties, fatalities.",
        "- missing_or_found_people: People reported missing OR confirmed found/located/reunited. If not explicit, do NOT use this.",
        "- infrastructure_and_utility_damage: Physical damage or outages to roads, bridges, buildings, power, water, comms, caused by the disaster. If disaster context is unclear, prefer not_humanitarian or other_relevant_information.",
        "- sympathy_and_support: Prayers, thoughts, condolences, “stay strong”, morale support ONLY (no requests, offers, warnings).",
        "- other_relevant_information: On-topic situation info that fits none of the above (e.g., event stats, forecasts, timelines) AND is clearly disaster-related.",
        "- not_humanitarian: Unrelated to disasters/aid or unclear/no disaster context.",
        # --- tie-breakers ---
        "Tie-breakers:",
        "1) ASKING vs OFFERING → ASKING wins (requests_or_urgent_needs).",
        "2) People vs infrastructure → if injuries/casualties/missing are present, choose the people class.",
        "3) Sympathy vs caution → only actionable verbs → caution_and_advice; otherwise sympathy_and_support.",
        "4) Infra damage needs disaster context; otherwise not_humanitarian.",
    ]
)




# california_wildfires_2018

In [None]:
# End-to-end zero-shot run for the california_wildfires_2018 dev split:
# load -> dry-run on a sample -> build batch requests -> submit/wait ->
# download/parse/score -> optional error analysis.

# 0) Load data
dataset_path = "Dataset/HumAID/california_wildfires_2018/california_wildfires_2018_dev.tsv"
df = load_tsv(dataset_path)

# Pick the rule set, its run tag and the model in ONE place so the dry run,
# the batch requests and the run-directory name cannot drift apart when
# switching between RULES_1 / RULES_2 for A/B comparisons.
rules, run_tag = RULES_1, "modeS-RULES1"
model_name = "gpt-4o-mini"

# 1) Dry-run zero-shot on n=20 rows before paying for a full batch.
#    The return value is intentionally discarded.
_ = sync_test_sample(df, n=20, rules=rules, model=model_name, temperature=0.0, seed=42)

# 2) Plan dir + build JSONL (the model written into the requests is the one
#    recorded in the plan, so both stay in sync).
plan = plan_run_dirs(dataset_path, out_root="runs", model=model_name, tag=run_tag)
build_requests_jsonl_S(df, plan["requests_jsonl"], rules=rules, model=plan["model"], temperature=0.0)

# 3) Submit batch and wait (polls every 20 seconds).
fid  = upload_file_for_batch(str(plan["requests_jsonl"]))
bid  = create_batch(fid, endpoint="/v1/chat/completions", completion_window="24h")
info = wait_for_batch(bid, poll_secs=20)

# Fail loudly if the batch produced no output file. Without this check an
# empty id is handed to the download step and the failure surfaces later as
# an unrelated-looking download/parse error.
# NOTE(review): presumably a failed/expired/cancelled batch is what yields an
# empty output_file_id here — confirm against wait_for_batch's return value.
output_file_id = info["output_file_id"]
if not output_file_id:
    raise RuntimeError(
        f"Batch {bid} finished without an output file; batch info: {info!r}"
    )

# 4) Download + parse + save
download_file_content(output_file_id, str(plan["outputs_jsonl"]))
preds = parse_outputs_S_to_df(plan["outputs_jsonl"], df)
preds.to_csv(plan["predictions_csv"], index=False)
print("Macro-F1:", macro_f1(preds))

# 5) Optional: error analysis (writes mistakes.csv and a charts/ folder
#    under the planned run directory).
mistakes_df, summary, per_cls, conf_df = analyze_and_export_mistakes(
    pred_csv_path=str(plan["predictions_csv"]),
    out_mistakes_csv_path=str(plan["dir"] / "mistakes.csv"),
    charts_dir=str(plan["dir"] / "charts"),
)
# Bare expression: displayed as the notebook cell's output.
summary