# Domain Case Studies: Task, Metric, and Tradeoff Decisions\n\nUse this notebook to practice selecting the right supervised-learning setup for realistic scenarios.

## What You Should Practice\n- identify the correct task type from a scenario,\n- choose a primary metric aligned to risk,\n- justify the precision/recall or error tradeoff,\n- compare your choices against a rubric with explanations.

In [None]:
import pandas as pd
from dataclasses import dataclass


@dataclass(frozen=True)
class CaseDefinition:
    """Immutable rubric entry for one practice scenario.

    The first three fields describe the scenario shown to the learner; the
    remaining fields hold the expected answer and its justification.
    """

    case_id: str         # short identifier such as 'C1'
    domain: str          # business area the scenario comes from
    scenario: str        # prompt text describing the prediction problem
    task_type: str       # expected supervised-learning task type
    primary_metric: str  # expected headline evaluation metric
    tradeoff_focus: str  # expected error tradeoff to prioritise
    why: str             # one-sentence rationale for the expected answer


# The rubric: one spec per case, turned into frozen CaseDefinition records.
CASES = [
    CaseDefinition(**spec)
    for spec in (
        {
            "case_id": "C1",
            "domain": "Healthcare",
            "scenario": "Predict whether a discharged patient will be readmitted within 30 days.",
            "task_type": "binary_classification",
            "primary_metric": "recall",
            "tradeoff_focus": "minimize_false_negatives",
            "why": "Missing a true high-risk patient can delay intervention and increase harm.",
        },
        {
            "case_id": "C2",
            "domain": "Finance",
            "scenario": "Predict whether a borrower will default in the next 12 months.",
            "task_type": "binary_classification",
            "primary_metric": "recall",
            "tradeoff_focus": "minimize_false_negatives",
            "why": "False negatives can create direct financial losses if risky loans are approved.",
        },
        {
            "case_id": "C3",
            "domain": "Support Operations",
            "scenario": "Route each incoming support ticket to billing, logistics, or technical support.",
            "task_type": "multiclass_classification",
            "primary_metric": "f1_macro",
            "tradeoff_focus": "balanced_tradeoff",
            "why": "Classes are operationally important and macro F1 avoids hiding weak classes.",
        },
        {
            "case_id": "C4",
            "domain": "Content Safety",
            "scenario": "Assign multiple tags to each post: spam, harassment, hate_speech, and scam.",
            "task_type": "multilabel_classification",
            "primary_metric": "f1_macro",
            "tradeoff_focus": "minimize_false_negatives",
            "why": "A post can have multiple violations, and missing harmful content is costly.",
        },
        {
            "case_id": "C5",
            "domain": "Supply Chain",
            "scenario": "Predict next-week demand for each region in one model call.",
            "task_type": "multioutput_prediction",
            "primary_metric": "mae",
            "tradeoff_focus": "balanced_tradeoff",
            "why": "The model outputs a vector of values; MAE is easy to interpret per region.",
        },
        {
            "case_id": "C6",
            "domain": "Real Estate",
            "scenario": "Estimate home sale price from property and neighborhood attributes.",
            "task_type": "regression",
            "primary_metric": "rmse",
            "tradeoff_focus": "control_large_errors",
            "why": "Large pricing errors are especially harmful; RMSE penalizes them more.",
        },
        {
            "case_id": "C7",
            "domain": "Cybersecurity",
            "scenario": "Classify a detected attack event as phishing, malware, brute_force, or benign.",
            "task_type": "multiclass_classification",
            "primary_metric": "f1_macro",
            "tradeoff_focus": "balanced_tradeoff",
            "why": "Rare attack types still matter, so macro F1 is safer than pure accuracy.",
        },
        {
            "case_id": "C8",
            "domain": "Human Resources",
            "scenario": "Tag each resume with one or more skills: python, sql, leadership, ml.",
            "task_type": "multilabel_classification",
            "primary_metric": "f1_weighted",
            "tradeoff_focus": "balanced_tradeoff",
            "why": "Multiple labels per profile and skewed skill frequencies favor weighted F1.",
        },
    )
]

In [None]:
# Show only the scenario description for each case; the expected-answer
# fields of the rubric are left out of this overview table.
cases_df = pd.DataFrame([vars(case) for case in CASES]).loc[
    :, ['case_id', 'domain', 'scenario']
]
cases_df

In [None]:
# Accepted spellings for the task-type answer, mapped to the rubric value.
TASK_ALIASES = {
    'binary': 'binary_classification',
    'binary_classification': 'binary_classification',
    'multiclass': 'multiclass_classification',
    'multi_class': 'multiclass_classification',
    'multiclass_classification': 'multiclass_classification',
    'multilabel': 'multilabel_classification',
    'multi_label': 'multilabel_classification',
    'multilabel_classification': 'multilabel_classification',
    'multioutput': 'multioutput_prediction',
    'multi_output': 'multioutput_prediction',
    'multioutput_prediction': 'multioutput_prediction',
    'regression': 'regression',
}

# Accepted spellings for the primary-metric answer.
METRIC_ALIASES = {
    'precision': 'precision',
    'recall': 'recall',
    'f1': 'f1',
    'f1_macro': 'f1_macro',
    'macro_f1': 'f1_macro',
    'f1_weighted': 'f1_weighted',
    'weighted_f1': 'f1_weighted',
    'pr_auc': 'pr_auc',
    'roc_auc': 'roc_auc',
    'mae': 'mae',
    'mse': 'mse',
    'rmse': 'rmse',
    'r2': 'r2',
}

# Accepted spellings for the tradeoff-focus answer.
TRADEOFF_ALIASES = {
    'minimize_false_negatives': 'minimize_false_negatives',
    'high_recall': 'minimize_false_negatives',
    'minimize_false_positives': 'minimize_false_positives',
    'high_precision': 'minimize_false_positives',
    'balanced_tradeoff': 'balanced_tradeoff',
    'balanced': 'balanced_tradeoff',
    'control_large_errors': 'control_large_errors',
}

# Rubric entries indexed by case_id for direct lookup when grading.
CASE_LOOKUP = {c.case_id: c for c in CASES}


def _normalize(value: str, aliases: dict[str, str]) -> str:
    """Map a free-form answer onto its canonical spelling.

    The value is converted to ``str``, trimmed and lower-cased, and runs of
    whitespace or hyphens are folded into single underscores, so answers such
    as ``"Multi-Label"`` or ``"macro f1"`` reach the same alias as
    ``"multi_label"`` / ``"macro_f1"``.

    Args:
        value: The answer as typed (any object; it is passed through ``str``).
        aliases: Mapping from accepted spelling to canonical value.

    Returns:
        The canonical value when the cleaned key is a known alias, otherwise
        the cleaned key itself (which then simply fails the equality check).
    """
    key = str(value).strip().lower()
    # No alias or rubric value contains a space or hyphen, so folding them
    # into underscores can only turn a former mismatch into a match.
    key = '_'.join(key.replace('-', ' ').split())
    return aliases.get(key, key)

In [None]:
# Keys of the dict returned by check_case, in order. Used to give an empty
# report the same schema as a populated one.
_REPORT_COLUMNS = [
    'case_id',
    'domain',
    'task_ok',
    'metric_ok',
    'tradeoff_ok',
    'score_out_of_3',
    'expected_task',
    'expected_metric',
    'expected_tradeoff',
    'why',
]


def check_case(case_id: str, task_type: str, primary_metric: str, tradeoff_focus: str) -> dict:
    """Grade one answer against the rubric entry for ``case_id``.

    Each of the three answers is normalised through its alias table and
    compared with the expected value; one point is awarded per match.

    Args:
        case_id: Identifier of the case being answered; must be a key of
            ``CASE_LOOKUP``.
        task_type: Proposed task type.
        primary_metric: Proposed primary metric.
        tradeoff_focus: Proposed tradeoff focus.

    Returns:
        A dict with the per-field booleans (``task_ok``, ``metric_ok``,
        ``tradeoff_ok``), ``score_out_of_3``, the expected values, and the
        rubric explanation under ``why``.

    Raises:
        ValueError: If ``case_id`` is not a known case.
    """
    if case_id not in CASE_LOOKUP:
        raise ValueError(f'Unknown case_id: {case_id}')

    case = CASE_LOOKUP[case_id]

    task_ok = _normalize(task_type, TASK_ALIASES) == case.task_type
    metric_ok = _normalize(primary_metric, METRIC_ALIASES) == case.primary_metric
    tradeoff_ok = _normalize(tradeoff_focus, TRADEOFF_ALIASES) == case.tradeoff_focus

    return {
        'case_id': case_id,
        'domain': case.domain,
        'task_ok': task_ok,
        'metric_ok': metric_ok,
        'tradeoff_ok': tradeoff_ok,
        'score_out_of_3': int(task_ok) + int(metric_ok) + int(tradeoff_ok),
        'expected_task': case.task_type,
        'expected_metric': case.primary_metric,
        'expected_tradeoff': case.tradeoff_focus,
        'why': case.why,
    }


def grade_answers(answer_df: pd.DataFrame) -> pd.DataFrame:
    """Grade every row of an answer sheet and return a sorted report.

    Args:
        answer_df: Frame with columns ``case_id``, ``task_type``,
            ``primary_metric`` and ``tradeoff_focus`` (extra columns are
            ignored).

    Returns:
        One row per answer with the fields produced by ``check_case`` plus an
        ``is_perfect`` flag, sorted by score (highest first) and then by
        ``case_id``. An answer sheet with no rows yields an empty report that
        still carries all report columns.

    Raises:
        ValueError: If a required column is missing, or if any row refers to
            an unknown ``case_id``.
    """
    required_cols = {'case_id', 'task_type', 'primary_metric', 'tradeoff_focus'}
    missing = required_cols - set(answer_df.columns)
    if missing:
        raise ValueError(f'Missing columns: {sorted(missing)}')

    rows = [
        check_case(
            row['case_id'],
            row['task_type'],
            row['primary_metric'],
            row['tradeoff_focus'],
        )
        for row in answer_df.to_dict(orient='records')
    ]

    # pd.DataFrame([]) has no columns, so the score lookup below would raise
    # KeyError for an empty answer sheet; give the empty report its schema.
    report = pd.DataFrame(rows) if rows else pd.DataFrame(columns=_REPORT_COLUMNS)
    report['is_perfect'] = report['score_out_of_3'] == 3
    return report.sort_values(['score_out_of_3', 'case_id'], ascending=[False, True])

In [None]:
# Example: grade one answer for case C1 (this answer matches the rubric on all three fields)
check_case(
    'C1',
    task_type='binary_classification',
    primary_metric='recall',
    tradeoff_focus='minimize_false_negatives',
)

In [None]:
# Answer sheet: one row per case with the three answer fields left blank.
# Fill it in, then run grade_answers(your_answers).
your_answers = pd.DataFrame(
    [
        {
            'case_id': case.case_id,
            **dict.fromkeys(('task_type', 'primary_metric', 'tradeoff_focus'), ''),
        }
        for case in CASES
    ]
)
your_answers

In [None]:
# Uncomment and run after filling your_answers\n# grade_answers(your_answers)

## Reflection\nFor each case where `score_out_of_3` is below 3, write one sentence: what assumption caused the mismatch?\nThen map the corrected setup to a real project in your domain.