In [1]:
# inclusive_scoring.py
import math
from copy import deepcopy
from typing import Dict, List, Optional, Tuple

# --- Canonicalize gender labels (for logging/audit only) ---
def normalize_gender(value: str) -> str:
    """
    Normalize a free-form gender string into a canonical audit category.

    Matching ignores case, surrounding whitespace, and whether words are
    separated by spaces, hyphens, or underscores, so 'Non-Binary',
    'non binary' and the canonical 'non_binary' all map to 'non_binary'.

    Args:
      value: raw label; None, empty strings and non-string values are accepted.
    Returns:
      One of: 'male', 'female', 'non_binary', 'other', 'unknown'.
      Anything unrecognized (including non-string input) yields 'unknown'.
    """
    # Non-strings (None, numbers, ...) cannot be matched; report them as
    # 'unknown' instead of failing on .strip().
    if not isinstance(value, str):
        return 'unknown'

    # Treat '-' and '_' as spaces and collapse whitespace runs so every
    # spelling variant (including our own canonical outputs) compares equal.
    v = ' '.join(value.lower().replace('_', ' ').replace('-', ' ').split())
    if not v:
        return 'unknown'

    if v in {'m', 'male', 'man'}:
        return 'male'
    if v in {'f', 'female', 'woman'}:
        return 'female'
    if v in {'nb', 'non binary', 'nonbinary', 'enby'}:
        return 'non_binary'
    if v in {'other', 'prefer not to say'}:
        return 'other'
    return 'unknown'

# --- Inclusive scoring (GENDER-IGNORANT) ---
def _clamped_fraction(raw_value, upper: float) -> float:
    """Clamp raw_value to [0, upper] and scale it to [0.0, 1.0]; None and NaN count as 0."""
    if raw_value is None:
        return 0.0
    number = float(raw_value)
    # NaN would pass straight through min()/max() and poison the final score.
    if math.isnan(number):
        return 0.0
    return min(max(number, 0.0), upper) / upper


def score_applicant_inclusive(applicant: Dict[str, float],
                              weights: Optional[Dict[str, float]] = None) -> float:
    """
    Compute an applicant score WITHOUT reading gender or other protected attributes.

    Only the four keys listed below are read from `applicant`; any other key
    (e.g. 'gender') is ignored.

    Inputs:
      applicant: dict with numeric keys (a missing key, a None value, or NaN
                 is treated as 0):
        - education_years (float), clamped to 0-30
        - years_experience (float), clamped to 0-40
        - skill_score (float), clamped to 0-100
        - certifications_count (float), clamped to 0-20
      weights: optional dict of feature weights with keys 'edu', 'exp',
               'skill', 'cert'. Defaults to 0.2 / 0.3 / 0.4 / 0.1.
    Returns:
      score (float) rounded to 2 decimals and clamped to [0.0, 100.0].
    Raises:
      KeyError: if a custom `weights` dict lacks one of the four keys.
      TypeError / ValueError: if a feature value cannot be converted by float().
    """
    # Default weights (sum to 1.0)
    if weights is None:
        weights = {'edu': 0.2, 'exp': 0.3, 'skill': 0.4, 'cert': 0.1}

    # Normalize every feature onto 0-1 over its clamped range.
    edu_norm = _clamped_fraction(applicant.get('education_years'), 30.0)
    exp_norm = _clamped_fraction(applicant.get('years_experience'), 40.0)
    skill_norm = _clamped_fraction(applicant.get('skill_score'), 100.0)
    cert_norm = _clamped_fraction(applicant.get('certifications_count'), 20.0)

    raw = (weights['edu'] * edu_norm +
           weights['exp'] * exp_norm +
           weights['skill'] * skill_norm +
           weights['cert'] * cert_norm)

    score = round(raw * 100.0, 2)
    # Custom weights need not sum to 1 (or may be negative); keep the
    # documented 0-100 range regardless.
    return min(max(score, 0.0), 100.0)

# --- Counterfactual invariance check across gender categories ---
def counterfactual_gender_check(applicants: List[Dict],
                                score_fn,
                                sensitive_key: str = 'gender',
                                canonical_values: List[str] = None,
                                allowed_diff: float = 1e-8) -> Dict[str, float]:
    """
    Measure how often swapping the sensitive attribute changes an applicant's score.

    Every applicant is scored once as given. Then, for each value in
    `canonical_values`, a deep copy with `sensitive_key` set to that value is
    scored and compared against the baseline. Input records are not modified.

    Args:
      applicants: records to probe.
      score_fn: callable taking one record and returning a numeric score.
      sensitive_key: key overwritten in each counterfactual copy.
      canonical_values: values to substitute; defaults to
        ['male', 'female', 'non_binary', 'other', 'unknown'].
      allowed_diff: absolute score difference tolerated before a comparison
        counts as changed.
    Returns:
      dict with 'total_comparisons', 'changed_count' and 'fraction_changed'
      (0.0 when no comparison was made).
    """
    probe_values = (
        ['male', 'female', 'non_binary', 'other', 'unknown']
        if canonical_values is None
        else canonical_values
    )

    comparisons = 0
    mismatches = 0
    for record in applicants:
        baseline = score_fn(record)
        for label in probe_values:
            # Work on a deep copy so the caller's record stays untouched.
            variant = deepcopy(record)
            variant[sensitive_key] = label
            variant_score = score_fn(variant)
            comparisons += 1
            if abs(baseline - variant_score) > allowed_diff:
                mismatches += 1

    fraction = mismatches / comparisons if comparisons else 0.0
    return {
        'total_comparisons': comparisons,
        'changed_count': mismatches,
        'fraction_changed': fraction,
    }

# --- Example unit test helpers ---
def test_counterfactual_invariance_single():
    """Check that two records differing only in 'gender' receive the same score."""
    original = {
        'education_years': 16,
        'years_experience': 5,
        'skill_score': 80.0,
        'certifications_count': 2,
        'gender': 'female',
    }
    # Same record with only the gender field swapped.
    swapped = {**original, 'gender': 'male'}

    score_original = score_applicant_inclusive(original)
    score_swapped = score_applicant_inclusive(swapped)
    assert score_original == score_swapped, "Inclusive scorer must be invariant to gender"

def run_quick_demo():
    """Print scores for a small synthetic batch, then the counterfactual gender check summary."""
    # Synthetic records, one row per applicant; columns follow `columns`.
    columns = ('education_years', 'years_experience', 'skill_score',
               'certifications_count', 'gender')
    rows = [
        (16, 5, 80.0, 2, 'female'),
        (12, 2, 60.0, 1, 'male'),
        (18, 10, 90.0, 5, 'non-binary'),
    ]
    batch = [dict(zip(columns, row)) for row in rows]

    print("Scores (gender-ignored):")
    for record in batch:
        # Gender is used only to label the printed line, never for scoring.
        label = normalize_gender(record.get('gender'))
        score = score_applicant_inclusive(record)
        print(f"  {label}: {score}")

    summary = counterfactual_gender_check(batch, score_applicant_inclusive)
    print("Counterfactual gender check:", summary)

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run_quick_demo()

Scores (gender-ignored):
  female: 47.42
  male: 34.0
  non_binary: 58.0
Counterfactual gender check: {'total_comparisons': 15, 'changed_count': 0, 'fraction_changed': 0.0}
