In [0]:
%sql
-- ============================================================
-- NOTEBOOK B: gold_research_person_signals (v2)
-- Run AFTER Notebook A (weights + actions) and Notebook C (story status)
--
-- Changes vs v1:
--   REMOVED signals:  SIGNAL_DEEP_GENERATION, SIGNAL_HAS_MARRIAGE,
--                     SIGNAL_HAS_CHILDREN, SIGNAL_POSSIBLE_RESIDENCE (old def)
--   AGE GUARDS added: SIGNAL_NO_MARRIAGES, SIGNAL_NO_CHILDREN,
--                     SIGNAL_NO_DEATH_RECORDED
--   WIRED UP:         SIGNAL_LATE_LIFE_GAP, SIGNAL_EARLY_LIFE_ONLY,
--                     SIGNAL_CHILD_GAPS, SIGNAL_POSSIBLE_MARRIAGE,
--                     SIGNAL_POSSIBLE_CHILDREN, SIGNAL_POSSIBLE_OCCUPATION,
--                     SIGNAL_HIGH_FAMILY_PAYOFF
--   NEW signals:      SIGNAL_IMPRECISE_PLACES, SIGNAL_MISSING_CENSUS_COVERAGE,
--                     SIGNAL_VARIED_OCCUPATIONS, SIGNAL_UNCOVERED_SOURCES,
--                     SIGNAL_DOCS_NOT_TRANSCRIBED, SIGNAL_FACT_CONFLICT,
--                     SIGNAL_TRANSCRIPT_AVAILABLE, SIGNAL_STORY_WRITTEN
--   REDEFINED:        SIGNAL_POSSIBLE_RESIDENCE (tightened: RESI present
--                     but census coverage missing — replaces near-universal
--                     old definition)
-- ============================================================

CREATE OR REPLACE VIEW genealogy.gold_research_person_signals AS

WITH timeline AS (
  SELECT
    t.person_gedcom_id,
    MAX(p.sex)                                    AS sex,
    MIN(event_year_parsed)                        AS first_event_year,
    MAX(event_year_parsed)                        AS last_event_year,
    MAX(age_years)                                AS max_event_age,
    MAX(year(p.death_date))                       AS death_year,
    MAX(year(p.birth_date))                       AS birth_year,
    COALESCE(
      MAX(year(p.death_date)),
      LEAST(MAX(year(p.birth_date)) + 80, year(current_date)),
      LEAST(MIN(event_year_parsed) + 80, year(current_date))
    )                                             AS expected_end_year,
    (
      COALESCE(
        MAX(year(p.death_date)),
        LEAST(MAX(year(p.birth_date)) + 80, year(current_date)),
        LEAST(MIN(event_year_parsed) + 80, year(current_date))
      ) - MIN(event_year_parsed) + 1
    )                                             AS effective_span_years,
    COUNT(*)                                      AS event_count
  FROM genealogy.gold_person_event_timeline t
  JOIN genealogy.gold_person_life p ON p.person_gedcom_id = t.person_gedcom_id
  WHERE event_year_parsed IS NOT NULL
  GROUP BY t.person_gedcom_id
),

evidence AS (
  SELECT
    person_gedcom_id,
    total_facts,
    total_sources,
    avg_sources_per_fact,
    child_event_count,
    marriage_event_count,
    family_event_count,
    sourced_family_event_count
  FROM genealogy.gold_person_evidence_summary
),

proximity AS (
  SELECT
    p.person_id,
    MIN(ancestral_proximity)    AS proximity,
    MIN(d.generation_depth)     AS depth,
    d.person_id                 AS nearest_ancestor_id
  FROM genealogy.gold_ancestral_proximity p
  JOIN genealogy.gold_generation_depth d ON d.person_id = p.path_to_ancestor[0]
  GROUP BY p.person_id, d.person_id
),

-- Expected census years each person should appear in.
-- Covers England & Wales / Scotland censuses 1841-1921 + 1939 Register.
-- A person is "expected" in a census year if they were alive that year.
census_coverage AS (
  SELECT
    t.person_gedcom_id,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1840
          AND t.expected_end_year >= 1841 THEN 1 ELSE 0 END  AS expected_1841,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1850
          AND t.expected_end_year >= 1851 THEN 1 ELSE 0 END  AS expected_1851,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1860
          AND t.expected_end_year >= 1861 THEN 1 ELSE 0 END  AS expected_1861,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1870
          AND t.expected_end_year >= 1871 THEN 1 ELSE 0 END  AS expected_1871,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1880
          AND t.expected_end_year >= 1881 THEN 1 ELSE 0 END  AS expected_1881,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1890
          AND t.expected_end_year >= 1891 THEN 1 ELSE 0 END  AS expected_1891,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1900
          AND t.expected_end_year >= 1901 THEN 1 ELSE 0 END  AS expected_1901,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1910
          AND t.expected_end_year >= 1911 THEN 1 ELSE 0 END  AS expected_1911,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1920
          AND t.expected_end_year >= 1921 THEN 1 ELSE 0 END  AS expected_1921,
    CASE WHEN t.birth_year IS NOT NULL AND t.birth_year <= 1938
          AND t.expected_end_year >= 1939 THEN 1 ELSE 0 END  AS expected_1939
  FROM timeline t
),

-- OCR/reconciliation pipeline signals
ocr_signals AS (
  SELECT
    p.person_gedcom_id,
    -- SIGNAL_UNCOVERED_SOURCES: citation in tree, no document filed
    MAX(CASE WHEN sc.coverage_status = 'UNCOVERED' THEN TRUE ELSE FALSE END)
      AS has_uncovered_sources,
    -- SIGNAL_DOCS_NOT_TRANSCRIBED: document filed but not OCR'd
    MAX(CASE WHEN sc.coverage_status = 'DOCUMENT_NO_TRANSCRIPT' THEN TRUE ELSE FALSE END)
      AS has_docs_not_transcribed,
    -- SIGNAL_FACT_CONFLICT: transcript contradicts tree fact
    MAX(CASE WHEN fc.status = 'CONFLICT' THEN TRUE ELSE FALSE END)
      AS has_fact_conflict,
    -- SIGNAL_TRANSCRIPT_AVAILABLE: at least one OCR transcript exists
    MAX(CASE WHEN tf.person_gedcom_id IS NOT NULL THEN TRUE ELSE FALSE END)
      AS has_transcript
  FROM genealogy.gold_person_life p
  LEFT JOIN genealogy.gold_source_coverage sc  ON sc.person_gedcom_id = p.person_gedcom_id
  LEFT JOIN genealogy.gold_fact_comparison fc  ON fc.person_gedcom_id = p.person_gedcom_id
  LEFT JOIN (
    SELECT DISTINCT person_gedcom_id FROM genealogy.gold_transcript_facts
  ) tf ON tf.person_gedcom_id = p.person_gedcom_id
  GROUP BY p.person_gedcom_id
),

-- Story written flag (from manually maintained silver table)
story_status AS (
  SELECT person_gedcom_id, story_written
  FROM genealogy.silver_person_story_status
)

SELECT
  p.person_gedcom_id,

  -- ── Structural / lineage context ─────────────────────────────────────────
  -- Used as multiplier in aggregation SQL — not scored additively
  CASE WHEN p.person_gedcom_id = pr.nearest_ancestor_id THEN TRUE ELSE FALSE END
    AS is_direct_ancestor,
  pr.depth,
  pr.proximity,
  p.event_count,
  p.effective_span_years,

  CASE WHEN pr.proximity = 0 THEN TRUE ELSE FALSE END   AS SIGNAL_DIRECT_ANCESTOR,
  CASE WHEN pr.proximity = 1 THEN TRUE ELSE FALSE END   AS SIGNAL_CLOSE_COLLATERAL,

  -- ── INTEGRITY — Completeness ──────────────────────────────────────────────

  CASE WHEN p.birth_year IS NULL
    THEN TRUE ELSE FALSE END
    AS SIGNAL_NO_BIRTH_RECORDED,

  -- GUARD: suppress if birth_year > 1930 (likely living) or lifespan < 16 years
  CASE WHEN p.death_year IS NULL
    AND p.expected_end_year < year(current_date)
    AND (p.birth_year IS NULL OR p.birth_year <= 1930)
    THEN TRUE ELSE FALSE END
    AS SIGNAL_NO_DEATH_RECORDED,

  -- GUARD: suppress if effective lifespan < 16 years (died young, never married)
  CASE WHEN fs.num_marriages = 0
    AND (
      p.death_year IS NULL
      OR (p.death_year - COALESCE(p.birth_year, p.death_year - 40)) >= 16
    )
    THEN TRUE ELSE FALSE END
    AS SIGNAL_NO_MARRIAGES,

  -- GUARD: suppress when SIGNAL_YOUNG_DEATH would also be true
  CASE WHEN fs.num_child_births = 0
    AND NOT (
      p.death_year IS NOT NULL
      AND p.effective_span_years BETWEEN 16 AND 40
    )
    THEN TRUE ELSE FALSE END
    AS SIGNAL_NO_CHILDREN,

  CASE WHEN fs.num_parents < 2
    THEN TRUE ELSE FALSE END
    AS SIGNAL_MISSING_PARENT,

  -- Fires if person was expected in >= 1 census year but has no RESI event
  -- in those census years (using fact_summary census boolean flags)
  CASE WHEN (
      (cc.expected_1841 > 0 AND fs.has_1841_census = 0) OR
      (cc.expected_1851 > 0 AND fs.has_1851_census = 0) OR
      (cc.expected_1861 > 0 AND fs.has_1861_census = 0) OR
      (cc.expected_1871 > 0 AND fs.has_1871_census = 0) OR
      (cc.expected_1881 > 0 AND fs.has_1881_census = 0) OR
      (cc.expected_1891 > 0 AND fs.has_1891_census = 0) OR
      (cc.expected_1901 > 0 AND fs.has_1901_census = 0) OR
      (cc.expected_1911 > 0 AND fs.has_1911_census = 0) OR
      (cc.expected_1921 > 0 AND fs.has_1921_census = 0) OR
      (cc.expected_1939 > 0 AND fs.has_1939_register = 0)
    )
    THEN TRUE ELSE FALSE END
    AS SIGNAL_MISSING_CENSUS_COVERAGE,

  -- OCR pipeline completeness signals
  COALESCE(o.has_uncovered_sources,    FALSE) AS SIGNAL_UNCOVERED_SOURCES,
  COALESCE(o.has_docs_not_transcribed, FALSE) AS SIGNAL_DOCS_NOT_TRANSCRIBED,

  -- Lifecycle gap signals (previously dead — now wired up)
  CASE WHEN
    p.death_year IS NULL
    AND p.last_event_year < p.birth_year + 40
    THEN TRUE ELSE FALSE END
    AS SIGNAL_LATE_LIFE_GAP,

  CASE WHEN
    p.max_event_age <= 25
    AND e.family_event_count > 0
    THEN TRUE ELSE FALSE END
    AS SIGNAL_EARLY_LIFE_ONLY,

  CASE WHEN
    fs.max_days_between_child_births > 730
    OR (fs.num_marriages > 0 AND fs.num_child_births = 0)
    THEN TRUE ELSE FALSE END
    AS SIGNAL_CHILD_GAPS,

  -- ── INTEGRITY — Evidence fragility ───────────────────────────────────────

  CASE WHEN e.avg_sources_per_fact < 1.2
    THEN TRUE ELSE FALSE END
    AS SIGNAL_LOW_EVIDENCE_DENSITY,

  CASE WHEN e.total_sources = 1 AND e.total_facts >= 3
    THEN TRUE ELSE FALSE END
    AS SIGNAL_SINGLE_SOURCE_DEPENDENCE,

  CASE WHEN e.family_event_count > e.sourced_family_event_count
    THEN TRUE ELSE FALSE END
    AS SIGNAL_UNSOURCED_FAMILY_EVENTS,

  CASE WHEN fs.has_given_name = 0 OR fs.has_surname = 0
    THEN TRUE ELSE FALSE END
    AS SIGNAL_INCOMPLETE_NAME,

  CASE WHEN fs.birth_date_precision <> 'DAY' OR fs.death_date_precision <> 'DAY'
    THEN TRUE ELSE FALSE END
    AS SIGNAL_IMPRECISE_DATES,

  -- Fires if birth or death place resolves to county level only
  -- (zero or one comma = no town/parish). Post-1600 only.
  -- NOTE: requires birth_place / death_place columns in gold_person_fact_summary.
  -- If those columns live in gold_person_life instead, update the fs. prefix below.
  CASE WHEN
    p.birth_year IS NOT NULL AND p.birth_year >= 1600
    AND (
      (p.birth_year IS NOT NULL AND (
        fs.birth_place IS NULL
        OR LENGTH(fs.birth_place) - LENGTH(REPLACE(fs.birth_place, ',', '')) < 1
      ))
      OR
      (p.death_year IS NOT NULL AND (
        fs.death_place IS NULL
        OR LENGTH(fs.death_place) - LENGTH(REPLACE(fs.death_place, ',', '')) < 1
      ))
    )
    THEN TRUE ELSE FALSE END
    AS SIGNAL_IMPRECISE_PLACES,

  -- OCR evidence signal
  COALESCE(o.has_fact_conflict, FALSE) AS SIGNAL_FACT_CONFLICT,

  -- ── NARRATIVE — Texture ───────────────────────────────────────────────────

  CASE WHEN fs.num_military > 0
    THEN TRUE ELSE FALSE END
    AS SIGNAL_MILITARY,

  CASE WHEN fs.num_marriages > 1
    THEN TRUE ELSE FALSE END
    AS SIGNAL_MULTIPLE_SPOUSES,

  CASE WHEN
    p.death_year IS NOT NULL
    AND p.effective_span_years BETWEEN 16 AND 40
    THEN TRUE ELSE FALSE END
    AS SIGNAL_YOUNG_DEATH,

  CASE WHEN fs.num_migration > 0 OR fs.num_countries > 1
    THEN TRUE ELSE FALSE END
    AS SIGNAL_MIGRANT,

  CASE WHEN
    p.sex = 'M'
    AND (p.birth_year BETWEEN 1867 AND 1904)
    AND p.expected_end_year >= 1914
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_WWI,

  CASE WHEN
    (
      (p.sex = 'M' AND (p.birth_year BETWEEN 1891 AND 1929))
      OR
      (p.sex = 'F' AND (p.birth_year BETWEEN 1911 AND 1925))
    )
    AND p.expected_end_year >= 1939
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_WWII,

  -- Story suppression — negative weight in narrative scoring
  COALESCE(ss.story_written, FALSE) AS SIGNAL_STORY_WRITTEN,

  -- ── NARRATIVE — Context ───────────────────────────────────────────────────

  -- Occupation records likely findable
  CASE WHEN
    (fs.has_1921_census = 1 AND p.birth_year <= 1907)
    OR fs.num_child_births > 0
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_OCCUPATION,

  -- Multiple distinct occupations recorded
  -- NOTE: requires num_occupations column in gold_person_fact_summary
  CASE WHEN fs.num_occupations > 1
    THEN TRUE ELSE FALSE END
    AS SIGNAL_VARIED_OCCUPATIONS,

  -- REDEFINED: RESI events exist but census coverage still has gaps.
  -- Much more discriminating than v1 (which fired for almost everyone).
  CASE WHEN
    (fs.num_marriages > 0 OR fs.num_child_births > 0)
    AND (
      (cc.expected_1841 > 0 AND fs.has_1841_census = 0) OR
      (cc.expected_1851 > 0 AND fs.has_1851_census = 0) OR
      (cc.expected_1861 > 0 AND fs.has_1861_census = 0) OR
      (cc.expected_1871 > 0 AND fs.has_1871_census = 0) OR
      (cc.expected_1881 > 0 AND fs.has_1881_census = 0) OR
      (cc.expected_1891 > 0 AND fs.has_1891_census = 0) OR
      (cc.expected_1901 > 0 AND fs.has_1901_census = 0) OR
      (cc.expected_1911 > 0 AND fs.has_1911_census = 0) OR
      (cc.expected_1921 > 0 AND fs.has_1921_census = 0) OR
      (cc.expected_1939 > 0 AND fs.has_1939_register = 0)
    )
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_RESIDENCE,

  -- High-leverage, under-researched family
  CASE WHEN
    e.family_event_count >= 3
    AND e.avg_sources_per_fact < 1.5
    THEN TRUE ELSE FALSE END
    AS SIGNAL_HIGH_FAMILY_PAYOFF,

  -- Possible hidden marriage (female, in 1939 register, no marriage recorded)
  CASE WHEN
    p.sex = 'F'
    AND fs.has_1939_register = 1
    AND fs.num_marriages = 0
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_MARRIAGE,

  -- Possible unrecorded children (female, in 1911 census, born <=1895)
  CASE WHEN
    p.sex = 'F'
    AND fs.has_1911_census = 1
    AND p.birth_year <= 1895
    THEN TRUE ELSE FALSE END
    AS SIGNAL_POSSIBLE_CHILDREN,

  -- Positive OCR signal
  COALESCE(o.has_transcript, FALSE) AS SIGNAL_TRANSCRIPT_AVAILABLE

FROM timeline p
JOIN  evidence e                  ON p.person_gedcom_id = e.person_gedcom_id
JOIN  genealogy.gold_person_fact_summary fs ON fs.person_gedcom_id = p.person_gedcom_id
LEFT JOIN proximity pr            ON p.person_gedcom_id = pr.person_id
LEFT JOIN census_coverage cc      ON p.person_gedcom_id = cc.person_gedcom_id
LEFT JOIN ocr_signals o           ON p.person_gedcom_id = o.person_gedcom_id
LEFT JOIN story_status ss         ON p.person_gedcom_id = ss.person_gedcom_id;


In [0]:
%sql
-- ============================================================
-- CELL 2: gold_research_person_signals_pivoted
-- Replaces the LATERAL VIEW explode(map(...)) from v1.
-- Only active scored signals are included here.
-- Structural signals (DIRECT_ANCESTOR, CLOSE_COLLATERAL) are
-- used directly in the aggregation SQL as multipliers.
-- Read-only signals (STORY_WRITTEN, TRANSCRIPT_AVAILABLE) are
-- included so their weights are applied by the scoring engine.
-- ============================================================

CREATE OR REPLACE VIEW genealogy.gold_research_person_signals_pivoted AS
SELECT person_gedcom_id, signal_code
FROM genealogy.gold_research_person_signals
LATERAL VIEW explode(map(
  -- INTEGRITY — Completeness
  'SIGNAL_NO_BIRTH_RECORDED',        SIGNAL_NO_BIRTH_RECORDED,
  'SIGNAL_NO_DEATH_RECORDED',        SIGNAL_NO_DEATH_RECORDED,
  'SIGNAL_NO_MARRIAGES',             SIGNAL_NO_MARRIAGES,
  'SIGNAL_NO_CHILDREN',              SIGNAL_NO_CHILDREN,
  'SIGNAL_MISSING_PARENT',           SIGNAL_MISSING_PARENT,
  'SIGNAL_MISSING_CENSUS_COVERAGE',  SIGNAL_MISSING_CENSUS_COVERAGE,
  'SIGNAL_UNCOVERED_SOURCES',        SIGNAL_UNCOVERED_SOURCES,
  'SIGNAL_DOCS_NOT_TRANSCRIBED',     SIGNAL_DOCS_NOT_TRANSCRIBED,
  'SIGNAL_LATE_LIFE_GAP',            SIGNAL_LATE_LIFE_GAP,
  'SIGNAL_EARLY_LIFE_ONLY',          SIGNAL_EARLY_LIFE_ONLY,
  'SIGNAL_CHILD_GAPS',               SIGNAL_CHILD_GAPS,
  -- INTEGRITY — Evidence
  'SIGNAL_LOW_EVIDENCE_DENSITY',     SIGNAL_LOW_EVIDENCE_DENSITY,
  'SIGNAL_SINGLE_SOURCE_DEPENDENCE', SIGNAL_SINGLE_SOURCE_DEPENDENCE,
  'SIGNAL_UNSOURCED_FAMILY_EVENTS',  SIGNAL_UNSOURCED_FAMILY_EVENTS,
  'SIGNAL_IMPRECISE_DATES',          SIGNAL_IMPRECISE_DATES,
  'SIGNAL_INCOMPLETE_NAME',          SIGNAL_INCOMPLETE_NAME,
  'SIGNAL_IMPRECISE_PLACES',         SIGNAL_IMPRECISE_PLACES,
  'SIGNAL_FACT_CONFLICT',            SIGNAL_FACT_CONFLICT,
  -- NARRATIVE — Texture
  'SIGNAL_MILITARY',                 SIGNAL_MILITARY,
  'SIGNAL_YOUNG_DEATH',              SIGNAL_YOUNG_DEATH,
  'SIGNAL_MIGRANT',                  SIGNAL_MIGRANT,
  'SIGNAL_POSSIBLE_WWI',             SIGNAL_POSSIBLE_WWI,
  'SIGNAL_POSSIBLE_WWII',            SIGNAL_POSSIBLE_WWII,
  'SIGNAL_STORY_WRITTEN',            SIGNAL_STORY_WRITTEN,
  -- NARRATIVE — Family
  'SIGNAL_MULTIPLE_SPOUSES',         SIGNAL_MULTIPLE_SPOUSES,
  -- NARRATIVE — Context
  'SIGNAL_POSSIBLE_OCCUPATION',      SIGNAL_POSSIBLE_OCCUPATION,
  'SIGNAL_VARIED_OCCUPATIONS',       SIGNAL_VARIED_OCCUPATIONS,
  'SIGNAL_POSSIBLE_RESIDENCE',       SIGNAL_POSSIBLE_RESIDENCE,
  'SIGNAL_HIGH_FAMILY_PAYOFF',       SIGNAL_HIGH_FAMILY_PAYOFF,
  'SIGNAL_POSSIBLE_MARRIAGE',        SIGNAL_POSSIBLE_MARRIAGE,
  'SIGNAL_POSSIBLE_CHILDREN',        SIGNAL_POSSIBLE_CHILDREN,
  'SIGNAL_TRANSCRIPT_AVAILABLE',     SIGNAL_TRANSCRIPT_AVAILABLE
)) exploded AS signal_code, is_present
WHERE is_present = TRUE;


In [0]:
%sql
-- ============================================================
-- CELL 3: Verification — signal fire rates
-- Sanity check: review what % of people each signal fires for.
-- Flag anything unexpectedly high (>80%) or zero.
-- ============================================================

SELECT
  signal_code,
  COUNT(*)                                          AS fire_count,
  ROUND(COUNT(*) * 100.0 / MAX(total.cnt), 1)       AS fire_pct
FROM genealogy.gold_research_person_signals_pivoted
CROSS JOIN (SELECT COUNT(DISTINCT person_gedcom_id) AS cnt
            FROM genealogy.gold_research_person_signals) total
GROUP BY signal_code
ORDER BY fire_pct DESC;


In [0]:
%sql
-- ============================================================
-- CELL 4: Verification — signals in pivoted view with no weight entry
-- Should return 0 rows. Any result here means a signal is firing
-- but contributing nothing to scores.
-- ============================================================

SELECT DISTINCT p.signal_code
FROM genealogy.gold_research_person_signals_pivoted p
LEFT JOIN genealogy.ref_signal_weights w ON w.signal_code = p.signal_code
WHERE w.signal_code IS NULL
ORDER BY p.signal_code;


In [0]:
%sql
-- ============================================================
-- CELL 5: Spot check — Cuthbertson branch new signals
-- Confirms OCR signals are firing for known individuals.
-- Expect SIGNAL_UNCOVERED_SOURCES and/or SIGNAL_DOCS_NOT_TRANSCRIBED
-- to fire for at least some Cuthbertson individuals.
-- ============================================================

SELECT
  s.person_gedcom_id,
  p.given_name,
  p.surname,
  p.birth_year,
  s.SIGNAL_UNCOVERED_SOURCES,
  s.SIGNAL_DOCS_NOT_TRANSCRIBED,
  s.SIGNAL_FACT_CONFLICT,
  s.SIGNAL_TRANSCRIPT_AVAILABLE,
  s.SIGNAL_MISSING_CENSUS_COVERAGE,
  s.SIGNAL_IMPRECISE_PLACES
FROM genealogy.gold_research_person_signals s
JOIN genealogy.gold_person_life p ON p.person_gedcom_id = s.person_gedcom_id
JOIN genealogy.gold_person_branch b ON b.person_gedcom_id = s.person_gedcom_id
WHERE b.branch = 'Cuthbertson'
  AND (
    s.SIGNAL_UNCOVERED_SOURCES
    OR s.SIGNAL_DOCS_NOT_TRANSCRIBED
    OR s.SIGNAL_FACT_CONFLICT
    OR s.SIGNAL_TRANSCRIPT_AVAILABLE
  )
ORDER BY p.surname, p.given_name
LIMIT 50;
