# 01 — Check Job Adverts for Age‑Coded Terms
Use this notebook to load a CSV of adverts and flag phrases from `data/lexicon.csv`.

## How to use
1. Put your adverts in a CSV with columns: `advert_id`, `text` (see `data/adverts_sample.csv`).
2. Make sure `data/lexicon.csv` exists and has a header row. Terms are read from the column named `term` if there is one; otherwise from the first column.
3. Run the cells to see flagged phrases per advert.

In [None]:
import pandas as pd
import re
from pathlib import Path

LEXICON_PATH = Path('data/lexicon.csv')  # update if needed
ADVERTS_PATH = Path('data/adverts_sample.csv')  # update if using your own file

# Load the lexicon. The first row is treated as a header; terms come from the
# 'term' column when present (header matched case-insensitively, ignoring
# surrounding whitespace), otherwise from the first column.
lex = pd.read_csv(LEXICON_PATH)
lex.columns = [c.strip().lower() for c in lex.columns]
term_col = 'term' if 'term' in lex.columns else lex.columns[0]

# Strip BEFORE de-duplicating so 'young' and ' young ' collapse to one entry.
# Matching is case-insensitive downstream, so case variants are also merged
# (first spelling wins); otherwise the same phrase would be flagged twice.
terms = []
_seen_terms = set()
for _raw in lex[term_col].dropna():
    _term = str(_raw).strip()
    _key = _term.casefold()
    if _term and _key not in _seen_terms:
        _seen_terms.add(_key)
        terms.append(_term)
print(f'Loaded {len(terms)} terms from {LEXICON_PATH}')

ads = pd.read_csv(ADVERTS_PATH)
print(f'Loaded {len(ads)} adverts from {ADVERTS_PATH}')

# Fail here with a readable message rather than later with a bare KeyError
# when the flagging step indexes the 'text' column.
if 'text' not in ads.columns:
    raise KeyError(
        f"{ADVERTS_PATH} must contain a 'text' column; found columns: {list(ads.columns)}"
    )

def find_terms(text, terms):
    """Return the lexicon terms that occur in ``text`` as whole words/phrases.

    Matching is case-insensitive. A term matches only when it is not directly
    preceded or followed by a word character, so ``young`` does not match
    inside ``youngster``. Unlike a plain ``\\b`` boundary, this also works for
    terms that begin or end with punctuation (e.g. ``18+``).

    Args:
        text: Advert text to scan. Non-string values are converted with
            ``str()``. ``None`` and float NaN (e.g. an empty CSV cell loaded
            by pandas) are treated as having no text.
        terms: Iterable of term strings to look for. Empty terms are skipped.

    Returns:
        A list of the terms found, in the order they appear in ``terms``.
    """
    # A missing value would otherwise be searched as the literal 'None'/'nan'.
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return []

    haystack = str(text)
    hits = []
    for t in terms:
        if not t:
            # An empty pattern would "match" at almost any position.
            continue
        # Lookarounds instead of \b: \b needs a word character on one side,
        # so it can never match next to a term's leading/trailing punctuation.
        pattern = re.compile(
            r'(?<!\w)' + re.escape(t) + r'(?!\w)', flags=re.IGNORECASE
        )
        if pattern.search(haystack):
            hits.append(t)
    return hits

# Scan every advert's text against the lexicon and attach the list of matched
# terms as a new 'flagged' column; the original `ads` frame is left untouched.
flagged_per_advert = ads['text'].apply(find_terms, args=(terms,))
results = ads.assign(flagged=flagged_per_advert)

# Bare expression so the notebook renders the table as the cell output.
results