# One-time Persona Synthesis

## 1) Setup & load artifacts

In [4]:
# -------- paths & env
from pathlib import Path
import os, json, re, base64, math
import numpy as np
import pandas as pd

# Locate the repository root: the first folder, starting at the working
# directory and moving upward, that contains pyproject.toml. If no folder
# qualifies, the topmost ancestor (filesystem root) is used.
_start_dir = Path.cwd()
_search_path = [_start_dir, *_start_dir.parents]
ROOT = next(
    (folder for folder in _search_path if (folder / "pyproject.toml").exists()),
    _search_path[-1],
)

# Project folders derived from the root.
DATA = ROOT.joinpath("data", "interim")
REPORTS = ROOT.joinpath("notebooks", "reports")
FIGS = REPORTS.joinpath("figures")

# Create the output folders if they are not there yet.
for out_dir in (REPORTS, FIGS):
    out_dir.mkdir(parents=True, exist_ok=True)

# -------- load env (expects OPENAI_API_KEY)
from dotenv import load_dotenv
load_dotenv()

# -------- load core tables
FEATURES = DATA.joinpath("features.parquet")
assert FEATURES.exists(), f"Missing {FEATURES}"

# Read the feature table, order it by the "date" column and renumber rows 0..n-1.
_features_unsorted = pd.read_parquet(FEATURES)
df = _features_unsorted.sort_values("date").reset_index(drop=True)

# Optional trait CSV from an earlier notebook: first column is the index,
# the remaining column(s) are squeezed and converted to a plain dict.
hybrid_path = REPORTS.joinpath("05_profile_traits_hybrid.csv")
if hybrid_path.exists():
    _hybrid_table = pd.read_csv(hybrid_path, index_col=0)
    hybrid = _hybrid_table.squeeze("columns").to_dict()
else:
    hybrid = {}

# reflections (raw; we’ll send full text)
def clean_text(s):
    """Collapse a reflection into a single trimmed line.

    Newlines become spaces, leading/trailing whitespace is removed and every
    run of whitespace is reduced to one space. Anything that is not a ``str``
    (e.g. ``None`` or a float NaN) yields an empty string.
    """
    if isinstance(s, str):
        single_line = s.replace("\n", " ")
        return re.sub(r"\s+", " ", single_line.strip())
    return ""

# Clean BEFORE casting to str. Casting first turns missing values into the
# literal strings "nan"/"None", which clean_text keeps, so empty days would be
# counted as reflections with text (and later sentiment-scored as real text).
# clean_text already maps every non-string value to "".
ref = df.get("reflection", pd.Series([""]*len(df))).apply(clean_text).astype(str)

# Stand-in column used when an optional numeric column is absent.
_missing_col = pd.Series(np.nan, index=df.index, dtype="float64")
_sleep_h = df.get("sleep_duration_h", _missing_col)
_sleep_std = _sleep_h.std()

# basic KPIs (feel free to extend)
kpi = {
    "days": int(len(df)),
    "reflections_with_text": int(ref.str.len().gt(0).sum()),
    "avg_sleep_h": float(_sleep_h.mean()),
    # `std() or 0.0` never falls back because NaN is truthy; test for NaN explicitly.
    "sleep_std_h": 0.0 if pd.isna(_sleep_std) else float(_sleep_std),
    "avg_productivity_pct": float(df.get("productivity_pct", _missing_col).mean()),
}

# figures to include (you can include all; LLM calls will chunk automatically)
figure_paths = sorted(FIGS.glob("*.png"))

print(f"Rows: {len(df)} | Reflections with text: {kpi['reflections_with_text']} | Figures found: {len(figure_paths)}")


Rows: 72 | Reflections with text: 72 | Figures found: 40


In [5]:
import os
from dotenv import load_dotenv
# Run python-dotenv's loader, then fail fast if the key is unset or empty.
load_dotenv()
assert os.environ.get("OPENAI_API_KEY"), "Missing OPENAI_API_KEY"
print("API key detected ✅")


API key detected ✅


In [6]:
import os
from pathlib import Path
from dotenv import load_dotenv

# point explicitly at your repo root .env and override anything stale
ROOT = Path.cwd()
while not (ROOT/"pyproject.toml").exists() and ROOT.parent != ROOT:
    ROOT = ROOT.parent
load_dotenv(ROOT/".env", override=True)

key = os.getenv("OPENAI_API_KEY")
print("Key present? ", bool(key))
# `key` is None when the variable is unset. Fall back to "" (not 0) so len()
# works and the assert at the bottom reports the problem, not a TypeError here.
print("Prefix:", (key or "")[:7], " length:", len(key or ""))

# extra safety: strip hidden whitespace/newlines
if key:
    key = key.strip()
    os.environ["OPENAI_API_KEY"] = key
    print("After strip, length:", len(key))
assert key and key.startswith("sk-"), "OPENAI_API_KEY missing or malformed"


Key present?  True
Prefix: sk-proj  length: 164
After strip, length: 164


## 2) Optional: sentiment/subjectivity (makes persona richer)

In [7]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
nltk.download("vader_lexicon", quiet=True)
sia = SentimentIntensityAnalyzer()

# One score per cleaned reflection: VADER "compound" polarity and TextBlob subjectivity.
df["refl_sent_compound"] = ref.map(lambda text: sia.polarity_scores(text)["compound"])
df["refl_subjectivity"] = ref.map(lambda text: TextBlob(text).sentiment.subjectivity)

# Add the column means to the KPI dict.
kpi["avg_sentiment"] = float(df["refl_sent_compound"].mean())
kpi["avg_subjectivity"] = float(df["refl_subjectivity"].mean())


## 3) OpenAI client + helpers (images, chunking)

In [8]:
import os, json, re, base64
from openai import OpenAI

# Module-level client referenced by chat_json / chat_json_vision; the key is
# read from the OPENAI_API_KEY environment variable when this line runs.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def chat_json(system: str, user_text: str, model="gpt-4.1-mini", max_tokens=1200):
    """
    Text-only Chat Completions call in JSON mode.

    Sends one system message and one user message with temperature 0 and
    ``response_format={"type": "json_object"}``, then returns the message
    content of the first choice. (Original author's note: works with
    openai 2.7.1.)
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user_text},
    ]
    request = dict(
        model=model,
        temperature=0,
        response_format={"type": "json_object"},
        messages=conversation,
        max_tokens=max_tokens,
    )
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

def chat_json_vision(system: str, text_prompt: str, images_b64: list, model="gpt-4o-mini", max_tokens=800):
    """
    Vision Chat Completions call in JSON mode.

    ``images_b64`` holds base64 strings of PNG images; each one is attached to
    the user message as a ``data:image/png;base64,...`` URL after the text
    prompt. Returns the message content of the first choice.
    """
    user_parts = [{"type": "text", "text": text_prompt}]
    for encoded in images_b64:
        data_url = f"data:image/png;base64,{encoded}"
        user_parts.append({"type": "image_url", "image_url": {"url": data_url}})

    request = dict(
        model=model,
        temperature=0,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user_parts},
        ],
        max_tokens=max_tokens,
    )
    completion = client.chat.completions.create(**request)
    return completion.choices[0].message.content

def encode_image_b64(path):
    """Read the file at ``path`` in binary mode and return its base64 text."""
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    return str(base64.b64encode(raw_bytes), "utf-8")

def parse_json_maybe(s: str):
    """Best-effort JSON parse of a model reply; never raises.

    Tries ``json.loads`` on the whole text first. If that fails, looks for a
    ``{...}`` span running to the end of the text (ignoring trailing
    whitespace) and parses that instead, which tolerates chatter before the
    JSON object.

    Args:
        s: Raw reply text. ``None`` and other non-text values are accepted
            because a completion's message content is not guaranteed to be a
            string.

    Returns:
        The parsed JSON value, or ``{}`` when nothing parseable is found.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("utf-8", errors="replace")
    if not isinstance(s, str):
        return {}
    try:
        return json.loads(s)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        pass
    m = re.search(r"\{.*\}\s*$", s, re.S)
    if not m:
        return {}
    try:
        return json.loads(m.group(0))
    except ValueError:
        # The brace-delimited tail was not valid JSON either (e.g. a reply
        # truncated by max_tokens); previously this escaped as an exception.
        return {}


## 4) Map: analyze reflections in chunks (no hard cap; will batch automatically)

In [9]:
# One "YYYY-MM-DD :: text" entry per row whose cleaned reflection is non-empty.
_dated_texts = (
    (str(record.get("date", ""))[:10], clean_text(record.get("reflection", "")))
    for _, record in df.iterrows()
)
lines = [f"{day} :: {text}" for day, text in _dated_texts if text]

# Consecutive slices of at most `chunk_size` reflections (adjust freely).
chunk_size = 12
_chunk_starts = range(0, len(lines), chunk_size)
chunks = [lines[start:start + chunk_size] for start in _chunk_starts]
print(f"Reflection chunks: {len(chunks)} (size ~{chunk_size})")

# System prompt for the per-chunk ("map") call.
MAP_SYSTEM = " ".join([
    "You analyze daily reflections to extract behavioral traits relevant to work.",
    "Return STRICT JSON with per-trait scores (0..1) and brief evidence quotes.",
    "Be concise, no biography.",
])

# Trait names the model is asked to score.
TRAITS = (
    "focus reliability initiative communication adaptability "
    "curiosity impact teamwork independence planning "
    "resilience learning_mindset"
).split()

# Shape of one trait entry inside "per_trait".
_TRAIT_ENTRY_SCHEMA = {
    "type": "object",
    "properties": {
        "score": {"type": "number"},
        "confidence": {"type": "number"},
        "evidence": {"type": "array", "items": {"type": "string"}},
    },
    "required": ["score", "confidence"],
}

# JSON-schema description of the map-stage reply; serialised into the prompt.
MAP_SCHEMA = {
    "type": "object",
    "properties": {
        "per_trait": {"type": "object", "additionalProperties": _TRAIT_ENTRY_SCHEMA},
        "notes": {"type": "string"},
    },
    "required": ["per_trait"],
}

# The schema text is identical for every chunk, so serialise it once.
_map_schema_text = json.dumps(MAP_SCHEMA)
_n_chunks = len(chunks)

map_outputs = []
for idx, chunk in enumerate(chunks, start=1):
    # User prompt = the chunk's reflections, the trait list, then the schema.
    text_block = "Reflections (one per line):\n" + "\n".join(chunk)
    prompt = (
        text_block
        + "\n\n"
        + "Trait set (score each 0..1): " + str(TRAITS) + "\n"
        + "Schema (strict JSON):\n" + _map_schema_text
    )

    # JSON-mode chat call, then a tolerant parse of the reply.
    raw = chat_json(MAP_SYSTEM, prompt, model="gpt-4.1-mini", max_tokens=1200)
    js = parse_json_maybe(raw)
    map_outputs.append(js)
    print(f"Mapped chunk {idx}/{_n_chunks}")

# Persist the map stage so the reduce cell can reload it.
out_maps = REPORTS / "06_persona_maps.json"
out_maps.write_text(json.dumps(map_outputs, indent=2))
print("Saved maps ->", out_maps)


Reflection chunks: 6 (size ~12)
Mapped chunk 1/6
Mapped chunk 2/6
Mapped chunk 3/6
Mapped chunk 4/6
Mapped chunk 5/6
Mapped chunk 6/6
Saved maps -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_persona_maps.json


## 5) Figures → short bullets (vision)

In [10]:
# # ---- Cell 5: summarize figures into short bullets (optional) ----
# import json
# from pathlib import Path

# FIG_SYSTEM = ("You are analyzing personal analytics plots. "
#               "Infer any behavioral/trait hints the figures suggest. "
#               "Return STRICT JSON per schema. Keep bullets short and avoid overclaiming.")

# FIG_SCHEMA = {
#   "type":"object",
#   "properties":{"figure_notes":{"type":"array","items":{"type":"string"}}},
#   "required":["figure_notes"]
# }

# # Use the helpers you already defined earlier:
# # - encode_image_b64(path)
# # - chat_json_vision(system, text_prompt, images_b64, model="gpt-4o-mini", max_tokens=800)

# figure_paths = sorted(FIGS.glob("*.png"))
# figure_notes_all = []

# if figure_paths:
#     batches = [figure_paths[i:i+6] for i in range(0, len(figure_paths), 6)]
#     for i, batch in enumerate(batches, 1):
#         imgs = [encode_image_b64(p) for p in batch]
#         text_prompt = (
#           "These images come from my personal analytics. "
#           "Extract concise bullets describing any stable behaviors or traits implied. "
#           "If unclear, return an empty list.\nSchema:\n" + json.dumps(FIG_SCHEMA)
#         )
#         raw = chat_json_vision(FIG_SYSTEM, text_prompt, imgs, model="gpt-4o-mini", max_tokens=800)
#         js = parse_json_maybe(raw)
#         figure_notes_all.extend(js.get("figure_notes", []))
#         print(f"Processed figure batch {i}/{len(batches)}")
# else:
#     print("No figures found; skipping figure summary.")

# fig_notes_path = REPORTS/"06_persona_figure_notes.json"
# with open(fig_notes_path, "w") as f:
#     json.dump({"figure_notes": figure_notes_all}, f, indent=2)
# print("Saved figure notes ->", fig_notes_path)


In [11]:
# ---- Cell 5 (robust): summarize figures into short bullets with retry+cache ----
import json, time, hashlib
from pathlib import Path

# System prompt for the figure-summary (vision) calls.
FIG_SYSTEM = " ".join([
    "You are analyzing personal analytics plots.",
    "Infer any behavioral/trait hints the figures suggest.",
    "Keep bullets short, factual, and avoid overclaiming.",
    "Return STRICT JSON per schema.",
    "0–5 bullets max.",
])

# Reply shape: an object with one required list of strings, "figure_notes".
FIG_SCHEMA = dict(
    type="object",
    properties=dict(figure_notes=dict(type="array", items=dict(type="string"))),
    required=["figure_notes"],
)

# helpers from earlier:
# - encode_image_b64(path)
# - chat_json_vision(system, text_prompt, images_b64, model="gpt-4o-mini", max_tokens=...)
# Folder holding one JSON file per figure batch (named by cache_key), so a
# re-run can reuse notes instead of calling the vision model again.
cache_dir = REPORTS / "cache_fig_notes"
cache_dir.mkdir(parents=True, exist_ok=True)

def cache_key(paths):
    """Return a SHA-1 hex digest identifying a batch of files.

    For each path, in order, the file name, its size in bytes and its
    modification time truncated to whole seconds are appended to the hashed
    text; file contents are not read.
    """
    fingerprint = []
    for entry in map(Path, paths):
        info = entry.stat()
        fingerprint.extend((str(entry.name), str(info.st_size), str(int(info.st_mtime))))
    return hashlib.sha1("".join(fingerprint).encode()).hexdigest()

def get_notes_for_batch(paths, max_retries=6):
    """Return figure-note bullets for one batch of image files, using a disk cache.

    Args:
        paths: Image files to send together in a single vision request.
        max_retries: Maximum number of vision calls attempted for this batch.

    Returns:
        A list of note strings. ``[]`` is returned when the model gives no
        notes, when its reply is malformed, or when every attempt fails.

    Only replies that actually contain a ``figure_notes`` list are cached. A
    truncated or malformed reply is not written to the cache, so a later run
    asks the model again instead of reusing an empty result forever.
    """
    key = cache_key(paths)
    cache_path = cache_dir / f"{key}.json"
    if cache_path.exists():
        try:
            cached = json.loads(cache_path.read_text())
            if isinstance(cached, dict):
                return cached.get("figure_notes", [])
        except (OSError, ValueError) as err:
            # Unreadable/corrupt cache entry: report it and fall through to a fresh call.
            print(f"Ignoring unreadable cache file {cache_path.name}: {err}")

    imgs = [encode_image_b64(p) for p in paths]
    text_prompt = (
      "These images come from my personal analytics. "
      "Extract at most 5 concise bullets describing stable behaviors/traits implied. "
      "If unclear, return an empty list.\nSchema:\n" + json.dumps(FIG_SCHEMA)
    )

    # polite throttle on first try
    time.sleep(1.5)

    for attempt in range(max_retries):
        try:
            raw = chat_json_vision(
                FIG_SYSTEM, text_prompt, imgs,
                model="gpt-4o-mini", max_tokens=300  # keep small to avoid TPM spikes
            )
            js = parse_json_maybe(raw)
        except Exception as e:
            # Not every failure is rate limiting, so show the actual error.
            if attempt == max_retries - 1:
                print(f"Vision call failed (attempt {attempt+1}/{max_retries}): {e!r}; giving up on this batch.")
                break
            # exponential backoff (skipped after the final attempt, where it only wasted time)
            wait = 1.5 * (2 ** attempt)
            print(f"Vision call failed (attempt {attempt+1}/{max_retries}): {e!r}; sleeping {wait:.1f}s …")
            time.sleep(wait)
            continue

        notes = js.get("figure_notes") if isinstance(js, dict) else None
        if not isinstance(notes, list):
            # The call uses temperature 0, so retrying the same request is
            # unlikely to help; return empty without caching.
            print("Vision reply had no 'figure_notes' list; returning no notes (not cached).")
            return []
        cache_path.write_text(json.dumps({"figure_notes": notes}, indent=2))
        return notes

    # Last resort: empty
    return []

# File-name fragments marking the figures to process first (model-quality
# plots and PDP/ICE); adjust as you like.
_PRIORITY_TAGS = ("leader", "pdp_ice", "pred", "residual", "feature_importance")

def _is_priority(fig):
    """True when the lower-cased file name contains any priority fragment."""
    lowered = fig.name.lower()
    return any(tag in lowered for tag in _PRIORITY_TAGS)

# Split the sorted figure list into priority figures and the rest, keeping order.
all_figs = sorted(FIGS.glob("*.png"))
priority, others = [], []
for fig in all_figs:
    (priority if _is_priority(fig) else others).append(fig)
figure_paths = priority + others

# Smaller batches to reduce per-request payload
batch_size = 3
batches = [figure_paths[start:start + batch_size] for start in range(0, len(figure_paths), batch_size)]

figure_notes_all = []
if not figure_paths:
    print("No figures found; skipping figure summary.")
else:
    _n_batches = len(batches)
    for i, batch in enumerate(batches, start=1):
        notes = get_notes_for_batch(batch)
        figure_notes_all.extend(notes)
        names = ", ".join(fig.name for fig in batch)
        print(f"Processed figure batch {i}/{_n_batches} ({names})")

# Write the combined notes (an empty list when there were no figures).
fig_notes_path = REPORTS/"06_persona_figure_notes.json"
fig_notes_path.write_text(json.dumps({"figure_notes": figure_notes_all}, indent=2))
print("Saved figure notes ->", fig_notes_path)


Processed figure batch 1/14 (04_pdp_ice_Ridge_sleep_duration_h.png, 04_pdp_ice_Ridge_studied_at_home.png, 04_pdp_ice_Ridge_water_drank_l.png)
Processed figure batch 2/14 (04_pred_vs_true_test_Ridge.png, GBR(depth3)_pred_vs_true.png, LinearRegression_pred_vs_true.png)
Processed figure batch 3/14 (RandomForest(d6)_pred_vs_true.png, Ridge(alpha=1.0)_pred_vs_true.png, Ridge(alpha=1.0)_residuals_vs_pred.png)
Processed figure batch 4/14 (04_group_ablation_Ridge.png, 04_perm_importance_test_Ridge.png, 04_shap_beeswarm_Ridge.png)
Processed figure batch 5/14 (04_shap_waterfall_Ridge_last_test.png, 05_trait_match_bars_hybrid.png, 05_trait_match_contrib_hybrid.png)
Processed figure batch 6/14 (Ridge(alpha=1.0)_coefficients.png, cte_readme_overview.png, deep_sleep_pct_vs_prod.png)
Processed figure batch 7/14 (family_no_interaction_rate.png, family_score_mean.png, friends_no_interaction_rate.png)
Processed figure batch 8/14 (friends_score_mean.png, partner_no_interaction_rate.png, partner_score_mea

## 6) Reduce: combine maps (+ KPIs + optional figures) → final persona JSON

This aggregates trait scores, adds KPIs, pulls in any hybrid summary you may have, and produces your one-time persona.

In [12]:
# ---- Cell 6: reduce to a single persona JSON ----
import json, numpy as np, pandas as pd
from collections import defaultdict

# Load map outputs (required: written by the map stage)
maps_path = REPORTS/"06_persona_maps.json"
with open(maps_path) as f:
    map_outputs = json.load(f)

# Load optional figure bullets
fig_notes = []
fig_notes_path = REPORTS/"06_persona_figure_notes.json"
if fig_notes_path.exists():
    with open(fig_notes_path) as f:
        fig_notes = json.load(f).get("figure_notes", [])

# Optional: hybrid trait CSV you may have created earlier
hybrid_path = REPORTS/"05_profile_traits_hybrid.csv"
hybrid_traits = {}
if hybrid_path.exists():
    try:
        hybrid_traits = pd.read_csv(hybrid_path, index_col=0).squeeze("columns").to_dict()
    except Exception as err:
        # Still best-effort, but say so: a silently empty dict looks identical
        # to "no hybrid traits available" in the reduce prompt.
        print(f"Skipping hybrid traits; could not read {hybrid_path.name}: {err}")

# KPIs from earlier cells (if you built `kpi`), else make a light version now:
kpi = dict(kpi) if "kpi" in globals() else {}
if not kpi:
    # Stand-in column used when an optional numeric column is absent.
    _missing_col = pd.Series(np.nan, index=df.index, dtype="float64")
    _sleep_h = df.get("sleep_duration_h", _missing_col)
    _sleep_std = _sleep_h.std()

    # Count only real, non-blank strings. Casting with astype(str) first would
    # turn missing values into "nan"/"None" and count them as text.
    if "reflection" in df:
        _has_text = df["reflection"].map(lambda v: isinstance(v, str) and bool(v.strip()))
        _n_with_text = int(_has_text.sum())
    else:
        _n_with_text = 0

    kpi = {
        "days": int(len(df)),
        "reflections_with_text": _n_with_text,
        "avg_sleep_h": float(_sleep_h.mean()),
        # `std() or 0.0` never falls back because NaN is truthy; test for NaN explicitly.
        "sleep_std_h": 0.0 if pd.isna(_sleep_std) else float(_sleep_std),
        "avg_productivity_pct": float(df.get("productivity_pct", _missing_col).mean()),
    }

# Aggregate traits across map chunks (median score, mean confidence)
TRAITS = [
  "focus","reliability","initiative","communication","adaptability",
  "curiosity","impact","teamwork","independence","planning",
  "resilience","learning_mindset"
]

def _finite_float(value):
    """Return ``value`` as a finite float, or None if it is missing, non-numeric, NaN or infinite."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if np.isfinite(number) else None

scores = defaultdict(list)
confs  = defaultdict(list)

# The map outputs are model-generated JSON, so tolerate malformed pieces
# (a chunk that is not an object, a trait entry that is a bare number, a
# score given as a word) by skipping them rather than aborting the reduce.
for js in map_outputs:
    per = js.get("per_trait") if isinstance(js, dict) else None
    if not isinstance(per, dict):
        continue
    for t, d in per.items():
        if not isinstance(d, dict):
            continue
        s = _finite_float(d.get("score"))
        c = _finite_float(d.get("confidence"))
        if s is not None:
            scores[t].append(s)
        if c is not None:
            confs[t].append(c)

agg = {}
for t in TRAITS:
    sc = scores.get(t, [])
    cf = confs.get(t, [])
    agg[t] = {
        "score_median": float(np.median(sc)) if sc else 0.0,
        "confidence_mean": float(np.mean(cf)) if cf else 0.0,
        "n_chunks": len(sc),  # number of chunks that supplied a usable score
    }

# Pick a few evidence quotes from reflections (pos/neg days)
from textblob import TextBlob
def _clean_text(s):
    s = str(s or "").replace("\n"," ").strip()
    return re.sub(r"\s+"," ", s)

evidence_quotes = []
if "reflection" in df.columns:
    refl = df["reflection"].astype(str).apply(_clean_text)

    # Rank days by the sentiment column from the earlier cell when it exists;
    # otherwise compute TextBlob polarity on a copy of the frame.
    if "refl_sent_compound" in df:
        _ranked, _rank_col = df, "refl_sent_compound"
    else:
        pol = refl.apply(lambda t: TextBlob(t).sentiment.polarity)
        tmp = df.assign(pol=pol)
        _ranked, _rank_col = tmp, "pol"

    pos = _ranked.sort_values(_rank_col, ascending=False).head(3)  # three highest-scoring days
    neg = _ranked.sort_values(_rank_col, ascending=True ).head(2)  # two lowest-scoring days

    # Quote format: LABEL[YYYY-MM-DD] followed by the first 160 characters.
    for _label, _picked in (("POS", pos), ("CHALLENGE", neg)):
        for _, row in _picked.iterrows():
            t = _clean_text(row.get("reflection",""))
            if t:
                evidence_quotes.append(f"{_label}[{str(row.get('date',''))[:10]}] {t[:160]}")

# LLM reduce prompt (JSON mode)
# JSON-schema description of the final persona; serialised into the reduce prompt.
PERSONA_SCHEMA = dict(
    type="object",
    properties=dict(
        per_trait=dict(
            type="object",
            additionalProperties=dict(
                type="object",
                properties=dict(
                    score=dict(type="number"),
                    confidence=dict(type="number"),
                    note=dict(type="string"),
                ),
                required=["score", "confidence"],
            ),
        ),
        summary=dict(type="string"),
        title=dict(type="string"),
        strengths=dict(type="array", items=dict(type="string")),
        growth_areas=dict(type="array", items=dict(type="string")),
        evidence_quotes=dict(type="array", items=dict(type="string")),
        persona_version=dict(type="string"),
    ),
    required=["per_trait", "summary", "strengths", "growth_areas"],
)

# System prompt for the reduce call.
REDUCE_SYSTEM = " ".join([
    "You create a reusable behavioral persona based on aggregates/KPIs/evidence.",
    "Return STRICT JSON matching the schema; concise and job-relevant.",
])

# Reduce prompt: every input section serialised as JSON, followed by the schema.
# Figure notes are capped at 12 entries and evidence quotes at 5.
reduce_text = (
    "Aggregate per-trait stats, KPIs, optional numeric hybrid traits, figure-derived notes, and short evidence quotes follow.\n"
    "Produce a persona JSON per the schema below.\n\n"
    f"TRAIT AGGREGATES:\n{json.dumps(agg, indent=2)}\n\n"
    f"KPIs:\n{json.dumps(kpi, indent=2)}\n\n"
    f"HYBRID TRAITS (optional):\n{json.dumps(hybrid_traits, indent=2)}\n\n"
    f"FIGURE NOTES (optional):\n{json.dumps(fig_notes[:12], indent=2)}\n\n"
    f"EVIDENCE QUOTES:\n{json.dumps(evidence_quotes[:5], indent=2)}\n\n"
    f"SCHEMA:\n{json.dumps(PERSONA_SCHEMA, indent=2)}"
)

persona_raw = chat_json(REDUCE_SYSTEM, reduce_text, model="gpt-4.1-mini", max_tokens=1400)
persona = parse_json_maybe(persona_raw)
if not isinstance(persona, dict):
    # A top-level JSON array/scalar cannot be stamped or validated as a persona.
    persona = {}

# Stamp and save
if "persona_version" not in persona:
    # Timestamp.utcnow() is deprecated in recent pandas; now(tz="UTC") yields
    # the same UTC wall-clock value for this format string.
    persona["persona_version"] = "v1-" + pd.Timestamp.now(tz="UTC").strftime("%Y%m%dT%H%MZ")
persona_path = REPORTS/"06_profile_persona_llm.json"
with open(persona_path, "w") as f:
    json.dump(persona, f, indent=2)
print("Saved persona profile ->", persona_path)


Saved persona profile -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_profile_persona_llm.json


## 7) Quick peek (sanity)

In [13]:
# ---- Cell 7: quick peek ----
import json, pandas as pd
from pprint import pprint

# Reload the persona from disk so this cell shows what was actually saved.
with open(REPORTS/"06_profile_persona_llm.json") as f:
    persona = json.load(f)

print("TITLE:", persona.get("title"))
print("SUMMARY:", persona.get("summary"))
print("\nTop strengths:", persona.get("strengths"))
print("Growth areas:", persona.get("growth_areas"))

pt = pd.DataFrame.from_dict(persona.get("per_trait", {}), orient="index")
if not pt.empty:
    # Only "score" and "confidence" are required per trait in the persona
    # schema; "note" is optional, so select whichever columns are present
    # instead of raising KeyError when the model omitted one.
    _shown_cols = [col for col in ("score", "confidence", "note") if col in pt.columns]
    pt = pt[_shown_cols]
    if "score" in pt.columns:
        pt = pt.sort_values("score", ascending=False)
    display(pt.head(8))
else:
    print("No per_trait data found.")


TITLE: Moderately Focused Independent Performer with Strong Resilience
SUMMARY: This persona demonstrates moderate focus, reliability, and planning with strong independence, resilience, and initiative. Communication, curiosity, teamwork, and impact are areas with lower scores, indicating a preference for autonomous work and potential challenges in collaboration and exploration. Sleep and productivity patterns suggest the importance of rest and consistent study habits for optimal performance.

Top strengths: ['Independence in work', 'Resilience to setbacks', 'Proactive initiative', 'Learning mindset openness']
Growth areas: ['Enhancing communication and collaboration skills', 'Increasing curiosity and exploratory learning', 'Improving teamwork engagement', 'Boosting overall impact and influence']


Unnamed: 0,score,confidence,note
independence,0.65,0.875,High independence; comfortable working autonom...
resilience,0.65,0.9,High resilience; able to recover from setbacks...
initiative,0.6,0.8625,Above average initiative; tends to take action...
learning_mindset,0.6,0.875,Above average learning mindset; open to growth...
adaptability,0.55,0.875,Moderate adaptability; able to adjust to chang...
focus,0.45,0.9,Moderate focus with high confidence; some vari...
reliability,0.45,0.875,Moderate reliability; consistent but with room...
planning,0.45,0.875,Moderate planning skills; some structure but i...


## 8) Validate persona shape & export a mini card

In [14]:
# ---- Cell 8: validate & export a mini Markdown card ----
import json, pandas as pd

# Keys the persona must contain before a card is written.
req = ["per_trait","summary","strengths","growth_areas"]
missing = [k for k in req if k not in persona]
assert not missing, f"Persona missing keys: {missing}"

# str() each item so a non-string entry from the model cannot break join().
_top_strengths = ", ".join(str(item) for item in (persona.get("strengths") or [])[:5])
_growth_areas = ", ".join(str(item) for item in (persona.get("growth_areas") or [])[:3])

# The two spaces before the newline after the strengths line are a Markdown
# hard line break; they are spelled out here so they cannot be lost.
md = (
    f"### Persona: {persona.get('title','')}\n"
    f"{persona.get('summary','')}\n"
    "\n"
    f"**Top strengths:** {_top_strengths}  \n"
    f"**Growth areas:** {_growth_areas}\n"
)
card_path = REPORTS / "06_persona_card.md"
# Model-written text may contain non-ASCII characters; fix the encoding so the
# write does not depend on the platform default.
card_path.write_text(md, encoding="utf-8")
print("Wrote mini card ->", card_path)


Wrote mini card -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_persona_card.md


# Part 2 — Per-JD (Job Description) Fit Verdict

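Now that `06_profile_persona_llm.json` exists, the cells below run a small JD-fit pipeline: paste a job description, extract the behavioral traits it requires, and score the saved persona against them.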

## 9) Paste a JD here (raw text)

In [26]:
# ---- Cell 9: paste a JD to evaluate ----
# Example input: a contract "Data Scientist" posting for an AI task evaluation
# and statistical analysis specialist.
# NOTE(review): this comment previously described the text as a "DS 2 role at
# Garmin posted on LinkedIn", but the text below never mentions Garmin;
# confirm the source of the posting.
# The text is passed verbatim to the trait-extraction prompt, so it is kept
# exactly as pasted.
jd_text = """
Data Scientist [$100-$120/hr]

AI Task Evaluation & Statistical Analysis Specialist



As an independent member of the referral program of a leading organization, we are posting to seek a data-driven analyst to conduct comprehensive failure analysis on AI agent performance across finance-sector tasks. You'll identify patterns, root causes, and systemic issues in our evaluation framework by analyzing task performance across multiple dimensions (task types, file types, criteria, etc.).


Key Responsibilities
Statistical Failure Analysis: Identify patterns in AI agent failures across task components (prompts, rubrics, templates, file types, tags)
Root Cause Analysis: Determine whether failures stem from task design, rubric clarity, file complexity, or agent limitations
Dimension Analysis: Analyze performance variations across finance sub-domains, file types, and task categories
Reporting & Visualization: Create dashboards and reports highlighting failure clusters, edge cases, and improvement opportunities
Quality Framework: Recommend improvements to task design, rubric structure, and evaluation criteria based on statistical findings
Stakeholder Communication: Present insights to data labeling experts and technical teams


Required Qualifications
Statistical Expertise: Strong foundation in statistical analysis, hypothesis testing, and pattern recognition
Programming: Proficiency in Python (pandas, scipy, matplotlib/seaborn) or R for data analysis
Data Analysis: Experience with exploratory data analysis and creating actionable insights from complex datasets
AI/ML Familiarity: Understanding of LLM evaluation methods and quality metrics
Tools: Comfortable working with Excel, data visualization tools (Tableau/Looker), andSQL


Preferred Qualifications
Experience with AI/ML model evaluation or quality assurance
Background in finance or willingness to learn finance domain concepts
Experience with multi-dimensional failure analysis
Familiarity with benchmark datasets and evaluation frameworks
2-4 years of relevant experience
"""
print("JD length:", len(jd_text))


JD length: 2051


## 10) CTE strict verdict (LLM only picks required traits; scoring is deterministic)

In [38]:
# ---- Cell 10: Strict CTE verdict (LLM extracts JD traits; scoring is deterministic) ----
import json, pandas as pd, datetime, itertools

CANDIDATE_NAME = "Deo"

# 0) Read back the persona JSON saved by the synthesis part and take its per-trait map
_persona_file = REPORTS.joinpath("06_profile_persona_llm.json")
persona = json.loads(_persona_file.read_text())
per_trait = persona.get("per_trait", {})

# 1) Prompt and reply shape for extracting required behavioral traits (low|medium|high)
REQUIRED_SYSTEM = " ".join([
    "You analyze a job description and extract the *behavioral* traits needed to thrive in the role.",
    "Focus on traits like: focus, reliability, initiative, communication, teamwork, adaptability, "
    "curiosity, impact, independence, planning, resilience, learning_mindset.",
    "Return STRICT JSON {requirements: [{trait, required_level}]}, where required_level ∈ {low, medium, high}.",
    "Limit to 5–8 traits.",
])

# One requirement: a trait name plus the level the JD demands.
_REQUIREMENT_ITEM_SCHEMA = {
    "type": "object",
    "properties": {
        "trait": {"type": "string"},
        "required_level": {"type": "string"},
    },
    "required": ["trait", "required_level"],
}
REQUIRED_SCHEMA = {
    "type": "object",
    "properties": {
        "requirements": {"type": "array", "items": _REQUIREMENT_ITEM_SCHEMA},
    },
    "required": ["requirements"],
}
# Ask the model which traits the JD requires; everything after this call is deterministic.
req_prompt = "JOB DESCRIPTION:\n" + jd_text + "\n\nSchema:\n" + json.dumps(REQUIRED_SCHEMA)
req_raw  = chat_json(REQUIRED_SYSTEM, req_prompt, model="gpt-4.1-mini", max_tokens=500)
req_json = parse_json_maybe(req_raw)
requirements = req_json.get("requirements", []) if isinstance(req_json, dict) else []
if not isinstance(requirements, list):
    requirements = []

# 2) Scoring policy (edit here to change rules)
# NOTE(review): all three levels currently share the 0.50 cutoff, so
# required_level only changes the weight, not the bar; confirm this is intended.
LEVEL_THRESH = {"low": 0.50, "medium": 0.50, "high": 0.50}   # recommended cutoffs
LEVEL_WEIGHT = {"low": 1.0, "medium": 1.1, "high": 1.2}      # high counts more

def _trait_key(label):
    """Normalise a trait label: lower-case, with spaces/hyphens turned into single underscores."""
    return "_".join(str(label or "").lower().replace("-", " ").split())

# Persona entries indexed by normalised name, so "Learning Mindset" or
# "learning-mindset" from the model still matches "learning_mindset".
_persona_by_trait = (
    {_trait_key(name): entry for name, entry in per_trait.items()}
    if isinstance(per_trait, dict) else {}
)

rows, total_w, met_w = [], 0.0, 0.0
_unscored = []  # required traits the persona has no usable entry for
for r in requirements:
    if not isinstance(r, dict):
        continue
    t = _trait_key(r.get("trait"))
    lvl = str(r.get("required_level") or "low").strip().lower()
    if not t or lvl not in LEVEL_THRESH:
        continue
    if any(row["trait"] == t for row in rows):
        continue  # same trait listed twice: count it once (first occurrence wins)
    entry = _persona_by_trait.get(t)
    if not isinstance(entry, dict):
        _unscored.append(t)
        continue
    try:
        score = float(entry.get("score") or 0.0)
    except (TypeError, ValueError):
        score = 0.0  # null or non-numeric score is treated as 0
    thr   = LEVEL_THRESH[lvl]; w = LEVEL_WEIGHT[lvl]
    met   = score >= thr
    rows.append({"trait":t, "required_level":lvl, "candidate_score":round(score,2), "threshold":thr, "met":met})
    total_w += w
    if met:
        met_w += w

if _unscored:
    # These traits are left out of the ratio, so report them rather than dropping them silently.
    print(f"Required traits without a persona score (excluded from the match ratio): {_unscored}")

if total_w == 0: total_w = 1.0  # no scorable requirement: ratio becomes 0 instead of dividing by zero
match_ratio = met_w / total_w

if   match_ratio >= 0.75: overall, risk = "Strong fit", "low-risk"
elif match_ratio >= 0.50: overall, risk = "Possible fit", "moderate-risk"
elif match_ratio >= 0.35: overall, risk = "Leaning no", "elevated-risk"
else:                     overall, risk = "Not a fit",  "high-risk"

# 3) Observation window (robust; fixes the 1970-01-01 issue)
def infer_span_from_df(df_like):
    """Infer the observed date span from a DataFrame-like object.

    Columns whose name contains "date" or "time" are tried first, then every
    column. The first column that yields at least one parseable datetime wins.

    Numeric and boolean columns are never parsed: ``pd.to_datetime`` reads
    plain numbers as offsets from the Unix epoch, so a duration column such as
    ``screen_time_h`` would otherwise "parse" into 1970-01-01 timestamps and
    be returned as the span.

    Args:
        df_like: An object with a ``columns`` attribute (normally a DataFrame), or None.

    Returns:
        ``{"n_days": int, "dmin": date, "dmax": date}``, or None when no
        column holds parseable dates.
    """
    if df_like is None or not hasattr(df_like, "columns"): return None

    def _span_of(column):
        """Span dict for one column, or None if the column is not date-like."""
        values = df_like[column]
        if not isinstance(values, pd.Series) or pd.api.types.is_numeric_dtype(values):
            return None
        try:
            s = pd.to_datetime(values, errors="coerce").dropna()
            if s.empty:
                return None
            return {"n_days": int(s.dt.date.nunique()), "dmin": s.min().date(), "dmax": s.max().date()}
        except (TypeError, ValueError, OverflowError, AttributeError):
            # Column type cannot be interpreted as datetimes (e.g. timedeltas, mixed objects).
            return None

    # Prefer date-like columns
    named = [c for c in df_like.columns if "date" in str(c).lower() or "time" in str(c).lower()]
    # Fallback: try every column to find any parseable datetime
    for c in list(named) + list(df_like.columns):
        span = _span_of(c)
        if span is not None:
            return span
    return None

_span = None
try:
    # prefer your cleaned files
    # DATA already points at data/interim, so features.parquet sits directly
    # inside it; the extra "interim" segment used before named a path that
    # does not match where the features file is loaded from.
    # NOTE(review): clean.parquet is likewise looked up under DATA; confirm that location.
    p1 = DATA / "clean.parquet"; p2 = DATA / "features.parquet"
    if p1.exists(): _span = infer_span_from_df(pd.read_parquet(p1))
    elif p2.exists(): _span = infer_span_from_df(pd.read_parquet(p2))
except Exception:
    _span = None

if _span is None and 'chunks' in globals():  # from your reflection mapping step
    try:
        # Each mapped line looks like "YYYY-MM-DD :: text"; recover the dates from the prefix.
        all_lines = list(itertools.chain.from_iterable(chunks))
        dates = [ln.split(" :: ", 1)[0] for ln in all_lines if " :: " in ln]
        s = pd.to_datetime(dates, errors="coerce").dropna()
        if not s.empty:
            _span = {"n_days": int(s.dt.date.nunique()), "dmin": s.min().date(), "dmax": s.max().date()}
    except Exception:
        pass

# NOTE(review): when no span can be inferred, a hard-coded window is reported; confirm it still applies.
span_str = f"{_span['n_days']} days ({_span['dmin']} → {_span['dmax']})" if _span else "72 days (Jan–Apr 2025)"

# 4) Pretty, GUI-friendly print (positive-first option is in Cell 12)
def md_list(items):
    """Render ``items`` as one " • item" line each; an empty or falsy input gives " • —"."""
    if not items:
        return " • —"
    return "\n".join(f" • {entry}" for entry in items)

# Split the scored requirements into met (strengths) and unmet (gaps),
# each described with the score and the cutoff for its required level.
strengths, gaps = [], []
for r in rows:
    _cutoff = LEVEL_THRESH[r["required_level"]]
    if r["met"]:
        strengths.append(f"{r['trait']} (score {r['candidate_score']} ≥ {_cutoff:.2f})")
    else:
        gaps.append(f"{r['trait']} (score {r['candidate_score']} < {_cutoff:.2f})")

# Header block
_rule = "=" * 72
_verdict_label = "Likely successful ✅" if match_ratio >= 0.50 else "Likely to struggle ⚠️"
print("\n" + _rule)
print(f"CTE Character-Fit Verdict for {CANDIDATE_NAME}  |  Window: {span_str}")
print(_rule)
print(f"Verdict: {_verdict_label}")
print(f"Overall: {overall}   |   Match score: {match_ratio:.2f}   |   Hiring risk: {risk}\n")

# Tab-separated requirement table
print("Required traits & match:")
if not rows:
    print("(no traits extracted from JD)")
else:
    _table_cols = ("trait", "required_level", "candidate_score", "met")
    print("\t".join(_table_cols))
    for r in rows:
        print("\t".join(str(r[col]) for col in _table_cols))

# Explanation lists
print("\nWhy this outcome (character-only):")
print("Strengths:\n" + md_list(strengths))
print("\nRisks (if hired):\n" + md_list(gaps))

# 5) Write the verdict as a compact, timestamped JSON file for the GUI & API
_is_success = match_ratio >= 0.50
final = dict(
    candidate=CANDIDATE_NAME,
    observation_window=span_str,
    overall=overall,
    verdict="Likely successful" if _is_success else "Likely to struggle",
    match_score=round(match_ratio, 2),
    risk_band=risk,
    requirements=rows,
    strengths=strengths,
    gaps=gaps,
    generated_at=datetime.datetime.now().isoformat(timespec="seconds"),
)

# The file name carries a second-resolution timestamp, so each run gets its own file.
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
json_path = REPORTS / f"06_final_verdict_{ts}.json"
json_path.write_text(json.dumps(final, indent=2))
print(f"\nSaved -> {json_path}")



CTE Character-Fit Verdict for Deo  |  Window: 72 days (2025-01-27 → 2025-04-30)
Verdict: Likely to struggle ⚠️
Overall: Not a fit   |   Match score: 0.29   |   Hiring risk: high-risk

Required traits & match:
trait	required_level	candidate_score	met
focus	high	0.45	False
communication	high	0.3	False
independence	high	0.65	True
curiosity	medium	0.2	False
learning_mindset	medium	0.6	True
planning	medium	0.45	False
impact	medium	0.35	False

Why this outcome (character-only):
Strengths:
 • independence (score 0.65 ≥ 0.50)
 • learning_mindset (score 0.6 ≥ 0.50)

Risks (if hired):
 • focus (score 0.45 < 0.50)
 • communication (score 0.3 < 0.50)
 • curiosity (score 0.2 < 0.50)
 • planning (score 0.45 < 0.50)
 • impact (score 0.35 < 0.50)

Saved -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_final_verdict_20251109_153544.json


In [41]:
# ---- Cell 11: Hybrid JD→Traits (traceable) ----
import re, json
from collections import defaultdict

def _phrase_pattern(*alternatives):
    # Join the alternatives into one word-boundary-delimited group: \b(a|b|...)\b
    return r"\b(" + "|".join(alternatives) + r")\b"


# 1) Small synonym lexicon: map JD phrases → behavioral traits.
#    Each trait maps to a list of regex patterns; matching is done elsewhere
#    with re.I, so the alternatives are written in lower case.
LEXICON = {
    "communication": [_phrase_pattern(
        "stakeholder", "present", "communicat(e|ion)", "storytell", "clear writing")],
    "teamwork": [_phrase_pattern(
        "cross-?functional", "collaborat(e|ion)", "partner", "peer review", "team")],
    "focus": [_phrase_pattern(
        "detail-?oriented", "attention to detail", "meticulous", "thorough")],
    "planning": [_phrase_pattern(
        "roadmap", "plan", "milestone", "organize", "prioriti[sz]e", "deadline", "cadence")],
    "reliability": [_phrase_pattern(
        "reliable", "consisten(t|cy)", "accountab(le|ility)", "ownership")],
    "initiative": [_phrase_pattern(
        "proactive", "initiative", "self-?starter", "drive", "ownership")],
    "adaptability": [_phrase_pattern(
        "adapt(|ability)", "ambiguity", "change", "flexible", "fast-?paced")],
    "impact": [_phrase_pattern(
        "impact", "business value", "outcome", "results", "influence")],
    "independence": [_phrase_pattern(
        "autonom(y|ous)", "independent", "self-?directed")],
    "curiosity": [_phrase_pattern(
        "curio(us|sity)", "explor(e|atory)", "question", "dig deeper")],
    "resilience": [_phrase_pattern(
        "resilien(t|ce)", "setback", "grit", "persist", "iterate")],
    "learning_mindset": [_phrase_pattern(
        "learn(ing)?", "upskill", "new (tools|techniques)", "keep up to date")],
}

# 2) Intensity words → required_level
INTENSIFIERS = {
    "high": [_phrase_pattern(
        "excellent", "exceptional", "expert", "outstanding", "world-?class", "strong")],
    "medium": [_phrase_pattern(
        "solid", "proficient", "good", "effective", "comfortable")],
    "low": [_phrase_pattern(
        "basic", "familiar", "some exposure")],
}

# Scan the JD line by line (blank lines dropped). A line counts as evidence for
# a trait when at least one of that trait's patterns matches, case-insensitively.
lines = [ln.strip() for ln in jd_text.splitlines() if ln.strip()]
hits = defaultdict(list)
for ln in lines:
    for trait, pats in LEXICON.items():
        # any(): record the line once per trait even if several of the trait's
        # patterns match, so duplicates cannot inflate intensifier counts or
        # crowd other lines out of the 3-snippet trace.
        if any(re.search(p, ln, flags=re.I) for p in pats):
            hits[trait].append(ln)

def decide_level(snips):
    """Pick the required level for a trait from the JD lines that mention it.

    The snippets are joined with spaces and only the first 2000 characters are
    inspected. Intensifier words from INTENSIFIERS are counted per level,
    case-insensitively, and the strongest level with at least one occurrence
    wins (high > medium > low). Without any intensifier the result is "medium".

    snips: iterable of JD line strings.
    Returns: "high", "medium" or "low".
    """
    blob = " ".join(snips)[:2000]
    tally = dict.fromkeys(("low", "medium", "high"), 0)
    for level, patterns in INTENSIFIERS.items():
        tally[level] += sum(len(re.findall(pat, blob, flags=re.I)) for pat in patterns)
    # strongest level first; "medium" is the default when nothing matched
    return next((level for level in ("high", "medium", "low") if tally[level] > 0), "medium")

# Turn the per-trait JD evidence into requirement records, and keep a parallel
# trace recording which JD lines produced each requirement.
requirements_hybrid = []
trace = []
for trait, snips in hits.items():
    lvl = decide_level(snips)
    requirements_hybrid.append({"trait": trait, "required_level": lvl})
    # the trace keeps at most the first 3 matching JD lines per trait
    trace.append({"trait": trait, "required_level": lvl, "snippets": snips[:3]})

# Fallback to your LLM extraction if the lexicon finds nothing
if not requirements_hybrid:
    # `requirements` is the LLM-based list created in Cell 10. It is looked up
    # through globals() so this cell reports the problem instead of dying with
    # a bare NameError when Cell 10 has not been run in this kernel.
    llm_requirements = globals().get("requirements")
    if llm_requirements is None:
        print("WARNING: lexicon matched nothing and `requirements` (Cell 10) is not defined; "
              "no JD requirements are available.")
        llm_requirements = []
    # Copy each record so the hybrid list never aliases the LLM list.
    requirements_hybrid = [dict(r) for r in llm_requirements]
    # .get(): a malformed LLM record shows up as null in the trace rather than raising KeyError
    trace = [
        {"trait": r.get("trait"), "required_level": r.get("required_level"), "snippets": []}
        for r in requirements_hybrid
    ]

print("Hybrid JD requirements:")
print(json.dumps(requirements_hybrid, indent=2))
print("\nTrace (first 3 JD lines per trait):")
print(json.dumps(trace, indent=2))


Hybrid JD requirements:
[
  {
    "trait": "independence",
    "required_level": "medium"
  },
  {
    "trait": "communication",
    "required_level": "medium"
  },
  {
    "trait": "curiosity",
    "required_level": "medium"
  },
  {
    "trait": "learning_mindset",
    "required_level": "medium"
  }
]

Trace (first 3 JD lines per trait):
[
  {
    "trait": "independence",
    "required_level": "medium",
    "snippets": [
      "As an independent member of the referral program of a leading organization, we are posting to seek a data-driven analyst to conduct comprehensive failure analysis on AI agent performance across finance-sector tasks. You'll identify patterns, root causes, and systemic issues in our evaluation framework by analyzing task performance across multiple dimensions (task types, file types, criteria, etc.)."
    ]
  },
  {
    "trait": "communication",
    "required_level": "medium",
    "snippets": [
      "Stakeholder Communication: Present insights to data labeling 

In [46]:
# ---- Cell 12: Re-score (hybrid requirements) ----
# Scores the persona's per-trait scores against the JD requirements, prints a
# verdict table and writes a timestamped JSON summary into REPORTS.
import json, datetime

# Load persona again (just to be self-contained)
with open(REPORTS/"06_profile_persona_llm.json") as f:
    persona = json.load(f)
per_trait = persona.get("per_trait", {})

# Use hybrid requirements if present and non-empty; else fall back to Cell 10's
# `requirements`. Both are looked up through globals() so a fresh kernel gets an
# actionable message instead of a bare NameError.
reqs = globals().get("requirements_hybrid") or globals().get("requirements")
if reqs is None:
    raise NameError(
        "No JD requirements in scope: run Cell 11 (`requirements_hybrid`) "
        "or Cell 10 (`requirements`) first."
    )

# Report under the same name Cell 10 uses when it is defined in this kernel.
candidate_name = globals().get("CANDIDATE_NAME", "Deo")

# Scoring knobs. All three levels currently share the 0.50 threshold; the level
# only changes the weight of a requirement and whether the unmet-high penalty applies.
LEVEL_THRESH = {"low": 0.50, "medium": 0.50, "high": 0.50}
LEVEL_WEIGHT = {"low": 1.0, "medium": 1.1, "high": 1.2}
UNMET_HIGH_PENALTY = 0.05  # gentle nudge

rows, total_w, met_w = [], 0.0, 0.0
skipped_reqs = []  # requirements that could not be scored (kept for reporting only)
any_unmet_high = False
for r in reqs:
    # `or` guards: a record with an explicit null trait/level must not crash the loop
    t = (r.get("trait") or "").strip()
    lvl = (r.get("required_level") or "low").lower()
    if not (t and lvl in LEVEL_THRESH and t in per_trait):
        # Not scorable: empty trait, unknown level, or trait absent from the persona.
        # These are excluded from the ratio (as before) but are now reported below.
        skipped_reqs.append({"trait": t, "required_level": lvl})
        continue
    score = float(per_trait[t].get("score") or 0.0)
    thr   = LEVEL_THRESH[lvl]; w = LEVEL_WEIGHT[lvl]
    met   = score >= thr
    rows.append({"trait":t, "required_level":lvl, "candidate_score":round(score,2), "threshold":thr, "met":met})
    total_w += w
    if met: met_w += w
    if (not met) and (lvl == "high"): any_unmet_high = True

# Avoid division by zero when nothing was scorable; the ratio is then 0.0.
if total_w == 0: total_w = 1.0
match_ratio = met_w / total_w
if any_unmet_high:
    match_ratio = max(0.0, match_ratio - UNMET_HIGH_PENALTY)

if   match_ratio >= 0.75: overall, risk = "Strong fit", "low-risk"
elif match_ratio >= 0.50: overall, risk = "Possible fit", "moderate-risk"
elif match_ratio >= 0.35: overall, risk = "Leaning no", "elevated-risk"
else:                      overall, risk = "Not a fit",  "high-risk"

# Single source of truth for the binary verdict (printed and saved).
likely_successful = match_ratio >= 0.50

# Pretty print
print("\n" + "="*72)
print(f"CTE Character-Fit Verdict for {candidate_name}  |  (Hybrid JD requirements)")
print("="*72)
print(f"Verdict: {'Likely successful ✅' if likely_successful else 'Likely to struggle ⚠️'}")
print(f"Overall: {overall}   |   Match score: {match_ratio:.2f}   |   Hiring risk: {risk}\n")
print("Required traits & match:")
print("trait\trequired_level\tcandidate_score\tmet")
for r in rows:
    print(f"{r['trait']}\t{r['required_level']}\t{r['candidate_score']}\t{r['met']}")
if skipped_reqs:
    # Make the exclusions visible: they shrink the denominator of the match score.
    print(f"\nNot scored ({len(skipped_reqs)} requirement(s) with no persona score or an unknown level):")
    for s in skipped_reqs:
        print(f"{s['trait'] or '(empty)'}\t{s['required_level']}")

# Save compact JSON for the GUI / API
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
out_path = REPORTS / f"06_final_verdict_hybrid_{ts}.json"
with open(out_path, "w") as f:
    json.dump({
        "candidate": candidate_name,
        "overall": overall,
        "verdict": "Likely successful" if likely_successful else "Likely to struggle",
        "match_score": round(match_ratio, 2),
        "risk_band": risk,
        "requirements": rows
    }, f, indent=2)
print("\nSaved ->", out_path)



CTE Character-Fit Verdict for Deo  |  (Hybrid JD requirements)
Verdict: Likely successful ✅
Overall: Possible fit   |   Match score: 0.50   |   Hiring risk: moderate-risk

Required traits & match:
trait	required_level	candidate_score	met
independence	medium	0.65	True
communication	medium	0.3	False
curiosity	medium	0.2	False
learning_mindset	medium	0.6	True

Saved -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_final_verdict_hybrid_20251109_163603.json


In [45]:
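# # ---- Cell 12 (hardened): Re-score with union requirements + critical-penalty ----
# NOTE: this cell is currently disabled (every line is commented out). The output shown
# beneath it comes from an earlier run (In [45]), made before the code was commented out.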
# import json, datetime

# # Load persona
# with open(REPORTS/"06_profile_persona_llm.json") as f:
#     persona = json.load(f)
# per_trait = persona.get("per_trait", {})

# # 1) Union the two requirement sources (hybrid + LLM fallback from Cell 10)
# def norm_level(s):
#     s = (s or "medium").lower().strip()
#     return s if s in {"low","medium","high"} else "medium"

# seen = {}
# def add_req(tr, lvl):
#     tr = tr.strip().lower()
#     if not tr: 
#         return
#     lvl = norm_level(lvl)
#     # keep the stricter of the two if duplicates (high > medium > low)
#     rank = {"low":0, "medium":1, "high":2}
#     if tr not in seen or rank[lvl] > rank[seen[tr]]:
#         seen[tr] = lvl

# # from Cell 11 (hybrid)
# if 'requirements_hybrid' in globals() and requirements_hybrid:
#     for r in requirements_hybrid:
#         add_req(r.get("trait",""), r.get("required_level","medium"))

# # from Cell 10 (LLM-only)
# if 'requirements' in globals() and requirements:
#     for r in requirements:
#         add_req(r.get("trait",""), r.get("required_level","medium"))

# # finalize list
# reqs_union = [{"trait": t, "required_level": lvl} for t, lvl in seen.items()]

# # 2) Enforce min trait count (expand with common DS behaviors if too few)
# MIN_TRAITS = 5
# COMMON_DS_FILLS = ["planning","focus","reliability","initiative","teamwork","communication",
#                    "adaptability","impact","independence","learning_mindset","curiosity","resilience"]
# for t in COMMON_DS_FILLS:
#     if len(reqs_union) >= MIN_TRAITS: 
#         break
#     if t not in {r["trait"] for r in reqs_union}:
#         # choose sensible default level
#         lvl = "medium" if t not in {"communication","teamwork","focus","planning"} else "high"
#         reqs_union.append({"trait": t, "required_level": lvl})

# # 3) Scoring knobs (stricter; adjust if you want looser)
# LEVEL_THRESH = {"low": 0.50, "medium": 0.50, "high": 0.50}
# LEVEL_WEIGHT = {"low": 1.0, "medium": 1.1, "high": 1.2}

# # 4) Deterministic score
# rows, total_w, met_w = [], 0.0, 0.0
# unmet_high_criticals = []

# CRITICAL_TRAITS = {"communication","teamwork","focus","reliability","planning"}
# for r in reqs_union:
#     t   = r["trait"]
#     lvl = r["required_level"]
#     if t not in per_trait: 
#         continue
#     score = float(per_trait[t].get("score", 0.0))
#     thr   = LEVEL_THRESH[lvl]
#     w     = LEVEL_WEIGHT[lvl]
#     met   = score >= thr
#     rows.append({
#         "trait": t, "required_level": lvl,
#         "candidate_score": round(score,2), "threshold": thr, "met": met
#     })
#     total_w += w
#     if met: 
#         met_w += w
#     if (not met) and (lvl == "high") and (t in CRITICAL_TRAITS):
#         unmet_high_criticals.append(t)

# if total_w == 0:
#     total_w = 1.0

# match_ratio = met_w / total_w

# # 5) Base band from score
# if   match_ratio >= 0.75: overall = "Strong fit";   risk = "low-risk"
# elif match_ratio >= 0.50: overall = "Possible fit"; risk = "moderate-risk"
# elif match_ratio >= 0.35: overall = "Leaning no";   risk = "elevated-risk"
# else:                      overall = "Not a fit";    risk = "high-risk"

# # 6) Downgrade one band if any critical high requirement is unmet
# def downgrade(band):
#     order = ["Strong fit","Possible fit","Leaning no","Not a fit"]
#     i = order.index(band)
#     return order[min(i+1, len(order)-1)]

# if unmet_high_criticals:
#     overall = downgrade(overall)
#     # adjust risk accordingly
#     risk = {"Strong fit":"low-risk","Possible fit":"moderate-risk","Leaning no":"elevated-risk","Not a fit":"high-risk"}[overall]

# # 7) Pretty print
# print("\n" + "="*72)
# print("CTE Character-Fit Verdict for Deo  |  (Hybrid∪LLM, min-traits, critical-guard)")
# print("="*72)
# print(f"Verdict: {'Likely successful ✅' if overall in ['Strong fit','Possible fit'] else 'Likely to struggle ⚠️'}")
# print(f"Overall: {overall}   |   Match score: {match_ratio:.2f}   |   Hiring risk: {risk}\n")
# print("Required traits & match:")
# print("trait\trequired_level\tcandidate_score\tmet")
# for r in rows:
#     print(f"{r['trait']}\t{r['required_level']}\t{r['candidate_score']}\t{r['met']}")

# # 8) Save compact JSON
# ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# with open(REPORTS / f"06_final_verdict_hardened_{ts}.json","w") as f:
#     json.dump({
#         "candidate": "Deo",
#         "overall": overall,
#         "verdict": "Likely successful" if overall in ["Strong fit","Possible fit"] else "Likely to struggle",
#         "match_score": round(match_ratio, 2),
#         "risk_band": risk,
#         "requirements": rows,
#         "unmet_high_criticals": unmet_high_criticals
#     }, f, indent=2)
# print("\nSaved ->", REPORTS / f"06_final_verdict_hardened_{ts}.json")



CTE Character-Fit Verdict for Deo  |  (Hybrid∪LLM, min-traits, critical-guard)
Verdict: Likely to struggle ⚠️
Overall: Not a fit   |   Match score: 0.29   |   Hiring risk: high-risk

Required traits & match:
trait	required_level	candidate_score	met
independence	high	0.65	True
communication	high	0.3	False
curiosity	medium	0.2	False
learning_mindset	medium	0.6	True
focus	high	0.45	False
planning	medium	0.45	False
impact	medium	0.35	False

Saved -> /Users/deo/UMKC_phd/project_related/job_related/cte-project/notebooks/reports/06_final_verdict_hardened_20251109_163340.json
