In [3]:
!pip install strands-agents[mistral] python-dotenv dictdiffer

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting dictdiffer
  Downloading dictdiffer-0.9.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.13.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.19.0-py3-none-any.whl.metadata (85 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_s

In [13]:
import json
import os
import sys
import dotenv
import boto3
import requests
from collections import Counter
import yaml

import copy

from datetime import datetime

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

# Put the parent directory on the import path so the `src.*` imports resolve.
# Guarded so that re-running this cell does not stack duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
    chat_with_persona,
    validate_submission_format,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract
)

from src.models.persona_info import PersonaInfo
from src.models.job_info import JobInfo
from src.models.training_info import TrainingInfo
from src.models.generic_models import (
    BooleanModel,
    BooleanModelWithRationale,
    ListOfIds
)

from src.prompts.find_training_matches_prompt import (
    FIND_TRAINING_MATCHES_PROMPT,
    FIND_TRAINING_MATCHES_PROMPT_BY_NAME,
    CHECK_PERSONA_TRAINING_MATCH,
    FIND_TRAINING_MATCHES_FOR_JOB_PROMPT
)

from src.prompts.find_job_matches_prompt import (
    FIND_JOB_MATCHES_PROMPT
)

In [14]:
# Folder holding the persona-info JSON files, relative to this notebook.
DATA_PERSONAS_INFO_DIR = Path("../data_personas_info")

# Folder holding the interview JSON files, relative to this notebook.
DATA_INTERVIEWS_DIR = Path("../data_interviews")

In [15]:
# Read the project configuration. The file is decoded as UTF-8 explicitly so the
# parsed values do not depend on the platform's default locale encoding.
with open("../src/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Version tag used to build the personas-info file name.
personas_info_data_version = config["personas_info_data_version"]
print(f"personas_info_data_version version: {personas_info_data_version}")

# Version tag used to build the interview file names.
interview_data_version = config["interview_data_version"]
print(f"interview_data_version version: {interview_data_version}")

personas_info_data_version version: v12
interview_data_version version: v8


In [16]:
# Locate and read the personas-info file for the configured data version.
filename = f"last_final_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
personas_data = read_json(personas_save_path)

# Each value is a JSON document that is parsed into a PersonaInfo model,
# keyed by the same persona id as in the file.
personas = dict(
    (persona_key, PersonaInfo.model_validate_json(raw_json))
    for persona_key, raw_json in personas_data.items()
)

print(f"✅ Loaded {len(personas)} personas")
print("\n" + "="*50)

✅ Loaded 100 personas



In [17]:
# Initial interviews for the configured interview data version.
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
initial_interviews = read_json(interviews_save_path)

In [18]:
# Job-extension interviews for the configured interview data version.
filename = f"job_extension_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
job_extension_interviews = read_json(interviews_save_path)

In [19]:
# Job-interview round prompts for the configured interview data version.
# NOTE(review): unlike the sibling files, this name has no "_" before the
# version tag — confirm it matches the file on disk.
filename = f"job_interview_round_prompt{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
job_interview_round_prompt = read_json(interviews_save_path)

In [20]:
# Jobs interviews for the configured interview data version.
filename = f"jobs_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
jobs_interviews = read_json(interviews_save_path)

In [21]:
# Training-domain extension interviews for the configured interview data version.
filename = f"training_domain_extension_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
training_domain_extension_interviews = read_json(interviews_save_path)

In [22]:
# Training-skills extension interviews for the configured interview data version.
filename = f"training_skills_extension_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
training_skills_extension_interviews = read_json(interviews_save_path)

In [24]:
# Persona to inspect in the cells that follow.
persona_id = "persona_036"

In [25]:
# Print everything loaded for the selected persona: its readable description,
# each interview source in turn, and finally the raw persona object.
_SECTION_RULE = "\n" + "="*50


def _join_turns(record):
    """Join the entries stored under ``record['interview']`` with blank lines."""
    return "\n\n".join(record['interview'])


def _lead_with_blank_lines(prompt):
    """Prefix a prompt string with two newlines so it stands apart from its title."""
    return "\n\n" + prompt


# (section title, mapping keyed by persona id, renderer for that mapping's values)
_interview_sections = [
    ('initial_interviews', initial_interviews, _join_turns),
    ('job_extension_interviews', job_extension_interviews, _join_turns),
    # This source stores a plain string per persona rather than an interview record.
    ('job_interview_round_prompt', job_interview_round_prompt, _lead_with_blank_lines),
    ('jobs_interviews', jobs_interviews, _join_turns),
    ('training_domain_extension_interviews', training_domain_extension_interviews, _join_turns),
    ('training_skills_extension_interviews', training_skills_extension_interviews, _join_turns),
]

print(_SECTION_RULE)
print('Persona')
print(personas[persona_id].describe())

for _title, _by_persona, _render in _interview_sections:
    print(_SECTION_RULE)
    print(_title)
    if persona_id in _by_persona:
        # `interview_str` is kept as a cell-level name, as in the original cell.
        interview_str = _render(_by_persona[persona_id])
        print(interview_str)
    else:
        # A persona missing from a source gets a placeholder instead of a KeyError;
        # this now also covers initial_interviews, which was previously unguarded.
        print("No interview")

print(_SECTION_RULE)
print('Persona')
print(personas[persona_id])



Persona
Name: 
Age: 16
Location: Porto Alegre
Recommendation type: trainings_only
Open to relocate for work: No
Work type preference: 
Education level: 
Years of experience: 0
Skills Domains: Procurement And Supply Chain Management
Skills: Procurement And Supply Chain Management : Market Analysis And Strategic Procurement: None, Procurement And Supply Chain Management : Advanced Negotiation And Cost Reduction: None, Procurement And Supply Chain Management : Supplier Evaluation And Selection: None
Languages: 
Target domains: 
Goals: Understand how companies make purchasing decisions, learn about related career paths and training, curious about roles and responsibilities in purchasing from intern to experienced professionals.

initial_interviews
Assistant: Hello, I'm here to helping you navigate your career path.


User: Oh hey! That’s cool. I love shopping with my friends and comparing prices online—it’s like a game for me. Sometimes I wonder, is buying stuff for companies kinda like t