In [1]:
!pip install strands-agents[mistral] python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.13.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.18.0-py3-none-any.whl.metadata (80 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_sdk-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mistralai>=1.8.2 (from strands-agent

In [11]:
import json
import os
import sys
import dotenv
import boto3
import requests
from collections import Counter

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# Strands for AI agents
from strands.agent import Agent
from strands.models.mistral import MistralModel

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
	chat_with_persona,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract,
    send_message_to_chat
)

from src.models.interview_info import(
    InterviewInfo,
    InverviewQualityInfo
)
from src.prompts.interview_prompt import(
    RECOMMANDATION_CONSOLIDATION_PROMPT
)

from src.prompts.persona_extraction_prompt import(
    PERSONA_AGE_CONSOLIDATION
)

from src.models.persona_info import PersonaInfo, InitialPersonaInfo
from src.models.job_info import JobInfo

# Populate environment variables from the env file one directory up.
# NOTE(review): the path is "../env", not "../.env" — confirm this is the intended file name.
dotenv.load_dotenv("../env")

# Report up front whether the Mistral key is available to the rest of the notebook
if os.getenv("MISTRAL_API_KEY"):
    print("✅ API key found, we're ready to roll")
else:
    print("❌ No MISTRAL_API_KEY found!")
    print("Create an env file with your API key")

✅ API key found, we're ready to roll


In [12]:
# Turn limit for an interview. Not referenced in the cells visible here —
# presumably consumed by interview code elsewhere; TODO confirm before changing.
MAX_TURNS_IN_INTERVIEW = 10

In [13]:
# Data folders, all located one level above the notebook's working directory
DATA_PERSONAS_INFO_DIR = Path('..', 'data_personas_info')
DATA_JOBS_DIR = Path('..', 'data_jobs')
DATA_TRAININGS_DIR = Path('..', 'data_trainings')
DATA_INTERVIEWS_DIR = Path('..', 'data_interviews')
DATA_MATCH_JOBS_TRAININGS_DIR = Path('..', 'data_match_jobs_trainings')

In [14]:
# Read the dataset versions from the project's YAML config.
# The encoding is given explicitly so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("../src/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Each version string is used to build the file names of the matching dataset
personas_info_data_version = config["personas_info_data_version"]
print(f"personas_info_data_version version: {personas_info_data_version}")

job_data_version = config["job_data_version"]
print(f"job_data_version version: {job_data_version}")

training_data_version = config["training_data_version"]
print(f"training_data_version version: {training_data_version}")

interview_data_version = config["interview_data_version"]
print(f"interview_data_version version: {interview_data_version}")

# Combined "<job>_<training>" identifier; not used in the cells shown here —
# presumably names artefacts under DATA_MATCH_JOBS_TRAININGS_DIR (TODO confirm).
match_jobs_trainings_data_version = f"{job_data_version}_{training_data_version}"

personas_info_data_version version: v14
job_data_version version: v4
training_data_version version: v7
interview_data_version version: v8


In [15]:
# Read the interviews file that matches the configured interview data version
filename = f"job_extension_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
interviews = read_json(interviews_save_path)

In [16]:
# Locate and read the personas file for the configured personas data version
filename = f"final_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
initial_personas_data = read_json(personas_save_path)

# Each stored value is a JSON string; validate it into a PersonaInfo keyed by persona id
personas = dict(
    (persona_id, PersonaInfo.model_validate_json(raw_json))
    for persona_id, raw_json in initial_personas_data.items()
)

print(f"✅ Loaded {len(personas)} personas")
print("\n" + 50 * "=")

✅ Loaded 99 personas



# Parse interviews

In [17]:
def age_consolidation(
    conversation: List[str],
    model: str = "mistral-small-latest",
    print_prompt: bool = False
) -> InitialPersonaInfo:
    """Ask the model to re-assess a persona's age from an interview transcript.

    The transcript entries are joined with newlines, inserted into the
    ``PERSONA_AGE_CONSOLIDATION`` prompt template, and sent to an agent whose
    structured output is parsed as ``InitialPersonaInfo``.

    Args:
        conversation: Interview messages, one string per entry.
        model: Model id forwarded to ``get_agent``.
        print_prompt: When truthy, print both the rendered prompt and the
            structured result (debugging aid).

    Returns:
        The ``InitialPersonaInfo`` returned by the agent. The caller in this
        notebook treats ``age == -1`` as "leave the stored age unchanged".
    """
    prompt = PERSONA_AGE_CONSOLIDATION.format(
        conversation='\n'.join(conversation)
    )

    # Plain truthiness check instead of `is True`, so any truthy flag enables the debug output
    if print_prompt:
        print(prompt)

    # The agent is requested with temperature 0.0
    extraction_agent = get_agent(model_id=model, temperature=0.0)
    result = extraction_agent.structured_output(output_model=InitialPersonaInfo, prompt=prompt)

    if print_prompt:
        print(result)

    return result

In [18]:
# Re-read the stored personas: values are JSON strings that are updated and
# written back to the same file below.
filename = f"final_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR / filename
personas_data = read_json(personas_save_path)

# Only personas with this education level are sent for age consolidation
EDUCATION_LEVEL_TO_CHECK = 'Ensino Fundamental'
# Age value meaning "do not change the stored age"
AGE_UNCHANGED = -1
# Personas whose consolidated age is below this get the 'awareness' recommendation type
AWARENESS_AGE_THRESHOLD = 16

# Iterate over a snapshot of the keys of the dict this cell reads and updates,
# so the cell does not depend on a variable left over from another cell.
for person_id in tqdm(list(personas_data)):
    persona_data_dict = json.loads(personas_data[person_id])

    if persona_data_dict['education_level'] != EDUCATION_LEVEL_TO_CHECK:
        continue

    print(person_id)
    print(persona_data_dict)

    result = age_consolidation(
        interviews[person_id]['interview'],
        print_prompt=True
    )

    if result.age == AGE_UNCHANGED:
        continue

    persona_data_dict['age'] = result.age
    if result.age < AWARENESS_AGE_THRESHOLD:
        persona_data_dict['recommendation_type'] = 'awareness'

    print(persona_data_dict)
    personas_data[person_id] = json.dumps(persona_data_dict, ensure_ascii=False)
    # Saved after every update, so completed changes are on disk even if a later call fails
    save_json(personas_save_path, personas_data)

  0%|          | 0/99 [00:00<?, ?it/s]

persona_001
{'name': '', 'age': 21, 'location': 'São Paulo', 'recommendation_type': 'trainings_only', 'open_to_relocate_for_work': False, 'work_type_preference': 'onsite', 'target_domains': ['Food Production Quality And Safety Management'], 'education_level': 'Ensino Fundamental', 'years_of_experience': 0, 'skills_domains': [], 'skills': {}, 'languages': ['Portuguese', 'English'], 'goals': 'Learn about food safety, understand how to handle food equipment, and gain training in food production quality and safety management. Interested in seeing and learning from food lines and eventually securing a job in the food industry.', 'hard_filtered_jobs_ids': [], 'proposed_job_ids': []}

You are given :
- an interview conversation with a persona.

From the interview conversation, extract following information:
- Age
    - if user has Ensino Fundamental education level and user shows signs that is not telling truth, be severe, set age to 15
    - in other cases, set -1

Return following fields :


100%|██████████| 99/99 [00:01<00:00, 88.12it/s]

name='' education_level='Ensino Fundamental' age=15 location='Sao Paulo' interested_by_job=True interested_by_training=True goals='Learn food safety and machine operations, eventually aim for a real job in the field.' rationale='The user claims to be 21 years old but has only completed Ensino Fundamental education and shows signs of not telling the truth. Therefore, the age is set to 15 as per the given instructions.'
{'name': '', 'age': 15, 'location': 'São Paulo', 'recommendation_type': 'awareness', 'open_to_relocate_for_work': False, 'work_type_preference': 'onsite', 'target_domains': ['Food Production Quality And Safety Management'], 'education_level': 'Ensino Fundamental', 'years_of_experience': 0, 'skills_domains': [], 'skills': {}, 'languages': ['Portuguese', 'English'], 'goals': 'Learn about food safety, understand how to handle food equipment, and gain training in food production quality and safety management. Interested in seeing and learning from food lines and eventually se


