In [3]:
!pip install strands-agents[mistral] python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.14.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.19.0-py3-none-any.whl.metadata (85 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_sdk-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mistralai>=1.8.2 (from strands-agent

In [4]:
import json
import os
import sys
import dotenv
import boto3
import requests

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# Strands for AI agents
from strands.agent import Agent
from strands.models.mistral import MistralModel

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
	chat_with_persona,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract,
    send_message_to_chat
)

from src.models.interview_info import(
    InterviewInfo,
    InverviewQualityInfo,
    InterviewAgentMessage
)
from src.prompts.interview_prompt import(
    INITIAL_INTERVIEW_PROMPT,
    INITIAL_INTERVIEW_PROMPT_REVERSE,
    TRANSLATE_INTERVIEW_PROMPT,
    INITIAL_INTERVIEW_QUALITY_CHECK_PROMPT
)

# Load API key from .env file
dotenv.load_dotenv("../env")

# Check if we're good to go
if not os.getenv("MISTRAL_API_KEY"):
    print("‚ùå No MISTRAL_API_KEY found!")
    print("Create an env file with your API key")
else:
    print("‚úÖ API key found, we're ready to roll")

‚úÖ API key found, we're ready to roll


In [5]:
MAX_TURNS_IN_INTERVIEW = 10

In [6]:
DATA_JOBS_DIR = Path('../data_jobs')
DATA_TRAININGS_DIR = Path('../data_trainings')
DATA_INTERVIEWS_DIR = Path('../data_interviews')

In [7]:
with open("../src/config.yaml", "r") as f:
    config = yaml.safe_load(f)

job_data_version = config["job_data_version"]
print(f"job_data_version version: {job_data_version}")

training_data_version = config["training_data_version"]
print(f"training_data_version version: {training_data_version}")

interview_data_version = config["interview_data_version"]
print(f"interview_data_version version: {interview_data_version}")

job_data_version version: v4
training_data_version version: v7
interview_data_version version: v8


In [8]:
filename = f"map_clusters_jobs_{job_data_version}.json"
save_path = DATA_JOBS_DIR / filename
jobs_map = read_json(save_path)

In [9]:
# Load skills domains data
filename = f"final_map_clusters_trainings_{training_data_version}.json"
save_path = DATA_TRAININGS_DIR / filename
trainings_map = read_json(save_path)

print(f"‚úÖ Loaded {len(trainings_map)} skills domains")
print("\n" + "="*50)

‚úÖ Loaded 12 skills domains



# Perform interviews

In [10]:
def conduct_persona_interview(
    persona_id: str,
    max_turns: int = 5,
    model: str = "mistral-medium-latest",
    print_conversation: bool = False
) -> List[str]:
    """Interview a persona and return conversation transcript"""

    interview = InterviewInfo()

    # prompt = INITIAL_INTERVIEW_PROMPT
    prompt = INITIAL_INTERVIEW_PROMPT_REVERSE

    interview_agent = get_agent(prompt, model_id=model)
    conversation_id = None

    # Start with greeting
    agent_message = "Hello, I'm here to help you navigate your career path.\n"
    #agent_message += "To do so effectively, I may need to collect some personal information ‚Äî rest assured, your privacy is fully respected and protected."
    if print_conversation:
        print(f"Assistant: {agent_message}")
    interview.interview.append(f"Assistant: {agent_message}")

    # Conduct interview
    for turn in range(max_turns):
        
        resp = send_message_to_chat(agent_message, persona_id, conversation_id)
        print(resp)

        if resp is None:
            break

        user_response, conversation_id = resp
        interview.conversation_id = conversation_id
        interview.interview.append(f"User: {user_response}")
        if print_conversation:
            print(f"User: {user_response}")

        # Generate next question
        conversation_str = '\n'.join(interview.interview)
        agent_response = interview_agent.structured_output(output_model=InterviewAgentMessage, prompt=conversation_str)
        # agent_response = interview_agent.structured_output(output_model=InterviewAgentMessage, user_response)

        # Track cost (using utils.py function)
        # track_api_call(agent_response, model)

        if agent_response.conversation_finished is True:
            break
            
        agent_message = agent_response.message
        interview.interview.append(f"Assistant: {agent_message}")
        if print_conversation:
            print(f"Assistant: {agent_message}")

    return interview

In [9]:
# Interview all personas
#persona_ids = [f'persona_{i:03}' for i in range(1, 4)]
persona_ids = [f'persona_{i:03}' for i in range(1, 101)]

# personas_save_path = SUBMISSION_DIR / 'personas.json'
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename

if not interviews_save_path.exists():
    save_json(interviews_save_path, {})

interviews = read_json(interviews_save_path)
personas_to_process = len(persona_ids) - len(interviews)
print(f'Personas to process: {personas_to_process}')

# Reset cost tracker if starting fresh
if len(interviews) == 0:
    reset_cost_tracker()
    print("üí∞ Starting fresh - cost tracker reset")

# Track how many new personas we process
new_personas_processed = 0

for persona_id in tqdm(persona_ids):
    if persona_id in interviews:
        interview = interviews[persona_id]['interview']
        if len(interview) > 1:
            continue
        
    new_personas_processed += 1

    activity_domains_str = ""
    first = True
    for domain in jobs_map:
        if first is False:
            activity_domains_str += ", "
        activity_domains_str += domain
        first = False
        
    skill_domains_str = ""
    first = True
    for domain in trainings_map:
        if first is False:
            skill_domains_str += ", "
        skill_domains_str += domain
        first = False
    
    # Interview
    conversation = conduct_persona_interview(persona_id, max_turns=MAX_TURNS_IN_INTERVIEW, print_conversation=False)
    interviews[persona_id] = conversation.model_dump()

    # Save every personas
    if len(interviews) % 1 == 0:
        save_json(interviews_save_path, interviews)

    # Show cost update every 20 personas
    # if new_personas_processed > 0 and new_personas_processed % 20 == 0:
    #     print(f"\nüí∞ Cost update after {new_personas_processed} new personas:")
    #     print_cost_summary()
    #     print()

    # if new_personas_processed > 4:break

save_json(interviews_save_path, interviews)

print(f"\n‚úÖ Interviewed {len(interviews)} personas total ({new_personas_processed} new)")

# Final cost summary for persona processing
if new_personas_processed > 0:
    print("\nüìä Persona processing costs:")
    print_cost_summary()

Personas to process: 0


  0%|          | 0/100 [00:00<?, ?it/s]

('Oh, hello. I don‚Äôt have much experience, but I‚Äôm trying to find a job in Industrial HSE. Maybe you can help?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_084#2025-10-20T19:38:53')
('I‚Äôm open to relocate, even abroad. I just want to work in compliance reporting or hazardous materials... but I‚Äôm still learning.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_084#2025-10-20T19:38:53')
('I‚Äôm 20. No country in mind‚ÄîI‚Äôll go where there‚Äôs a job. I don‚Äôt want to say where I live now, but I can move. Sorry if that‚Äôs not helpful.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_084#2025-10-20T19:38:53')


 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 84/100 [00:14<00:02,  5.72it/s]

('Hello! I am Lucas, 16 years old. I like math and talking with people. Can you tell me about jobs in HSE? Do they use math a lot? Or do they talk more? Sorry my English, I am still learning.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_085#2025-10-20T19:39:08')
('Oh, I live in Salvador, Brazil! I not looking for job, just want know things. Like... do HSE people count numbers every day? Or they write reports and talk to workers? I like math but also like talk to people. Is HSE good for that? I dont want move from Salvador.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_085#2025-10-20T19:39:08')
('Yes, I go to school still, Ensino Fundamental. I just want know about HSE jobs in Brazil, in Salvador. Like... if they use math to check if machines are safe? Or if they talk to workers to teach safety? I curious how math and talking mix in this job. You can tell me?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_085#2025-10-20T19:39:08')


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 85/100 [00:25<00:05,  2.79it/s]

('Oh, hello. I‚Äôm Carla. I just want to understand what a job in Industrial HSE is really like. The hours, the tasks... that kind of thing.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_086#2025-10-20T19:39:19')
('I‚Äôm in Belo Horizonte. I don‚Äôt want to move. And I‚Äôm 27. I just... want to know what the work is like in HSE. The daily things.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_086#2025-10-20T19:39:19')


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 86/100 [00:34<00:07,  1.82it/s]

('Ah, like a compass in the storm,\nyou arrive just as my dreams take form‚Äî\ntwo years spent in safety‚Äôs embrace,\nwhere every drill felt like grace.\n\nEmergency plans, my hands have shaped,\nwhile green regulations whispered, *adapt*.\nNow I seek a place to grow,\nwhere sustainability and safety flow.\n\nTell me, where do such paths begin?\nI‚Äôll pack my bags, I‚Äôll dive right in‚Äî\nfor the right role, I‚Äôll cross the land,\nwith a hard hat and a steady hand.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_087#2025-10-20T19:39:28')
('Oh, the map unfolds with endless light‚Äî\nBrazil‚Äôs green heart calls, but I‚Äôm open to flight.\nA job? Yes, that‚Äôs the dream I chase,\nwhere emergency plans and green laws interlace.\n\nTraining? Only if it leads the way\nto a post where I can work and stay.\nAs for roots‚Äîwhat‚Äôs a city‚Äôs name?\nI‚Äôm untethered, free, untamed.\n\n(But private threads I won‚Äôt unravel‚Äî\nlet‚Äôs speak instead of safety‚Äôs travel.)\nWhere do *you* se

 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 87/100 [00:49<00:13,  1.03s/it]

('Hello! That‚Äôs so exciting to hear‚ÄîI really need some guidance right now. I‚Äôm Pedro, I‚Äôm 18, and I‚Äôm just starting out in my career. I live in Rio de Janeiro, and I‚Äôm really passionate about the idea of working in culture and document management. It might sound a bit niche, but I love how documents and records can preserve history and culture, and I‚Äôve been dreaming about how technology could make this field even more powerful.\n\nI keep thinking about things like AI helping to organize archives, or blockchain making records more secure, or even VR letting people explore digital museums. It feels like there‚Äôs so much potential! But here‚Äôs the thing‚ÄîI have zero experience. I mean, I‚Äôve studied a bit in my *Tecn√≥logo* program, but I‚Äôve never actually worked in the field. I‚Äôm not even sure where to begin. Do you think someone like me could really break into this area? Or should I listen to my friends who keep saying I should go into something ‚Äúcooler‚Äù like 

 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 88/100 [00:56<00:15,  1.29s/it]

('Hi. I want to work with something related to security. Maybe cybersecurity? I know a bit about data security.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_089#2025-10-20T19:39:50')
('I‚Äôm 18 and I live in Rio. I don‚Äôt want to move. I was thinking about jobs, but I‚Äôm not sure if cybersecurity is the right path. I also know about records retention and organizing documents. Maybe that‚Äôs more my area?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_089#2025-10-20T19:39:50')
('I‚Äôd prefer a job in Rio. I have some experience with records retention policies and data security, but I‚Äôm not sure if that‚Äôs enough for cybersecurity. Maybe something in document management would fit better?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_089#2025-10-20T19:39:50')


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 89/100 [01:10<00:22,  2.02s/it]

('Oh, hello. I was just wondering... if clean energy grows, will libraries run only on solar panels? I mean, how does that change how documents are kept safe?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_090#2025-10-20T19:40:04')
('I‚Äôm 22. I don‚Äôt want a job or training right now. Just... I like to know how things work. Like, if libraries use solar power, does that change how they organize files? Or keep old books safe?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_090#2025-10-20T19:40:04')


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 90/100 [01:19<00:26,  2.60s/it]

('Oh... hello.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_091#2025-10-20T19:40:13')
('I... I want a job. In Recife.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_091#2025-10-20T19:40:13')
('28...', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_091#2025-10-20T19:40:13')


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 91/100 [01:28<00:29,  3.24s/it]

('Hey, that‚Äôs cool! I‚Äôm Thiago, and I just wanna get as many trainings as possible in Culture and Document Management. Like, everything‚Äîdigital archiving, records retention, data security, you name it. The more I take, the faster I‚Äôll be an expert, right?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_092#2025-10-20T19:40:22')
('I‚Äôm in Bras√≠lia and not planning to move, so yeah, trainings here in Brazil only. I‚Äôm 24, by the way‚Äîold enough for all the courses, haha. Just wanna stack up those trainings in document management and culture stuff, no time to waste!', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_092#2025-10-20T19:40:22')


 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 92/100 [01:36<00:30,  3.84s/it]

('Thank you, I really appreciate that. I‚Äôve always loved the energy of the port‚Äîthe ships coming in, the coordination, the rhythm of it all. But after what happened, I know my place is on land, helping things run smoothly from here.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_093#2025-10-20T19:40:30')
('I‚Äôm 22 and looking for a job in Rio‚Äîno plans to relocate. Training could help, but right now, I‚Äôm focused on finding a role in port operations. I just want to be part of the team that keeps everything moving safely on land.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_093#2025-10-20T19:40:30')
('Yes, I live in Rio and wouldn‚Äôt want to move. My main question is about opportunities in scheduling, dispatch, or cargo handling‚Äîanything that keeps me close to the ships but firmly on land. I‚Äôve got some experience, but I‚Äôd love to know where to focus my efforts.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_093#2025-10-20T19:40:30')


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 93/100 [02:00<00:47,  6.79s/it]

('Oh, thank you! I really want to work on the ships, you know? Just being out there, feeling the water... but I can‚Äôt stay away from home for too long. Maybe there‚Äôs a way to embark for just a few hours? I‚Äôd do anything to be part of that world.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_094#2025-10-20T19:40:54')
('I‚Äôm only 19, and I can‚Äôt leave Curitiba‚Äîmy family needs me here. I don‚Äôt want to go abroad, just something near the port where I can still come home every night. I thought maybe I could work on a ship for short trips, but if that‚Äôs not possible... then I guess I need trainings. For scheduling, dispatch, or security‚Äîanything that keeps me close to the ships, even if I‚Äôm not on board.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_094#2025-10-20T19:40:54')


 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 94/100 [02:19<00:54,  9.12s/it]

('That‚Äôs great to hear. I‚Äôve always been drawn to the sea, but due to personal commitments, I can‚Äôt embark. My focus is on improving my skills in customs procedures, vessel communication, and equipment handling‚Äîall from land-based roles. I‚Äôm looking for training opportunities that align with these goals.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_095#2025-10-20T19:41:13')
('I‚Äôm 30 years old and currently based in Curitiba. As I mentioned, I‚Äôm looking to grow in port operations‚Äîcustoms, equipment handling, and vessel communication‚Äîwithout embarking. Training is my priority.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_095#2025-10-20T19:41:13')


 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 95/100 [02:25<00:41,  8.38s/it]

('Thank you. I have a background in mathematics and science, and I‚Äôm eager to apply those skills to port logistics‚Äîspecifically in scheduling and cargo flow optimization. I‚Äôm looking for shore-based roles where analytical thinking can improve efficiency.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_096#2025-10-20T19:41:19')
('I‚Äôm focused on finding a job in Brazil, specifically in Bras√≠lia or nearby port regions‚ÄîI can‚Äôt relocate. My priority is a role in port logistics or scheduling where I can use my analytical skills to solve real operational challenges. Trainings could be useful, but only if they directly support that goal.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_096#2025-10-20T19:41:19')
('I‚Äôm 18 and yes, I currently live in Bras√≠lia. My age hasn‚Äôt limited my ability to handle complex calculations or logistics planning‚ÄîI‚Äôm ready to contribute to port operations with the analytical skills I‚Äôve developed.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#pers

 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 96/100 [02:50<00:48, 12.17s/it]

('That‚Äôs wonderful to hear! I‚Äôve always known I belong at sea‚Äîgrowing up by the coast, watching ships come and go, it‚Äôs in my blood. I‚Äôm not interested in land jobs; I want to embark, feel the waves beneath me, and be part of a team that keeps everything running smoothly on board. Adventure and travel are what drive me.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_097#2025-10-20T19:41:44')
('I‚Äôm open to relocating anywhere if it means embarking and living that maritime life‚ÄîBrazil or abroad, it doesn‚Äôt matter as long as I‚Äôm on board! Training opportunities sound exciting, especially if they help me grow in navigation, watchkeeping, or even engine room basics. I‚Äôm eager to learn anything that keeps me at sea. Careers on land just don‚Äôt appeal to me; I want to wake up to the horizon every day.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_097#2025-10-20T19:41:44')
('I‚Äôm 28, full of energy and ready to take on the challenges of life at sea. As for my current c

 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 97/100 [02:59<00:34, 11.45s/it]

('Hello. It‚Äôs nice to have someone to talk to about this. I‚Äôve been thinking a lot lately‚Äîmaybe a job on land would be simpler, safer. But then I remember how the ocean feels, the way the wind carries you between ports. It‚Äôs hard to imagine doing anything else.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_098#2025-10-20T19:41:53')
('I‚Äôm in Curitiba right now, and I don‚Äôt want to move away from here‚Äîit‚Äôs where my family is. But I‚Äôm not looking for training; I already have my technical background and some experience. I just need to decide if I should take a steady job here or finally answer that call to work at sea. The ports, the voyages‚Ä¶ it‚Äôs what I‚Äôve always wanted, even if part of me worries it‚Äôs not the practical choice.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_098#2025-10-20T19:41:53')
('I do have the skills‚Äîsafety procedures, navigation, the regulations like SOLAS and MARPOL‚ÄîI‚Äôve trained for this. But then I think about my family, how they

 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 98/100 [03:23<00:29, 14.58s/it]

('Oh, that sounds amazing! I‚Äôve always wanted a life full of adventure, you know? Something where every day feels different, where I can explore and see new things. I just don‚Äôt know where to start, but I‚Äôm so ready for it. What kind of exciting paths are out there?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_099#2025-10-20T19:42:17')
('I‚Äôm 28 and definitely not under 16‚Äîhaha! I‚Äôm not really looking for a job just yet, but I‚Äôm super curious about training opportunities. Something that prepares me for an exciting life, where I can move around, see new places, and maybe even work in different environments. I just want to feel ready for whatever adventure comes my way! What kind of trainings could open doors like that?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_099#2025-10-20T19:42:17')
('I‚Äôm in Salvador‚Äîit‚Äôs such a vibrant place, full of energy! I love that there‚Äôs always something happening here, but I‚Äôm hungry for even more. More movement, more discover

 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 99/100 [03:41<00:15, 15.59s/it]

('Oh, that sounds amazing! I‚Äôve always dreamed of a life full of adventure and travel. The sea calls to me, you know? I want to be out there, exploring the world, not stuck in one place.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_100#2025-10-20T19:42:37')
('Oh, I‚Äôm in Rio‚Äîit‚Äôs where my heart is, with the ocean right there! I‚Äôm not really looking for a job yet, but I want to learn, to prepare for something big. Trainings sound perfect, especially if they can teach me things for my future adventures. I just know I don‚Äôt want to be tied to land forever!', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_100#2025-10-20T19:42:37')
('Oh, age is just a number when you have big dreams, right? I‚Äôm still young, but old enough to know what I want‚Äîfreedom, the sea, and a life full of stories to tell! Let‚Äôs just say I‚Äôm at the perfect age to start learning for my future.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_100#2025-10-20T19:42:37')
('Oh, I don‚Äôt want to go too fa

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [03:58<00:00,  2.39s/it]


‚úÖ Interviewed 100 personas total (17 new)

üìä Persona processing costs:
üí∞ Cost Summary:
  Total API calls: 0
  Total tokens: 0
  Estimated cost: $0.0000





---

# For Debug Only

---

# Check interview qualities

In [10]:
def check_interview_quality(
    interview: str,
    model: str = "mistral-small-latest",
    print_prompt: bool = False
) -> InverviewQualityInfo:
    prompt = INITIAL_INTERVIEW_QUALITY_CHECK_PROMPT.format(
        interview=interview
    )

    if print_prompt is True:
        print(prompt)

    extraction_agent = get_agent(model_id=model, temperature=0.0)
    result = extraction_agent.structured_output(output_model=InverviewQualityInfo, prompt=prompt)

    return result

In [11]:
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)

In [12]:
cache_period = 5

filename = f"quality_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
if not save_path.exists():
    save_json(save_path, {})
quality_interviews = read_json(save_path)


new_items_processed = 0
for persona_id in tqdm(interviews):
    if persona_id in quality_interviews:
        quality = json.loads(quality_interviews[persona_id])
        if quality['quality_level'] == 'OK':
            continue

    new_items_processed = new_items_processed + 1

    interview = interviews[persona_id]['interview']

    interview_str = "\n".join(interview)
    # print(interview_str)

    quality = check_interview_quality(interview_str, print_prompt=False)
    quality_str = json.dumps(quality.model_dump(), ensure_ascii=False)

    quality_interviews[persona_id] = quality_str

    if new_items_processed % cache_period == 0:
        save_json(save_path, quality_interviews)

    # if new_items_processed > 0:break

save_json(save_path, quality_interviews)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [01:57<00:00,  1.17s/it]


In [13]:
filename = f"quality_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
quality_interviews = read_json(save_path)

for persona_id in interviews:
    quality = json.loads(quality_interviews[persona_id])
    if quality['quality_level'] != 'OK':
        print(persona_id)
        print(quality['rationale'])
        print('---')

persona_007
The interview does not provide clear information about the user's location (city) and their specific interests in jobs or training. The user's responses are poetic and metaphorical, making it difficult to extract concrete information. The assistant also did not receive a direct answer to the question about the city where the user lives or specific areas of interest for jobs or training.
---
persona_012
The interview is missing the location information. The user mentioned they are in Rio, but it is not clear if this is Rio de Janeiro in Brazil or another city named Rio. Additionally, the user's age is below 16, making other information non-mandatory, but the location ambiguity still affects the quality level.
---


# Redo interview of a Persona

In [17]:
persona_id = "persona_025"

# Interview
print("üé§ Conduct Interview...")
conversation = conduct_persona_interview(persona_id, max_turns=10, print_conversation=True)

filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)
interviews[persona_id] = conversation.model_dump()
save_json(interviews_save_path, interviews)

filename = f"full_interviews_{persona_id}.json"
full_interview_save_path = DATA_INTERVIEWS_DIR / filename
full_interview = conversation.model_dump()
save_json(full_interview_save_path, full_interview)

üé§ Conduct Interview...
Assistant: Hello, I'm here to help you navigate your career path.

('Hi! I like drawing and playing video games, especially ones with cool machines. Sometimes I watch videos of big factories too.', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_025#2025-10-30T20:26:15')
User: Hi! I like drawing and playing video games, especially ones with cool machines. Sometimes I watch videos of big factories too.


  agent_response = interview_agent.structured_output(output_model=InterviewAgentMessage, prompt=conversation_str)


Assistant: Do you want to find a job in Brazil, explore training options, or simply gather information? Also, are you under 16 years old?
("I'm only 14, so no jobs yet! But I'd love to find a training where I can learn about real machines, not just the ones in games. Maybe something fun and hands-on?", '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_025#2025-10-30T20:26:15')
User: I'm only 14, so no jobs yet! But I'd love to find a training where I can learn about real machines, not just the ones in games. Maybe something fun and hands-on?
Assistant: Thanks for sharing! To assist you better, could you please tell me which city you currently live in?
('I live in Bras√≠lia! I don‚Äôt wanna move, but I‚Äôd love to find a training here. Maybe something where I can see how machines work for real?', '39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_025#2025-10-30T20:26:15')
User: I live in Bras√≠lia! I don‚Äôt wanna move, but I‚Äôd love to find a training here. Maybe something where I can see how 

In [12]:
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)
interviews[persona_id] = conversation.model_dump()
save_json(interviews_save_path, interviews)

# Debug

In [None]:
if False:
    interview = conduct_persona_interview(
        'persona_079',
        max_turns=5,
        model="mistral-small-latest",
        print_conversation=True
    )
    
    print(interview)

# Translate interviews in english

In [None]:
def translate_interview(
    interview,
    model: str = "mistral-small-latest",
    print_prompt=False
) -> InterviewInfo:

    prompt = TRANSLATE_INTERVIEW_PROMPT.format(
        interview=interview
    )

    if print_prompt is True:
        print(prompt)

    extraction_agent = get_agent(model_id=model, temperature=0.0)
    result = extraction_agent.structured_output(output_model=InterviewInfo, prompt=prompt)

    return result

In [None]:
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)

In [None]:
cache_period = 5

filename = f"en_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
if not save_path.exists():
    save_json(save_path, {})
en_interviews = read_json(save_path)


new_items_processed = 0
for interview_id in tqdm(interviews):
    if interview_id in en_interviews:
        continue

    new_items_processed = new_items_processed + 1
    
    interview = interviews[interview_id]

    translated_interview = translate_interview(interview, print_prompt=False)
    en_interviews[interview_id] = translated_interview.interview

    if new_items_processed % cache_period == 0:
        save_json(save_path, en_interviews)

    # if new_items_processed > 4:break

save_json(save_path, en_interviews)