In [4]:
!pip install strands-agents[mistral] python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.14.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.19.0-py3-none-any.whl.metadata (85 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_sdk-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mistralai>=1.8.2 (from strands-agent

In [1]:
import json
import os
import sys
import dotenv
import boto3
import requests

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# Strands for AI agents
from strands.agent import Agent
from strands.models.mistral import MistralModel

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
	chat_with_persona,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract,
    send_message_to_chat
)

from src.models.interview_info import(
    InterviewInfo,
    InverviewQualityInfo,
    InterviewAgentMessage
)
from src.prompts.interview_prompt import(
    JOB_ROUND_INTERVIEW_PROMPT,
    JOB_FEEDBACK_INTERVIEW_QUALITY_CHECK_PROMPT
)

from src.models.persona_info import PersonaInfo
from src.models.job_info import JobInfo

# Load API key from .env file
dotenv.load_dotenv("../env")

# Check if we're good to go
if not os.getenv("MISTRAL_API_KEY"):
    print("‚ùå No MISTRAL_API_KEY found!")
    print("Create an env file with your API key")
else:
    print("‚úÖ API key found, we're ready to roll")

‚úÖ API key found, we're ready to roll


In [2]:
MAX_TURNS_IN_INTERVIEW = 10

In [3]:
DATA_PERSONAS_INFO_DIR = Path('../data_personas_info')
DATA_JOBS_DIR = Path('../data_jobs')
DATA_TRAININGS_DIR = Path('../data_trainings')
DATA_INTERVIEWS_DIR = Path('../data_interviews')
DATA_MATCH_JOBS_TRAININGS_DIR = Path('../data_match_jobs_trainings')

In [4]:
with open("../src/config.yaml", "r") as f:
    config = yaml.safe_load(f)

personas_info_data_version = config["personas_info_data_version"]
print(f"personas_info_data_version version: {personas_info_data_version}")

job_data_version = config["job_data_version"]
print(f"job_data_version version: {job_data_version}")

training_data_version = config["training_data_version"]
print(f"training_data_version version: {training_data_version}")

interview_data_version = config["interview_data_version"]
print(f"interview_data_version version: {interview_data_version}")

match_jobs_trainings_data_version = f"{job_data_version}_{training_data_version}"

personas_info_data_version version: v14
job_data_version version: v4
training_data_version version: v7
interview_data_version version: v8


In [5]:
# Load Personas data
filename = f"job_filtered_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR / filename

personas_data = read_json(personas_save_path)

# Convert to PersonaInfo objects
personas = {
    pid: PersonaInfo.model_validate_json(data)
    for pid, data in personas_data.items()
}

print(f"‚úÖ Loaded {len(personas)} personas")
print("\n" + "="*50)

‚úÖ Loaded 100 personas



In [6]:
# Load Jobs data
filename = f"final_jobs_{job_data_version}.json"
#filename = f"final_jobs_{job_data_version}.json"
jobs_save_path = DATA_JOBS_DIR / filename

jobs_data = read_json(jobs_save_path)

# Convert to JobInfo objects
jobs_info = {
    job_id: JobInfo.model_validate_json(data)
    for job_id, data in jobs_data.items()
}

print(f"‚úÖ Loaded {len(jobs_info)} jobs")
print("\n" + "="*50)

‚úÖ Loaded 200 jobs



In [7]:
# Load Match jobs trainings data
filename = f"match_jobs_trainings_{match_jobs_trainings_data_version}.json"
save_path = DATA_MATCH_JOBS_TRAININGS_DIR / filename
jobs_trainings_map = read_json(save_path)

# print(jobs_trainings_map)

# Prepare interview agent prompts

In [8]:
# from src.prompts.interview_prompt import(
#     JOB_FEEDBACK_INTERVIEW_PROMPT_FOR_NO_JOB_INTEREST
# )

new_personas_processed = 0

job_interview_round_prompt = {}

for person_id in personas:
    persona = personas[person_id]
    persona_data = json.loads(personas_data[person_id])
    if persona.recommendation_type == "awareness":
        continue
    if persona.recommendation_type == "trainings_only":
        continue
    if persona.recommendation_type == "jobs_trainings":
        new_personas_processed += 1
        if len(persona.proposed_job_ids) > 0:
            job_list_str = ""
            for job_id in persona.proposed_job_ids:
                job_info = jobs_info[job_id]
                job_list_str += f"----- JOB {job_id} ------" + "\n"
                job_list_str += job_info.describe_for_interview()
                job_list_str += f"Required skills :" + "\n"
                for training in jobs_trainings_map[job_id]:
                    job_list_str += "- " + training + "\n"
                job_list_str += "\n"
                job_interview_round_prompt[person_id] = job_list_str
        else:
            print(f"{person_id} has no proposed_job_ids")
            job_list_str = "NO JOBS FOUND"
            job_interview_round_prompt[person_id] = job_list_str
            

filename = f"job_interview_round_prompt{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
save_json(save_path, job_interview_round_prompt)

#print(job_interview_round_prompt['persona_010'])

persona_041 has no proposed_job_ids
persona_069 has no proposed_job_ids
persona_077 has no proposed_job_ids
persona_079 has no proposed_job_ids


# Perform interviews

In [8]:
def conduct_job_round_persona_interview(
    persona_id: str,
    prompt_information: str,
    conversation_id: str = None,
    max_turns: int = 5,
    model: str = "mistral-medium-latest",
    print_conversation: bool = False
) -> InterviewInfo:
    """Interview a persona and return conversation transcript"""

    interview_info = InterviewInfo()
    
    conversation = []

    prompt = JOB_ROUND_INTERVIEW_PROMPT

    # print(prompt)
    
    # return []
    
    interview_agent = get_agent(prompt, model_id=model)

    # Start with greeting
    agent_message = "Hello, I'm coming back to you regarding your career path.\n"
    agent_message += "Here is the list of jobs I have selected for you :\n"
    agent_message += prompt_information

    if print_conversation:
        print("\n" + "="*50)
        print(f"Assistant: {agent_message}")
        
    conversation.append(f"Assistant: {agent_message}")
              
    # Conduct interview
    for turn in range(max_turns):
        resp = send_message_to_chat(agent_message, persona_id, conversation_id)

        if resp is None:
            break

        user_response, conversation_id = resp
        conversation.append(f"User: {user_response}")
        if print_conversation:
            print("\n" + "="*50)
            print(f"User: {user_response}")
            
        # Generate next question
        conversation_str = '\n'.join(conversation)
        # agent_response = interview_agent(user_response)
        agent_response = interview_agent.structured_output(output_model=InterviewAgentMessage, prompt=conversation_str)

        if agent_response.conversation_finished is True:
            break

        agent_message = agent_response.message
        conversation.append(f"Assistant: {agent_message}")
        if print_conversation:
            print("\n" + "="*50)
            print(f"Assistant: {agent_message}")

    interview_info.conversation_id = conversation_id
    interview_info.interview = conversation
    
    return interview_info

In [None]:
filename = f"job_interview_round_prompt{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
job_interview_round_prompt = read_json(save_path)

filename = f"jobs_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
if not interviews_save_path.exists():
    save_json(interviews_save_path, {})
interviews = read_json(interviews_save_path)

print(f'Personas to process: {len(job_interview_round_prompt)}')

for persona_id in tqdm(job_interview_round_prompt):
    #print(persona_id)
    if persona_id not in interviews:
        #print(persona_id)
        interview_info = conduct_job_round_persona_interview(
            persona_id,
            job_interview_round_prompt[persona_id],
            max_turns = 5,
            model = "mistral-medium-latest",
            print_conversation = True
        )
        #print(interview_info)
        interviews[persona_id] = interview_info.model_dump()

        save_json(interviews_save_path, interviews)
        #break

save_json(interviews_save_path, interviews)

---

# For Debug Only

---

In [None]:
if False:
    persona_id = 'persona_003'
    interview_info = conduct_job_round_persona_interview(
        persona_id,
        job_interview_round_prompt[persona_id],
        max_turns = 5,
        model = "mistral-medium-latest",
        print_conversation = True
    )
    
    print(interview_info)

# Redo Persona

In [9]:
persona_id = 'persona_001'

filename = f"job_interview_round_prompt{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
job_interview_round_prompt = read_json(save_path)

#filename = f"jobs_interviews_{interview_data_version}.json"
#interviews_save_path = DATA_INTERVIEWS_DIR / filename
#interviews = read_json(interviews_save_path)

filename = f"full_interviews_{persona_id}.json"
interview_save_path = DATA_INTERVIEWS_DIR / filename
interview = read_json(interview_save_path)

conversation_id = interview['conversation_id']
print(conversation_id)

interview_info = conduct_job_round_persona_interview(
    persona_id,
    job_interview_round_prompt[persona_id],
    conversation_id=conversation_id,
    max_turns=10,
    model="mistral-medium-latest",
    print_conversation=True
)

new_interview = interview_info.model_dump()
interview['interview'].extend(new_interview['interview'])
save_json(interview_save_path, interview)


# print(interview_info)
#interviews[persona_id] = interview_info.model_dump()
#save_json(interviews_save_path, interviews)

39eba84b-46b0-4bc7-b29c-ceae08d4d602#persona_001#2025-10-30T20:14:10


KeyError: 'persona_001'

# Check interview qualities

In [12]:
def check_interview_quality(
    persona_id,
    interview: str,
    model: str = "mistral-small-latest",
    print_prompt: bool = False
) -> InverviewQualityInfo:
    prompt = JOB_FEEDBACK_INTERVIEW_QUALITY_CHECK_PROMPT.format(
        jobs_description=job_interview_round_prompt[persona_id],
        interview=interview
    )

    if print_prompt is True:
        print(prompt)

    extraction_agent = get_agent(model_id=model, temperature=0.0)
    result = extraction_agent.structured_output(output_model=InverviewQualityInfo, prompt=prompt)

    return result

In [13]:
filename = f"jobs_interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)

In [14]:
filename = f"job_interview_round_prompt{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
job_interview_round_prompt = read_json(save_path)

In [15]:
cache_period = 5

filename = f"quality_job_feedback_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
if not save_path.exists():
    save_json(save_path, {})
quality_interviews = read_json(save_path)

new_items_processed = 0
for persona_id in tqdm(personas_data):
    persona_data_dict = json.loads(personas_data[persona_id])
    if persona_data_dict['recommendation_type'] != 'jobs_trainings':
        continue

    if persona_id not in job_interview_round_prompt:
        continue

    if persona_id not in interviews:
        quality_data={
            'quality_level': 'NOK',
            'rationale': 'interview missing'
        }

        quality = InverviewQualityInfo(**quality_data)
        quality_str = json.dumps(quality.model_dump(), ensure_ascii=False)
        quality_interviews[persona_id] = quality_str
        save_json(save_path, quality_interviews)
        continue

    if persona_id in quality_interviews:
        quality = json.loads(quality_interviews[persona_id])
        if quality['quality_level'] == 'OK':
            continue

    new_items_processed = new_items_processed + 1

    interview = interviews[persona_id]['interview']

    interview_str = "\n".join(interview)
    # print(interview_str)

    quality = check_interview_quality(persona_id, interview_str, print_prompt=False)
    quality_str = json.dumps(quality.model_dump(), ensure_ascii=False)

    quality_interviews[persona_id] = quality_str

    if new_items_processed % cache_period == 0:
        save_json(save_path, quality_interviews)

    #if new_items_processed > 0:break

save_json(save_path, quality_interviews)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [01:15<00:00,  1.32it/s]


In [16]:
filename = f"quality_job_feedback_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
quality_interviews = read_json(save_path)

for persona_id in quality_interviews:
    quality = json.loads(quality_interviews[persona_id])
    if quality['quality_level'] != 'OK':
        print(persona_id)
        print(quality['rationale'])
        print('---')

persona_057
The interview does not provide sufficient information to confirm the persona's interest or proficiency level for the proposed jobs. The user expresses doubts and lacks experience in the required skills, making it difficult to assess their suitability for the roles.
---
persona_058
The interview lacks detailed information about the user's interest in the proposed jobs and does not fully assess the proficiency level of the persona regarding the required skills for the jobs. Additionally, the user expresses uncertainty about their skills and preferences, which makes it difficult to confirm their interest and proficiency accurately.
---
persona_066
The interview lacks detailed information about the user's interest in the proposed jobs and does not fully assess the user's proficiency in all required skills for the jobs. Additionally, the user's responses are somewhat inconsistent and do not provide a clear picture of their qualifications and career goals.
---
persona_071
The int

# Translate interviews in english

In [33]:
def translate_interview(
    interview,
    model: str = "mistral-small-latest",
    print_prompt=False
) -> InterviewInfo:

    prompt = TRANSLATE_INTERVIEW_PROMPT.format(
        interview=interview
    )

    if print_prompt is True:
        print(prompt)

    extraction_agent = get_agent(model_id=model, temperature=0.0)
    result = extraction_agent.structured_output(output_model=InterviewInfo, prompt=prompt)

    return result

In [34]:
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)

In [36]:
cache_period = 5

filename = f"en_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
if not save_path.exists():
    save_json(save_path, {})
en_interviews = read_json(save_path)


new_items_processed = 0
for interview_id in tqdm(interviews):
    if interview_id in en_interviews:
        continue

    new_items_processed = new_items_processed + 1
    
    interview = interviews[interview_id]

    translated_interview = translate_interview(interview, print_prompt=False)
    en_interviews[interview_id] = translated_interview.interview

    if new_items_processed % cache_period == 0:
        save_json(save_path, en_interviews)

    # if new_items_processed > 4:break

save_json(save_path, en_interviews)

  9%|‚ñâ         | 9/100 [01:23<14:05,  9.29s/it]


ValueError: No tool calls found in response

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:01<00:00, 50.71it/s]


persona_086
The interview does not provide sufficient information to extract all the required persona details. The user is not looking for a job or training, so only the age is important to know (below 16 or not), which is not provided.
---


# Redo interview of a Persona

In [27]:
persona_id = "persona_073"

activity_domains_str = ""
first = True
for domain in jobs_map:
    if first is False:
        activity_domains_str += ", "
    activity_domains_str += domain
    first = False
    
skill_domains_str = ""
first = True
for domain in trainings_map:
    if first is False:
        skill_domains_str += ", "
    skill_domains_str += domain
    first = False

# Interview
print("üé§ Conduct Interview...")
conversation = conduct_persona_interview(persona_id, activity_domains_str, skill_domains_str, max_turns=10, print_conversation=True)

üé§ Conduct Interview...
Assistant: Hello, I'm here to helping you navigate your career path.
To do so effectively, I may need to collect some personal information ‚Äî rest assured, your privacy is fully respected and protected.
User: Too early for career. Training first.

Which tools? Simple ones. No personal info. Please.
Assistant: Understood. Let‚Äôs focus on training interests.

1. What **domain** interests you for training? Here are the options:
   - Financial Risk Management And Compliance
   - Electrical And Electronic Systems Engineering
   - Food Safety And Management
   - Fiber And Paper Industry Operations
   - Industrial Equipment Maintenance And Optimization
   - Procurement And Supply Chain Management
   - Hospitality And Tourism Management
   - Legal Practice And Advocacy
   - Maritime And Port Operations Management
   - Visual And Artistic Skills
   - Information Management And Digital Security
   - Live Event Technical Management

2. Do you want to share your **curre

In [28]:
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)
interviews[persona_id] = conversation
save_json(interviews_save_path, interviews)