In [30]:
!pip install strands-agents[mistral] python-dotenv



In [12]:
import json
import os
import sys
import dotenv
import boto3
import requests

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

from collections import Counter

# Pydantic for structured data
from pydantic import BaseModel, Field

# Strands for AI agents
from strands.agent import Agent
from strands.models.mistral import MistralModel

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
	chat_with_persona,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract,
    compute_stat_for_multi_items
)

from src.models.persona_info import PersonaInfo, JobPersonaInfo, LoacationNameRefator
from src.models.activity_domain_info import ActivityDomainInfo, ListOfActivityDomains
from src.models.skill_domain_info import SkillDomainInfo, ListSkillsDomains
from src.prompts.persona_extraction_prompt import (
    PERSONA_JOB_EXTRACTION_PROMPT,
    PERSONA_INITIAL_EXTRACTION_PROMPT,
    PERSONA_EXTEND_SKILL_DOMAIN_PROMPT,
    PERSONA_SKILL_DOMAINS_CLASSIFICATION_PROMPT,
    PERSONA_ACTIVITY_DOMAINS_CLASSIFICATION_PROMPT,
    LOCATION_NAME_REFACTORING
)
from src.prompts.interview_prompt import(
    JOB_INTERVIEW_QUALITY_CHECK_PROMPT
)
from src.models.interview_info import(
    InverviewQualityInfo
)
# Read environment variables from the "../env" file
dotenv.load_dotenv("../env")

# Tell the user up front whether MISTRAL_API_KEY is available
if os.getenv("MISTRAL_API_KEY"):
    print("‚úÖ API key found, we're ready to roll")
else:
    print("‚ùå No MISTRAL_API_KEY found!")
    print("Create an env file with your API key")

‚úÖ API key found, we're ready to roll


In [13]:
# Data folders, all siblings of the notebook directory (one level up)
DATA_JOBS_DIR = Path('..', 'data_jobs')
DATA_TRAININGS_DIR = Path('..', 'data_trainings')
DATA_INTERVIEWS_DIR = Path('..', 'data_interviews')
DATA_ACTIVITIES_DOMAINS_DIR = Path('..', 'data_activities_domains')
DATA_SKILLS_DOMAINS_DIR = Path('..', 'data_skills_domains')
DATA_PERSONAS_INFO_DIR = Path('..', 'data_personas_info')

In [14]:
# Load the data-version configuration used to build every file name below.
# The encoding is explicit so the YAML file is decoded the same way on every platform
# (the default of open() depends on the locale).
with open("../src/config.yaml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

job_data_version = config["job_data_version"]
print(f"job_data_version version: {job_data_version}")

training_data_version = config["training_data_version"]
print(f"training_data_version version: {training_data_version}")

interview_data_version = config["interview_data_version"]
print(f"interview_data_version version: {interview_data_version}")

activity_domains_version = config["activity_domains_version"]
print(f"activity_domains_version version: {activity_domains_version}")

personas_info_data_version = config["personas_info_data_version"]
print(f"personas_info_data_version version: {personas_info_data_version}")

skill_domains_version = config["skill_domains_version"]
print(f"skill_domains_version version: {skill_domains_version}")

job_data_version version: v4
training_data_version version: v7
interview_data_version version: v8
activity_domains_version version: v4
personas_info_data_version version: v14
skill_domains_version version: v3


In [15]:
# Load the job-domain map for the configured job data version.
# It is keyed by domain name; each entry carries a 'description' used when building prompts below.
filename = "map_clusters_jobs_{}.json".format(job_data_version)
save_path = DATA_JOBS_DIR.joinpath(filename)
jobs_map = read_json(save_path)

In [16]:
# Load the training (skills-domain) map for the configured training data version
filename = "final_map_clusters_trainings_{}.json".format(training_data_version)
save_path = DATA_TRAININGS_DIR.joinpath(filename)
trainings_map = read_json(save_path)

# Quick sanity output: number of entries read, followed by a separator line
print(f"‚úÖ Loaded {len(trainings_map)} skills domains")
print("\n" + "="*50)

‚úÖ Loaded 12 skills domains



In [17]:
# Load the initial interviews, keyed by persona id
filename = "interviews_{}.json".format(interview_data_version)
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
initial_interviews = read_json(interviews_save_path)

In [18]:
# Load the job-extension interviews (follow-up interviews), keyed by persona id
filename = "job_extension_interviews_{}.json".format(interview_data_version)
interviews_save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
job_extension_interviews = read_json(interviews_save_path)

In [19]:
# Load the consolidated personas: a mapping persona id -> JSON string
filename = "recommandation_consolidated_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)

initial_personas_data = read_json(personas_save_path)

# Validate every JSON payload into a PersonaInfo object
personas = dict(
    (pid, PersonaInfo.model_validate_json(raw_json))
    for pid, raw_json in initial_personas_data.items()
)

print(f"‚úÖ Loaded {len(personas)} personas")
print("\n" + "="*50)

‚úÖ Loaded 100 personas



# Extract Persona Data

In [20]:
def extract_job_persona_info(
    activities_domains: str,
    conversation: List[str],
    previous_goal: str,
    model: str = "mistral-small-latest",
    print_prompt=False
) -> JobPersonaInfo:
    """Run the job-persona extraction prompt over an interview transcript.

    Args:
        activities_domains: Text describing the candidate activity domains; inserted into the prompt.
        conversation: Interview turns; joined with newlines before being inserted into the prompt.
        previous_goal: Goal previously recorded for the persona; inserted into the prompt.
        model: Model identifier passed to ``get_agent``.
        print_prompt: When exactly ``True``, the rendered prompt is printed between separator lines.

    Returns:
        The agent's structured output for the ``JobPersonaInfo`` output model.
    """
    prompt = PERSONA_JOB_EXTRACTION_PROMPT.format(
        activities_domains=activities_domains,
        conversation='\n'.join(conversation),
        previous_goal=previous_goal,
    )

    if print_prompt is True:
        separator = "\n" + "="*50
        for chunk in (separator, prompt, separator):
            print(chunk)

    # Temperature 0.0 is requested for the extraction agent
    agent = get_agent(model_id=model, temperature=0.0)
    return agent.structured_output(output_model=JobPersonaInfo, prompt=prompt)

In [22]:
MAX_LOOPS = 1      # number of passes; personas rejected in one pass are retried in the next
cache_period = 5   # write the results file every `cache_period` newly processed personas

# Output file: created empty on first run, otherwise reused so finished personas are skipped
filename = f"job_consolidated_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR / filename
if not personas_save_path.exists():
    save_json(personas_save_path, {})
personas_data = read_json(personas_save_path)

# The domain list is the same for every persona, so it is rendered once outside the loops
domains_str = "".join(
    f"- {domain} : {jobs_map[domain]['description']}" + "\n"
    for domain in jobs_map
)

print("START CLASSIFICATION LOOP")
for i in range(MAX_LOOPS):
    print("ITERATE CLASSIFICATION LOOP")

    new_personas_processed = 0

    for persona_id in tqdm(initial_personas_data):
        # Already present in the output file: nothing to do
        if persona_id in personas_data:
            continue

        # No job-extension interview: carry the persona over unchanged
        if persona_id not in job_extension_interviews:
            personas_data[persona_id] = initial_personas_data[persona_id]
            continue

        new_personas_processed += 1

        # Build a NEW list. `+=` would extend the list stored inside `initial_interviews`
        # in place, so every re-run would append the extension interview once more.
        conversation = (
            initial_interviews[persona_id]['interview']
            + job_extension_interviews[persona_id]['interview']
        )

        persona_data_dict = json.loads(initial_personas_data[persona_id])

        result = extract_job_persona_info(
            domains_str,
            conversation,
            persona_data_dict['goals'],
            model="mistral-medium-latest",
            print_prompt=False
        )

        # Reject results without target domains; the persona stays unsaved and can be retried
        if len(result.target_domains) == 0:
            print(f"Activity domains empty : for {persona_id}")
            continue

        # Reject results naming a domain that is not a key of jobs_map
        if any(domain not in jobs_map for domain in result.target_domains):
            print(f"{persona_id} : {result.target_domains} not in domains list")
            continue

        persona_data_dict['open_to_relocate_for_work'] = result.open_to_relocate_for_work
        persona_data_dict['work_type_preference'] = result.work_type_preference
        persona_data_dict['education_level'] = result.education_level
        persona_data_dict['years_of_experience'] = result.years_of_experience
        persona_data_dict['languages'] = result.languages
        persona_data_dict['goals'] = result.goals
        persona_data_dict['target_domains'] = result.target_domains

        # Normalise a misspelled education label returned by the model
        if persona_data_dict['education_level'] == 'T√©cn√≥logo':
            print('T√©cn√≥logo renamed in Tecn√≥logo')
            persona_data_dict['education_level'] = 'Tecn√≥logo'

        personas_data[persona_id] = json.dumps(persona_data_dict, ensure_ascii=False)

        # Periodic checkpoint so an interruption loses at most `cache_period` personas
        if new_personas_processed % cache_period == 0:
            save_json(personas_save_path, personas_data)

# Final write covers whatever was processed since the last checkpoint
save_json(personas_save_path, personas_data)


START CLASSIFICATION LOOP
ITERATE CLASSIFICATION LOOP


 17%|‚ñà‚ñã        | 17/100 [00:26<02:09,  1.56s/it]

T√©cn√≥logo renamed in Tecn√≥logo


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 89/100 [02:25<00:24,  2.22s/it]

persona_089 : ['Cultural Information and Records Management'] not in domains list


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [02:41<00:00,  1.62s/it]


# Clean location information

In [23]:
def location_name_refactoring(
    location: str,
    model: str = "mistral-small-latest",
    print_prompt=False
) -> LoacationNameRefator:
    """Ask the model to harmonise a persona's location string.

    Args:
        location: Raw location value; inserted into the ``LOCATION_NAME_REFACTORING`` prompt.
        model: Model identifier passed to ``get_agent``.
        print_prompt: When exactly ``True``, the rendered prompt is printed between separator lines.

    Returns:
        The agent's structured output for the ``LoacationNameRefator`` output model.
    """
    prompt = LOCATION_NAME_REFACTORING.format(location=location)

    if print_prompt is True:
        separator = "\n" + "="*50
        for chunk in (separator, prompt, separator):
            print(chunk)

    # Temperature 0.0 is requested for the extraction agent
    agent = get_agent(model_id=model, temperature=0.0)
    return agent.structured_output(output_model=LoacationNameRefator, prompt=prompt)

In [24]:
# Input of the location clean-up: the job-consolidated personas file.
# Note that this rebinds `initial_personas_data` for every cell executed afterwards.
filename = "job_consolidated_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
initial_personas_data = read_json(personas_save_path)

In [25]:
MAX_LOOPS = 1
cache_period = 5

# Output file of the location clean-up; created empty when it does not exist yet
filename = "final_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
if not personas_save_path.exists():
    save_json(personas_save_path, {})
personas_data = read_json(personas_save_path)

# NOTE(review): entries already present in the output file are not skipped here, so every
# run (and every pass when MAX_LOOPS > 1) queries the model again for all personas.
print("START CLASSIFICATION LOOP")
for i in range(MAX_LOOPS):
    print("ITERATE CLASSIFICATION LOOP")

    new_items_processed = 0
    for persona_id in tqdm(initial_personas_data):
        new_items_processed += 1

        # Default: copy the persona as-is; replaced below only if the location gets renamed
        raw_persona = initial_personas_data[persona_id]
        personas_data[persona_id] = raw_persona

        persona_data_dict = json.loads(raw_persona)
        current_location = persona_data_dict['location']
        result = location_name_refactoring(
            current_location,
            model="mistral-medium-latest",
            print_prompt=False
        )

        if result.renaming_needed is True:
            print(f"{current_location} => {result.name}")
            persona_data_dict['location'] = result.name
            personas_data[persona_id] = json.dumps(persona_data_dict, ensure_ascii=False)

        # Periodic checkpoint (cache_period is 5)
        if new_items_processed % cache_period == 0:
            save_json(personas_save_path, personas_data)

save_json(personas_save_path, personas_data)


START CLASSIFICATION LOOP
ITERATE CLASSIFICATION LOOP


  4%|‚ñç         | 4/99 [00:07<02:49,  1.78s/it]

Rio => Rio de Janeiro


  9%|‚ñâ         | 9/99 [00:14<02:14,  1.49s/it]

Rio => Rio de Janeiro


 12%|‚ñà‚ñè        | 12/99 [00:35<05:38,  3.89s/it]

Rio => Rio de Janeiro


 26%|‚ñà‚ñà‚ñã       | 26/99 [00:59<02:15,  1.86s/it]

Fortaleza, Brazil => Fortaleza


 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 51/99 [01:40<01:31,  1.90s/it]

Recife, Brazil => Recife


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 56/99 [01:49<01:11,  1.66s/it]

Rio => Rio de Janeiro


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 85/99 [02:37<00:25,  1.85s/it]

Salvador, Brazil => Salvador


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 99/99 [03:02<00:00,  1.84s/it]

Rio => Rio de Janeiro





---

# For Debug Only

---

In [None]:
if False:
    # Debug helper (disabled): run the job extraction for one persona and print the merged
    # result without writing anything to disk. Flip the guard to True to use it.
    persona_id = "persona_018"
    persona_data_dict = json.loads(initial_personas_data[persona_id])

    # Concatenate into a NEW list; `+=` would extend the list stored in
    # `initial_interviews` in place and duplicate the extension interview on re-runs.
    conversation = (
        initial_interviews[persona_id]['interview']
        + job_extension_interviews[persona_id]['interview']
    )

    domains_str = "".join(
        f"- {domain} : {jobs_map[domain]['description']}" + "\n"
        for domain in jobs_map
    )

    result = extract_job_persona_info(
        domains_str,
        conversation,
        persona_data_dict['goals'],
        model="mistral-medium-latest",
        print_prompt=True
    )

    persona_data_dict['open_to_relocate_for_work'] = result.open_to_relocate_for_work
    persona_data_dict['work_type_preference'] = result.work_type_preference
    persona_data_dict['education_level'] = result.education_level
    persona_data_dict['years_of_experience'] = result.years_of_experience
    persona_data_dict['languages'] = result.languages
    persona_data_dict['goals'] = result.goals
    persona_data_dict['target_domains'] = result.target_domains

    print(persona_data_dict)


# Check quality

In [15]:
# Final personas (persona id -> JSON string), used to filter the quality checks below
filename = "final_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
persona_infos = read_json(personas_save_path)

In [16]:
def check_interview_quality(
    interview: str,
    model: str = "mistral-small-latest",
    print_prompt: bool = False
) -> InverviewQualityInfo:
    """Ask the model to assess the quality of a job interview transcript.

    Args:
        interview: Interview text; inserted into ``JOB_INTERVIEW_QUALITY_CHECK_PROMPT``.
        model: Model identifier passed to ``get_agent``.
        print_prompt: When exactly ``True``, the rendered prompt is printed.

    Returns:
        The agent's structured output for the ``InverviewQualityInfo`` output model.
    """
    prompt = JOB_INTERVIEW_QUALITY_CHECK_PROMPT.format(interview=interview)

    if print_prompt is True:
        print(prompt)

    # Temperature 0.0 is requested for the extraction agent
    agent = get_agent(model_id=model, temperature=0.0)
    return agent.structured_output(output_model=InverviewQualityInfo, prompt=prompt)

In [28]:
# Re-evaluate the job interview of ONE persona whose stored quality level is not 'OK'
persona_id = 'persona_026'

filename = "quality_job_interviews_{}.json".format(interview_data_version)
save_path = DATA_INTERVIEWS_DIR.joinpath(filename)
quality_interviews = read_json(save_path)

persona_data_dict = json.loads(persona_infos[persona_id])
if persona_data_dict['recommendation_type'] != 'jobs_trainings':
    # Only 'jobs_trainings' personas are expected here
    print('ISSUE')
elif persona_id in quality_interviews:
    quality = json.loads(quality_interviews[persona_id])
    if quality['quality_level'] != 'OK':
        interview = job_extension_interviews[persona_id]['interview']
        interview_str = "\n".join(interview)

        # Run the check again and persist the new verdict immediately
        quality = check_interview_quality(
            interview_str,
            model='mistral-medium-latest',
            print_prompt=False
        )
        quality_str = json.dumps(quality.model_dump(), ensure_ascii=False)
        quality_interviews[persona_id] = quality_str
        save_json(save_path, quality_interviews)

        print(quality_str)
    else:
        # Stored verdict is already 'OK': nothing to redo
        print('ISSUE')

{"quality_level": "OK", "rationale": "The interview covers all required information: willingness to relocate, education level, years of experience, languages spoken, career goals, and target domains. No information is missing, and there is no indication of hallucinated information by the assistant."}


In [17]:
cache_period = 5   # write the results file every `cache_period` newly checked interviews

# Results file: created empty on first run, otherwise reused so 'OK' verdicts are not recomputed
filename = f"quality_job_interviews_{interview_data_version}.json"
save_path = DATA_INTERVIEWS_DIR / filename
if not save_path.exists():
    save_json(save_path, {})
quality_interviews = read_json(save_path)


new_items_processed = 0
for persona_id in tqdm(job_extension_interviews):
    # A persona can have an extension interview yet be absent from the personas file
    # (e.g. when its extraction was rejected upstream); skip it instead of raising KeyError.
    if persona_id not in persona_infos:
        print(f"{persona_id} : missing from personas info, quality check skipped")
        continue

    persona_data_dict = json.loads(persona_infos[persona_id])
    if persona_data_dict['recommendation_type'] != 'jobs_trainings':
        continue

    # Keep verdicts that are already 'OK'; anything else is evaluated again
    if persona_id in quality_interviews:
        quality = json.loads(quality_interviews[persona_id])
        if quality['quality_level'] == 'OK':
            continue

    new_items_processed = new_items_processed + 1

    interview = job_extension_interviews[persona_id]['interview']
    interview_str = "\n".join(interview)

    quality = check_interview_quality(interview_str, model='mistral-medium-latest', print_prompt=False)
    quality_str = json.dumps(quality.model_dump(), ensure_ascii=False)

    quality_interviews[persona_id] = quality_str

    # Periodic checkpoint
    if new_items_processed % cache_period == 0:
        save_json(save_path, quality_interviews)

# Final write covers whatever was processed since the last checkpoint
save_json(save_path, quality_interviews)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 63/63 [02:10<00:00,  2.06s/it]


# Redo Persona

In [12]:
# Redo the job-persona extraction for ONE persona and store it in the job-consolidated file
persona_id = 'persona_017'

filename = f"job_consolidated_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR / filename
personas_data = read_json(personas_save_path)

# NOTE(review): `initial_personas_data` is whatever an earlier cell last bound it to
# (other cells load it from different personas files) — confirm the intended source.
persona_data_dict = json.loads(initial_personas_data[persona_id])
if persona_data_dict['recommendation_type'] != 'jobs_trainings':
    print("recommendation_type not jobs_trainings")
    personas_data[persona_id] = initial_personas_data[persona_id]
else:
    # Concatenate into a NEW list; `+=` would extend the list stored in
    # `initial_interviews` in place and duplicate the extension interview on re-runs.
    conversation = (
        initial_interviews[persona_id]['interview']
        + job_extension_interviews[persona_id]['interview']
    )

    domains_str = "".join(
        f"- {domain} : {jobs_map[domain]['description']}" + "\n"
        for domain in jobs_map
    )

    result = extract_job_persona_info(
        domains_str,
        conversation,
        persona_data_dict['goals'],
        model="mistral-medium-latest",
        print_prompt=True
    )

    # Same acceptance rules as the batch extraction loop: the result is stored only when
    # it names at least one domain and every domain is a key of jobs_map.
    if len(result.target_domains) == 0:
        print(f"Activity domains empty : for {persona_id}")
    elif any(domain not in jobs_map for domain in result.target_domains):
        print(f"{persona_id} : {result.target_domains} not in domains list")
    else:
        persona_data_dict['open_to_relocate_for_work'] = result.open_to_relocate_for_work
        persona_data_dict['work_type_preference'] = result.work_type_preference
        persona_data_dict['education_level'] = result.education_level
        persona_data_dict['years_of_experience'] = result.years_of_experience
        persona_data_dict['languages'] = result.languages
        persona_data_dict['goals'] = result.goals
        persona_data_dict['target_domains'] = result.target_domains

        # Normalise a misspelled education label returned by the model
        if persona_data_dict['education_level'] == 'T√©cn√≥logo':
            persona_data_dict['education_level'] = 'Tecn√≥logo'

        print(persona_data_dict)

        personas_data[persona_id] = json.dumps(persona_data_dict, ensure_ascii=False)

save_json(personas_save_path, personas_data)




From the interview conversation, extract the following fields and return a structured profile with these fields:
- Open to relocate for work: Whether they've mentioned being willing to move for work (true/false)
- Work type preference: Their preferred work arrangement (onsite, remote, or hybrid)
- Education level: Their highest completed education level. PICK EXACT LABEL NAME FROM  : Ensino Fundamental, Ensino M√©dio, T√©cnico, Tecn√≥logo, Gradua√ß√£o, Bacharelado, Licenciatura, P√≥s-gradua√ß√£o, Especializa√ß√£o, Mestrado, MBA, Doutorado). IMPORTANT reuse exact same labels !
- Years of experience: Their professional experience in years (as an integer)
- Languages: Languages they speak
    - Use strict formating for language name:
        - use english term
        - starting with Capital letter, the rest in lowercase letter
        - no location attribute (like Brasilian), just one word
- Goals: Their stated career or learning objectives.
    - Knowing previous stated goal by user w

In [13]:
# Check and correct the location of ONE persona, then store it in the final personas file
persona_id = 'persona_017'

# Source: job-consolidated personas (this rebinds `initial_personas_data` for later cells)
filename = "job_consolidated_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
initial_personas_data = read_json(personas_save_path)

# Destination: final personas
filename = "final_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
personas_data = read_json(personas_save_path)

persona_data_dict = json.loads(initial_personas_data[persona_id])
current_location = persona_data_dict['location']
result = location_name_refactoring(
    current_location,
    model="mistral-medium-latest",
    print_prompt=True
)

if result.renaming_needed is True:
    print(f"{current_location} => {result.name}")
    persona_data_dict['location'] = result.name

# The persona is written back whether or not its location changed
personas_data[persona_id] = json.dumps(persona_data_dict, ensure_ascii=False)
print(persona_data_dict)

save_json(personas_save_path, personas_data)



You are a location harmonization agent. Your task is to process a single persona's location string provided as "Salvador".

Follow these rules:
1.Trim and clean the input string.
2.If the input contains a city followed by a country (e.g., "Fortaleza, Brazil"), extract and normalize the city name only.
3.VERY IMPORTANT : Normalize known aliases:
- "Rio" ‚Üí "Rio de Janeiro"
- "Recife, Brazil" ‚Üí "Recife"
- "Fortaleza, Brazil" ‚Üí "Fortaleza"
4.If the input is empty, ambiguous (e.g., ":", "Brazil"), or not a valid city, return "Unknown".
5.Return a Python object of class LocationNameRefator with:
- renaming_needed = True if the name was changed or cleaned.
- renaming_needed = False if the input was already clean.
- name = "<standardized city name>" or "Unknown" if invalid.
- rationale = justification of your decision


{'name': '', 'age': 18, 'location': 'Salvador', 'recommendation_type': 'jobs_trainings', 'open_to_relocate_for_work': False, 'work_type_preference': 'onsite', 'target_d

# Persona info statistics

In [22]:
# Final personas (persona id -> JSON string) that the statistics below are computed from
filename = "final_personas_info_{}.json".format(personas_info_data_version)
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
persona_infos = read_json(personas_save_path)

In [23]:
# Decode every persona once; all statistics below read from these dictionaries
parsed_personas = [json.loads(persona) for persona in persona_infos.values()]

# 1. Age distribution — ages of 0 or below are considered invalid and left out
ages = [entry['age'] for entry in parsed_personas if entry['age'] > 0]
age_counts = Counter(ages)
for age in sorted(age_counts):
    print(f"Age {age}: {age_counts[age]} occurrences")

# 2. Recommendation types and locations, most frequent first, each followed by its total
recommendation_types = [entry['recommendation_type'] for entry in parsed_personas]
locations = [entry['location'] for entry in parsed_personas]

for heading, values in (
    ("\nRecommendation Type Counts:", recommendation_types),
    ("\nLocation Counts:", locations),
):
    type_counts = Counter(values)
    print(heading)
    for type_name, count in type_counts.most_common():
        print(f"{type_name}: {count}")
    print(f"\nTotal count: {sum(type_counts.values())}")

Age 13: 2 occurrences
Age 14: 1 occurrences
Age 15: 5 occurrences
Age 16: 10 occurrences
Age 17: 3 occurrences
Age 18: 12 occurrences
Age 19: 7 occurrences
Age 20: 3 occurrences
Age 21: 7 occurrences
Age 22: 7 occurrences
Age 23: 4 occurrences
Age 24: 4 occurrences
Age 25: 7 occurrences
Age 26: 8 occurrences
Age 27: 9 occurrences
Age 28: 7 occurrences
Age 29: 1 occurrences
Age 30: 2 occurrences

Recommendation Type Counts:
jobs_trainings: 55
trainings_only: 26
awareness: 19

Total count: 100

Location Counts:
S√£o Paulo: 12
Recife: 12
Rio de Janeiro: 12
Belo Horizonte: 12
Bras√≠lia: 11
Curitiba: 10
Salvador: 9
Porto Alegre: 8
: 6
Fortaleza: 5
Brazil: 2
Unknown: 1

Total count: 100


In [19]:
# Re-run the persona information extraction for one persona and overwrite its stored entry.
# NOTE(review): `extract_persona_info`, `activities_domains` and `skills_domains` are neither
# defined nor imported anywhere in the visible notebook — this cell presumably relies on names
# left over from an earlier kernel session; confirm before running it on a fresh kernel.
print("üíº Testing Persona information Extraction Agent...")
print("Reading a sample job file...\n")

# Interviews, keyed by persona id
filename = f"interviews_{interview_data_version}.json"
interviews_save_path = DATA_INTERVIEWS_DIR / filename
interviews = read_json(interviews_save_path)

# Personas file that receives the corrected entry
filename = f"personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR / filename
personas_data = read_json(personas_save_path)

# Persona whose extraction is redone
persona_id = "persona_015"

if persona_id in interviews:
    # NOTE(review): other cells read the same interviews file and pass the ['interview'] list
    # of the entry; here the whole entry is passed — confirm what extract_persona_info expects.
    conversation = interviews[persona_id]

    # Extract, store the result as a JSON string and write the file right away
    persona_info = extract_persona_info(activities_domains, skills_domains, conversation)
    personas_data[persona_id] = persona_info.model_dump_json()
    save_json(personas_save_path, personas_data)

    print(persona_info)
    #persona_infos[persona_id] = persona_info.model_dump_json()
    
    # persona_info = extract_persona_info(activities_domains, skills_domains, conversation)
   

üíº Testing Persona information Extraction Agent...
Reading a sample job file...

name='Camila' age=22 location='Fortaleza' recommendation_type='trainings_only' open_to_relocate_for_work=False work_type_preference='onsite' target_domains=['UNKNOWN'] education_level='T√©cnico' years_of_experience=0 skills_domains=['UNKNOWN'] skills={} languages={'Portuguese', 'English'} goals='Learn all basics of live event production: lights, sound, stage setup.'
