In [3]:
!pip install strands-agents[mistral] python-dotenv dictdiffer

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting dictdiffer
  Downloading dictdiffer-0.9.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.13.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.18.0-py3-none-any.whl.metadata (80 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_s

In [1]:
import json
import os
import sys
import dotenv
import boto3
import requests
from collections import Counter
import yaml

import copy

from datetime import datetime

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

# Put the parent directory on the module search path so the `src.*` imports
# below resolve (assumes the notebook's working directory sits directly under
# the project root — TODO confirm). Guarded so that re-running this cell does
# not keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
    chat_with_persona,
    validate_submission_format,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract
)

from src.models.persona_info import PersonaInfo
from src.models.job_info import JobInfo
from src.models.training_info import TrainingInfo
from src.models.generic_models import (
    BooleanModel,
    BooleanModelWithRationale,
    ListOfIds
)

from src.prompts.find_training_matches_prompt import (
    FIND_TRAINING_MATCHES_PROMPT,
    FIND_TRAINING_MATCHES_PROMPT_BY_NAME,
    CHECK_PERSONA_TRAINING_MATCH,
    FIND_TRAINING_MATCHES_FOR_JOB_PROMPT
)

from src.prompts.find_job_matches_prompt import (
    FIND_JOB_MATCHES_PROMPT
)

In [2]:
# Directory holding the persona-info JSON files, expressed relative to the
# notebook's working directory.
DATA_PERSONAS_INFO_DIR = Path('..') / 'data_personas_info'

In [3]:
# Read the project configuration. The encoding is passed explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open("../src/config.yaml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Version tag that selects which persona-info data file is loaded further down.
# A missing key raises KeyError here rather than failing later with an
# unrelated error.
personas_info_data_version = config["personas_info_data_version"]
print(f"personas_info_data_version version: {personas_info_data_version}")

personas_info_data_version version: v10


In [4]:
# Locate and read the persona-info file that matches the configured data version.
filename = f"last_final_personas_info_{personas_info_data_version}.json"
personas_save_path = DATA_PERSONAS_INFO_DIR.joinpath(filename)
personas_data = read_json(personas_save_path)

# Build the {persona id -> PersonaInfo} mapping. Each stored value is handed to
# `model_validate_json`, so it is presumably a serialized JSON document rather
# than an already-parsed dict — TODO confirm against how the file is written.
personas = dict(
    (persona_id, PersonaInfo.model_validate_json(raw_json))
    for persona_id, raw_json in personas_data.items()
)

# Report how many personas were loaded, followed by a separator line.
print(f"✅ Loaded {len(personas)} personas")
print("\n" + "=" * 50)

✅ Loaded 100 personas



In [5]:
def print_personas_with_empty_field(personas_by_id, field_name, recommendation_type='jobs_trainings'):
    """Print the ids of personas whose `field_name` attribute has length 0.

    Only personas whose `recommendation_type` equals the given
    `recommendation_type` are inspected; all others are skipped.

    Args:
        personas_by_id: Mapping of persona id -> persona object. Each object
            must expose `recommendation_type` and the attribute named by
            `field_name`.
        field_name: Name of the sized attribute to check (e.g. a list of ids).
        recommendation_type: Recommendation type a persona must have to be
            included in the check.

    Returns:
        None. A header line and one line per matching persona are printed.
    """
    print(f"Personas with empty {field_name}")
    # Iterate over items with a descriptive key name: using `id` as the loop
    # variable at notebook top level would shadow the builtin `id` for every
    # cell executed afterwards.
    for persona_id, persona in personas_by_id.items():
        if persona.recommendation_type != recommendation_type:
            continue

        if len(getattr(persona, field_name)) == 0:
            print(f"{persona_id} : {field_name} is empty")


# Sanity check for 'jobs_trainings' personas: both job-id fields should be populated.
print_personas_with_empty_field(personas, "proposed_job_ids")
print()  # blank line between the two reports
print_personas_with_empty_field(personas, "hard_filtered_jobs_ids")

Personas with empty proposed_job_ids
persona_069 : proposed_job_ids is empty
persona_077 : proposed_job_ids is empty
persona_079 : proposed_job_ids is empty

Personas with empty hard_filtered_jobs_ids
persona_069 : hard_filtered_jobs_ids is empty
persona_077 : hard_filtered_jobs_ids is empty
persona_079 : hard_filtered_jobs_ids is empty


In [15]:
# Sanity check for 'trainings_only' personas: report every persona whose
# `skills` attribute has length 0. The loop iterates over items with a
# descriptive key name so the builtin `id` is not shadowed in the notebook
# namespace for cells executed afterwards.
print("Personas with empty skills")
for persona_id, persona in personas.items():
    if persona.recommendation_type != 'trainings_only':
        continue

    if len(persona.skills) == 0:
        print(f"{persona_id} : skills is empty")


Personas with empty skills
