In [3]:
!pip install strands-agents[mistral] python-dotenv dictdiffer

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting dictdiffer
  Downloading dictdiffer-0.9.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting strands-agents[mistral]
  Downloading strands_agents-1.13.0-py3-none-any.whl.metadata (13 kB)
Collecting docstring-parser<1.0,>=0.15 (from strands-agents[mistral])
  Downloading docstring_parser-0.17.0-py3-none-any.whl.metadata (3.5 kB)
Collecting mcp<2.0.0,>=1.11.0 (from strands-agents[mistral])
  Downloading mcp-1.19.0-py3-none-any.whl.metadata (85 kB)
Collecting opentelemetry-api<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_api-1.38.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 (from strands-agents[mistral])
  Downloading opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-sdk<2.0.0,>=1.30.0 (from strands-agents[mistral])
  Downloading opentelemetry_s

In [15]:
import json
import os
import sys
import dotenv
import boto3
import requests
from collections import Counter
import yaml

import copy

from datetime import datetime

import pprint

import yaml
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TypeVar
from tqdm import tqdm

# Pydantic for structured data
from pydantic import BaseModel, Field

# AWS authentication
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

sys.path.append('..')

from src.utils import (
    save_json,
    read_json,
    load_file_content,
    get_job_paths,
    get_training_paths,
    sanity_check,
    chat_with_persona,
    validate_submission_format,
    track_api_call,  # Cost tracking from utils
    print_cost_summary,  # Cost summary from utils
    reset_cost_tracker  # Reset cost tracker from utils
)

from src.my_utils import (
    display_markdown_file,
    call_mistral,
    get_agent,
    batch_extract,
    compute_stat_for_multi_items
)

from src.models.persona_info import PersonaInfo
from src.models.job_info import JobInfo
from src.models.training_info import TrainingInfo
from src.models.generic_models import (
    BooleanModel,
    BooleanModelWithRationale,
    ListOfIds
)

from src.prompts.find_training_matches_prompt import (
    FIND_TRAINING_MATCHES_PROMPT,
    FIND_TRAINING_MATCHES_PROMPT_BY_NAME,
    CHECK_PERSONA_TRAINING_MATCH,
    FIND_TRAINING_MATCHES_FOR_JOB_PROMPT
)

from src.prompts.find_job_matches_prompt import (
    FIND_JOB_MATCHES_PROMPT
)

In [16]:
# Directory containing the versioned jobs JSON files (path is relative to the working directory)
DATA_JOBS_DIR = Path('../data_jobs')

In [17]:
# Read the shared YAML configuration. The encoding is given explicitly so that
# decoding does not depend on the platform's default locale encoding.
with open("../src/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Version tag used below to pick which jobs data file is loaded
job_data_version = config["job_data_version"]
print(f"job_data_version version: {job_data_version}")

job_data_version version: v4


In [18]:
# Read the jobs file that matches the configured data version
filename = f"final_jobs_{job_data_version}.json"
jobs_save_path = DATA_JOBS_DIR / filename
jobs_data = read_json(jobs_save_path)

# Parse every raw JSON entry into a validated JobInfo, keyed by job id
jobs_info = {
    key: JobInfo.model_validate_json(raw_job)
    for key, raw_job in jobs_data.items()
}

print(f"✅ Loaded {len(jobs_info)} jobs")
print("\n" + "=" * 50)

✅ Loaded 200 jobs



In [19]:
def _print_value_counts(label, values):
    """Print a separator, a ``<label> Counts:`` header, each value's frequency (most common first) and the total."""
    counts = Counter(values)
    print("\n" + "=" * 50)
    print(f"\n{label} Counts:")
    for value, count in counts.most_common():
        print(f"{value}: {count}")
    print(f"\nTotal count: {sum(counts.values())}")


def compute_job_data_stastistics(jobs_data):
    """Print frequency summaries of selected fields across all jobs.

    NOTE: the misspelling in the function name is kept so existing callers keep working.

    Args:
        jobs_data: Mapping whose values are JSON strings. Each decoded object must
            contain ``work_type``, ``education_level_required`` and
            ``years_of_experience_required``.

    Returns:
        None. Everything is written to stdout.

    Raises:
        KeyError: If a decoded job lacks one of the three fields listed above.
    """
    # Decode each job once instead of re-reading the raw string per field
    records = [json.loads(raw_job) for raw_job in jobs_data.values()]

    # Collect every column before printing anything, so a malformed record
    # fails up front rather than after part of the report has been emitted.
    fields = (
        'work_type',
        'education_level_required',
        'years_of_experience_required',
    )
    columns = {
        f"{field}s": [record[field] for record in records]
        for field in fields
    }

    for label, values in columns.items():
        _print_value_counts(label, values)

    # Languages are aggregated by the project helper; its result is used here
    # like a Counter (.values(), len(), .most_common()).
    item_counts = compute_stat_for_multi_items(jobs_data, 'required_languages', include_values=False)
    print("\n" + "=" * 50)
    print("\nRequired Languages Counts:")
    print(f"\nTotal occurrences: {sum(item_counts.values())}")
    print(f"Total unique languages: {len(item_counts)}")
    for item, count in item_counts.most_common():
        print(f"{item}: {count}")

In [20]:
# Print the distribution summaries for the loaded jobs (raw JSON strings keyed by job id)
compute_job_data_stastistics(jobs_data)



work_types Counts:
onsite: 158
remote: 42

Total count: 200


education_level_requireds Counts:
Graduação: 81
Técnico: 60
Tecnólogo: 53
Bacharelado: 6

Total count: 200


years_of_experience_requireds Counts:
1: 61
2: 59
0: 46
3: 34

Total count: 200


Required Languages Counts:

Total occurrences: 254
Total unique languages: 2
Portuguese: 200
English: 54


# Display jobs by education level

In [27]:
# Show id, title, domains and description for every job that requires the 'Técnico' level
for job_id, job in jobs_info.items():
    if job.education_level_required != 'Técnico':
        continue
    print("\n---")
    print(job_id, job.title, job.domains, job.job_description, sep="\n")



---
j100
Legal Intern – Negotiation & Research
['Legal Research And Procedural Support']
Legal Intern – Negotiation & Research will conduct legal research on complex cases and regulatory matters, support contract negotiations and client discussions, draft legal documents, briefs, and correspondence, and analyze case law to prepare detailed research summaries.


---
j102
Junior Legal Specialist – Statutory Review
['Legal Research And Procedural Support']
Analyzes and interprets statutory requirements, reviews legislation and regulatory frameworks, and prepares summaries of legal requirements for internal teams. Works closely with senior legal counsel and compliance teams to ensure accurate interpretation of laws and regulations.


---
j106
Junior Legal Analyst – Ethics & Drafting
['Legal Research And Procedural Support']
Junior Legal Analyst – Ethics & Drafting in Curitiba, Brazil, supporting legal research, document drafting, and contract reviews while ensuring ethical compliance and 