In [2]:
import os
import re
import time
import json
import pathlib

from google import genai

from cleanlab_tlm import TLM

from athina_logger.api_key import AthinaApiKey
from athina_logger.inference_logger import InferenceLogger
from athina_logger.exception.custom_exception import CustomException

In [3]:
import os
import sys
# Put the parent of the current working directory on the module search path so
# that modules located there can be imported by later cells.
# The membership check keeps the cell idempotent: re-running it does not pile
# duplicate entries onto sys.path.
project_root = os.path.abspath("../")
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
from jd_quantifier import gen_and_log_schema_from_jd

In [5]:
# Directory that is searched (recursively) for job-description files.
folder_jd_path = pathlib.Path("../data/raw/jd/unv")

# Walk the directory tree and keep regular files only (directories are dropped).
# rglob() yields entries in an arbitrary, filesystem-dependent order, so the
# result is sorted: a later cell picks filepaths[0], and that choice must be the
# same file on every run and on every machine.
# https://realpython.com/get-all-files-in-directory-python/
filepaths = sorted(f for f in folder_jd_path.rglob("*") if f.is_file())

# Show the collected paths as the cell output.
filepaths

[PosixPath('../data/raw/jd/unv/Climate Change & Environmental Officer.txt'),
 PosixPath('../data/raw/jd/unv/Online Teaching for the Global Citizenship Course.txt'),
 PosixPath('../data/raw/jd/unv/Tình Nguyện Viên Nâng Cao Nhận Thức Về Công Việc Tình Nguyện Tại Liên Hợp Quốc.txt'),
 PosixPath('../data/raw/jd/unv/[Online] Chương Trình Tình Nguyện Liên Hợp Quốc UN Volunteers (UNV) Tuyển Dụng Tình Nguyện Viên Mảng Truyền Thông 2022.txt')]

In [6]:
# Folder from which the API-key files are read
secret_keys_folder = "../secret_keys/"

# Load the Gemini API key and drop surrounding whitespace (e.g. a trailing newline)
gemini_key_path = secret_keys_folder + "gemini_api_key.txt"
with open(gemini_key_path, mode='r', encoding='utf-8') as file:
    gemini_api_key = file.read().strip()

In [7]:
# Folder containing the versioned prompt templates ("prompt_template_v<N>.txt")
prompt_template_folder = "../prompt_templates/"

# Keep only files whose *entire* name is "prompt_template_v<N>.txt".
# fullmatch() is used instead of match() because match() only anchors the start
# of the name: look-alikes such as "prompt_template_v3.txt.bak" would slip
# through and could even win the "latest version" selection below.
prompt_files = [
    f for f in os.listdir(prompt_template_folder)
    if re.fullmatch(r"prompt_template_v\d+\.txt", f)
]

# Fail with an actionable message rather than a bare IndexError on prompt_files[0]
if not prompt_files:
    raise FileNotFoundError(
        f"No file named 'prompt_template_v<N>.txt' found in {prompt_template_folder!r}"
    )

# Sort numerically by version number, newest first (so v10 ranks above v9)
prompt_files.sort(key=lambda x: int(re.search(r'\d+', x).group()), reverse=True)

# The first entry is now the template with the highest version number
latest_prompt_template_file = prompt_files[0]

# Read the content of the latest prompt template file.
# os.path.join keeps the path valid whether or not the folder ends with a slash.
latest_prompt_template_path = os.path.join(prompt_template_folder, latest_prompt_template_file)
with open(latest_prompt_template_path, "r", encoding="utf-8") as file:
    prompt_template = file.read().strip()

In [8]:
# Load the Cleanlab TLM API key from its file in the secret-keys folder
cleanlab_key_path = secret_keys_folder + "cleanlab_tlm_api_key.txt"
with open(cleanlab_key_path, mode='r', encoding='utf-8') as file:
    cleanlab_tlm_api_key = file.read().strip()

# Expose the key through the process environment
# (presumably where cleanlab_tlm looks it up -- confirm against its docs)
os.environ.update({"CLEANLAB_TLM_API_KEY": cleanlab_tlm_api_key})

In [9]:
# Load the Athina API key from its file in the secret-keys folder
athina_key_path = secret_keys_folder + "athina_api_key.txt"
with open(athina_key_path, mode='r', encoding='utf-8') as file:
    athina_api_key = file.read().strip()

# Hand the key to the Athina logger package
AthinaApiKey.set_api_key(athina_api_key)

In [10]:
# Names of the Google models considered in this notebook
google_model_lists = ["gemini-2.5-pro-exp-03-25", "gemini-1.5-pro"]

In [None]:
# scoring_scale_min = 1
# scoring_scale_max = 5

# # For each model, generate and log the schema for every job description
# for model in google_model_lists:
#     print(f"--- Processing model: {model} ---")
#     for jd_path in filepaths:
#         print(f"  Processing job description file: {jd_path.name}")
#         try:
#             schema = gen_and_log_schema_from_jd(
#                 jd_path=jd_path,
#                 scoring_scale_min=scoring_scale_min,
#                 scoring_scale_max=scoring_scale_max,
#                 google_api_key=gemini_api_key,
#                 model=model,
#                 cleanlab_tlm_api_key=cleanlab_tlm_api_key,
#                 athina_api_key=athina_api_key
#             )
#             print(f"  Generated schema: {schema}")
#         except Exception as e:
#             print(f"  Error processing {jd_path} with model {model}: {e}")

In [13]:
# Lower and upper bound of the scoring scale passed to the schema generator
scoring_scale_min, scoring_scale_max = 1, 5

# Arguments for a single run: first job-description file, one fixed model,
# the loaded prompt template and the API keys read in earlier cells
schema_request = dict(
    jd_path=filepaths[0],
    scoring_scale_min=scoring_scale_min,
    scoring_scale_max=scoring_scale_max,
    google_api_key=gemini_api_key,
    model="gemini-2.5-pro-exp-03-25",
    prompt_template=prompt_template,
    cleanlab_tlm_api_key=cleanlab_tlm_api_key,
    athina_api_key=athina_api_key,
)

# Generate the schema and print what the call returned
schema = gen_and_log_schema_from_jd(**schema_request)

print(schema)

  Reading job description from: ../data/raw/jd/unv/Climate Change & Environmental Officer.txt
  Generating prompt...
  Initializing Google GenAI client for model: gemini-2.5-pro-exp-03-25
  Calling Google GenAI model...
  Received response in 75.2306 seconds.
  Cleaning response...
  Parsing JSON response...
  Validating schema...
--- Running Validation ---
Found candidate category list under key: 'scoringCategories'
Found candidate weight key: 'weight_percent'
Calculated total weight: 100.0 from 5 items.
--- Validation Successful ---
  Schema validity: True
  Setting up Athina Logger...
  Logging inference to Athina...
  Logging successful.
{'schemaMetadata': {'roleName': 'Climate Change & Environmental Officer (International UN Volunteer Specialist)', 'organizationFocus': "Children's Rights (Education, Health, Protection, WASH, Climate/Environment) - UNICEF", 'screeningPhase': 'Initial Candidate Screening', 'scoringScale': {'min': 1, 'max': 5}, 'version': '1.0', 'description': 'Scori