In [None]:
import os
import re
import time
import json
import math

from google import genai

from cleanlab_tlm import TLM

from athina_logger.api_key import AthinaApiKey
from athina_logger.inference_logger import InferenceLogger
from athina_logger.exception.custom_exception import CustomException

In [None]:
folder_jd_path = "../data/raw/jd/"

# List the job-description files in a stable, sorted order.
# os.listdir() returns entries in arbitrary order and also yields
# sub-directories and hidden files (e.g. .DS_Store, .ipynb_checkpoints);
# those would break the plain-text read applied to every entry, and an
# unstable order makes "the first job description" differ between runs.
filenames = sorted(
    entry
    for entry in os.listdir(folder_jd_path)
    if not entry.startswith(".")
    and os.path.isfile(os.path.join(folder_jd_path, entry))
)
filenames

In [None]:
# Load the text of every job description, keeping the order of `filenames`
job_descriptions = []
for filename in filenames:
    jd_path = folder_jd_path + filename
    with open(jd_path, mode='r', encoding='utf-8') as file:
        job_descriptions.append(file.read())

# Preview the first job description as a quick sanity check
print(job_descriptions[0])

In [52]:
secret_keys_folder = "../secret_keys/"

# Load the Gemini API key from its key file, dropping surrounding whitespace
gemini_key_path = secret_keys_folder + "gemini_api_key.txt"
with open(gemini_key_path, mode='r', encoding='utf-8') as file:
    gemini_api_key = file.read().strip()

In [None]:
# Define the folder containing the prompt templates
prompt_template_folder = "./"

# Keep only files named exactly "prompt_template_v<N>.txt".
# fullmatch() is required here: match() anchors only the start of the name and
# would also accept files such as "prompt_template_v3.txt.bak".
prompt_files = [
    f for f in os.listdir(prompt_template_folder)
    if re.fullmatch(r"prompt_template_v\d+\.txt", f)
]

# Fail with an explicit message instead of an opaque IndexError on prompt_files[0]
if not prompt_files:
    raise FileNotFoundError(
        f"No prompt_template_v<N>.txt file found in '{prompt_template_folder}'."
    )

# Sort the files by version number in descending order (numeric, so v10 > v9)
prompt_files.sort(key=lambda x: int(re.search(r'\d+', x).group()), reverse=True)

# Get the prompt template with the highest version number
latest_prompt_template_file = prompt_files[0]

# Read the content of the latest prompt template file
latest_prompt_template_path = os.path.join(prompt_template_folder, latest_prompt_template_file)
with open(latest_prompt_template_path, "r", encoding="utf-8") as file:
    prompt_template = file.read().strip()

print(prompt_template)

In [54]:
def get_prompt(prompt_template: str, scoring_scale_min: int,
               scoring_scale_max: int, job_description: str):
    """
    Generates a prompt by filling in the provided template with the given parameters.

    Only the placeholders ``{scoring_scale_min}``, ``{scoring_scale_max}`` and
    ``{job_description}`` are substituted. Every other brace in the template
    (for example a literal JSON example) is left untouched, and the substituted
    values themselves are never re-scanned for placeholders.

    Args:
        prompt_template (str): The template string for the prompt.
        scoring_scale_min (int): The minimum value on the scoring scale (e.g., 1).
        scoring_scale_max (int): The maximum value on the scoring scale (e.g., 5).
        job_description (str): The job description to be included in the prompt.

    Returns:
        str: The generated prompt with the parameters filled in.

    Raises:
        ValueError: If the minimum score is not less than the maximum score.
    """
    # Check if the minimum score is less than the maximum score.
    # ValueError is a subclass of Exception, so existing `except Exception`
    # handlers keep working.
    if scoring_scale_min >= scoring_scale_max:
        raise ValueError("The min score of the scale must be less than the max score.")

    replacements = {
        "scoring_scale_min": str(scoring_scale_min),
        "scoring_scale_max": str(scoring_scale_max),
        "job_description": str(job_description),
    }

    # Substitute all known placeholders in a single pass over the template, so
    # no brace escaping is needed and unrelated braces are passed through as-is.
    placeholder_pattern = re.compile(r"\{(" + "|".join(replacements) + r")\}")
    prompt = placeholder_pattern.sub(
        lambda match: replacements[match.group(1)], prompt_template
    )

    return prompt

In [None]:
# Scoring scale bounds and the job description used for this sample run
scoring_scale_min, scoring_scale_max = 1, 5
sample_job_description = job_descriptions[0]

# Render the prompt for the sample job description and show it
prompt = get_prompt(
    prompt_template,
    scoring_scale_min,
    scoring_scale_max,
    sample_job_description,
)
print(prompt)

In [None]:
model = 'gemini-2.0-flash'
client = genai.Client(api_key=gemini_api_key)

# Record the start time
start_time = time.perf_counter()

# Pass the `model` variable instead of repeating the literal, so the model that
# is actually called can never drift from the identifier stored in `model`.
response = client.models.generate_content(
    model=model,
    contents=prompt,
)

# Record the end time
end_time = time.perf_counter()

# Calculate the response time (seconds spent in the generation request)
response_time = end_time - start_time

# The response text is expected to be a JSON string (possibly fenced).
print(response.text)
print(f"Response Time: {response_time:.6f} seconds")

In [None]:
# Strip an optional Markdown code fence around the model output.
# The opening fence may carry any language tag (or none) and either line-ending
# style; an exact "```json\n" ... "\n```" wrapper yields the same result as before.
clean_json_string = response.text.strip()
clean_json_string = re.sub(r"\A```[A-Za-z]*[ \t]*\r?\n", "", clean_json_string)
clean_json_string = re.sub(r"\r?\n[ \t]*```\Z", "", clean_json_string)
clean_json_string

In [None]:
# Parse the cleaned model output into Python objects.
# json.loads raises json.JSONDecodeError if the text is not valid JSON.
scoring_schema = json.loads(clean_json_string)
scoring_schema

In [None]:
# Show the type of the parsed object (check_valid_schema rejects anything that is not a dict)
print(type(scoring_schema))

In [60]:
def _is_numeric_weight(value) -> bool:
    """Return True for int/float values, rejecting bool (which is a subclass of int)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _find_category_list(scoring_schema: dict):
    """Return (key, list) for the first non-empty list of dicts, else the first empty list, else (None, None)."""
    fallback = (None, None)
    for key, value in scoring_schema.items():
        if not isinstance(value, list):
            continue
        if not value:
            # Remember the first empty list, but keep looking: a populated
            # category list may appear under a later key.
            if fallback[1] is None:
                fallback = (key, value)
            continue
        if all(isinstance(item, dict) for item in value):
            return key, value
    return fallback


def _find_weight_key(category: dict):
    """Return the first key containing 'weight' or 'percent' whose value is numeric, else None."""
    for key, value in category.items():
        if not isinstance(key, str):
            continue  # non-string keys cannot be matched by name
        key_lower = key.lower()
        if ('weight' in key_lower or 'percent' in key_lower) and _is_numeric_weight(value):
            return key
    return None


def check_valid_schema(scoring_schema: dict) -> bool:
    """
    Validates a scoring schema dictionary.

    Checks performed:
    1.  Input is a non-empty dictionary.
    2.  Finds the first non-empty list whose elements are all dictionaries
        (assumed scoring categories). An empty list is only used as the
        candidate when no populated list of dictionaries exists.
    3.  Finds, in the first category, the first key containing 'weight' or
        'percent' whose value is numeric (booleans are not treated as numeric).
    4.  Requires every category to carry a numeric value under that key.
    5.  Checks that the sum of these weights is close to 100.

    Progress and failure reasons are printed to stdout.

    Args:
        scoring_schema: The dictionary representing the parsed JSON scoring schema.

    Returns:
        True if the schema appears valid (primarily based on weights summing to 100
        and basic structure being identifiable), False otherwise.
    """
    print("--- Running Validation ---")

    # 1. Basic input type check
    if not isinstance(scoring_schema, dict):
        print("Validation Failed: Input is not a dictionary.")
        return False
    if not scoring_schema:
        print("Validation Failed: Input dictionary is empty.")
        return False

    # 2. Find the list containing category dictionaries
    category_list_key, category_list = _find_category_list(scoring_schema)

    if category_list is None:
        print("Validation Failed: Could not find a list containing dictionaries (potential scoring categories).")
        return False

    if not category_list:
        print(f"Found candidate category list (empty) under key: '{category_list_key}'")
        print("Validation Failed: The identified category list is empty. Schema requires categories.")
        return False

    print(f"Found candidate category list under key: '{category_list_key}'")

    # 3. Find the weight key within the first category dictionary
    weight_key = _find_weight_key(category_list[0])
    if weight_key is None:
        print(f"Validation Failed: Could not find a suitable weight key (containing 'weight' or 'percent' with a numeric value) in the first item of list '{category_list_key}'.")
        return False
    print(f"Found candidate weight key: '{weight_key}'")

    # 4. Sum the weights. Every item is a dict here (guaranteed by step 2).
    total_weight = 0.0
    for index, item in enumerate(category_list):
        weight_value = item.get(weight_key)

        if weight_value is None:
            # A missing (or null) weight in any category invalidates the schema
            print(f"Validation Failed: Weight key '{weight_key}' missing in item at index {index} of list '{category_list_key}'.")
            return False

        if not _is_numeric_weight(weight_value):
            print(f"Validation Failed: Weight value '{weight_value}' for key '{weight_key}' in item at index {index} is not numeric.")
            return False

        total_weight += float(weight_value)

    print(f"Calculated total weight: {total_weight} from {len(category_list)} items.")

    # 5. Compare against 100 with a small tolerance for floating-point error
    if not math.isclose(total_weight, 100.0, abs_tol=1e-9):
        print(f"Validation Failed: Total weight {total_weight} is not equal to 100.")
        return False

    print("--- Validation Successful ---")
    return True

In [None]:
# Validate the parsed schema; the boolean result is stored for the inference log
response_validity = check_valid_schema(scoring_schema=scoring_schema)

In [62]:
# Load the Cleanlab TLM API key and export it as CLEANLAB_TLM_API_KEY
# (presumably where the TLM client looks for it - confirm against cleanlab_tlm docs)
cleanlab_key_path = secret_keys_folder + "cleanlab_tlm_api_key.txt"
with open(cleanlab_key_path, mode='r', encoding='utf-8') as file:
    cleanlab_tlm_api_key = file.read().strip()
os.environ["CLEANLAB_TLM_API_KEY"] = cleanlab_tlm_api_key

In [63]:
# Create the TLM client; the "log" option requests an "explanation" entry,
# which is read from the result's 'log' field further down.
tlm = TLM(options={"log": ["explanation"]})

In [64]:
# Score how trustworthy the parsed schema is as a response to `prompt`.
# NOTE(review): str(scoring_schema) sends the Python repr of the parsed object
# (single-quoted keys), not the JSON text the model returned - confirm this is intended.
tlm_response = tlm.get_trustworthiness_score(prompt, response=str(scoring_schema))

In [None]:
# Display the raw TLM result for inspection
tlm_response

In [66]:
# Pull the score and its explanation out of the TLM result for logging
trustworthiness_score = tlm_response['trustworthiness_score']
trustworthiness_explanation = tlm_response['log']['explanation']

In [77]:
# Load the Athina API key from its key file, dropping surrounding whitespace
athina_key_path = secret_keys_folder + "athina_api_key.txt"
with open(athina_key_path, mode='r', encoding='utf-8') as file:
    athina_api_key = file.read().strip()

# Register the key with the Athina logger
AthinaApiKey.set_api_key(athina_api_key)

In [None]:
# Log the prompt, response, latency and quality signals to Athina.
# Failures are reported on stdout rather than raised, so a logging problem
# does not abort the notebook run.
try:
    InferenceLogger.log_inference(
        prompt_slug="npo_hr_screening_schema_writer_jd",
        prompt=prompt,
        language_model_id=model,
        response=str(scoring_schema),
        response_time=response_time,
        custom_attributes={
            "response_validity": response_validity,
            "trustworthiness_score": trustworthiness_score,
            "trustworthiness_explanation": trustworthiness_explanation,
        },
    )
except CustomException as e:
    # Athina-specific errors carry a status code and a message
    print(e.status_code)
    print(e.message)
except Exception as e:
    print(e)

In [1]:
# Gemini model identifiers kept for reference.
# NOTE(review): not referenced by any other visible cell - confirm whether it is still needed.
google_model_lists = [
    "gemini-2.5-pro-exp-03-25",
    "gemini-2.0-flash",
    "gemini-2.0-flash-lite",
    "gemini-1.5-flash",
    "gemini-1.5-flash-8b",
    "gemini-1.5-pro"
]