https://python.langchain.com/docs/tutorials/rag/
<br>https://python.langchain.com/docs/integrations/text_embedding/
<br>https://platform.openai.com/settings/organization/billing/overview

In [36]:
import json
import sqlite3
from pathlib import Path
import pandas as pd
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_core.prompts.chat import PromptTemplate
from langchain_openai import ChatOpenAI
from settings import OPENAI_API_KEY
from types_ import Application, ApplicationRating

In [37]:
def stop_loop(iteration: int, limit: int) -> None:
    """Abort a loop once it has run for the allowed number of passes.

    Acts as a safety net against runaway loops that would otherwise keep
    spending OpenAI credits.

    Args:
        iteration: Current iteration counter.
        limit: Maximum number of iterations allowed.

    Raises:
        StopIteration: When ``iteration >= limit - 1``; with a zero-based
            counter this is the ``limit``-th pass.
    """
    final_allowed = limit - 1
    if iteration < final_allowed:
        return
    raise StopIteration("Reached the limit of iterations.")

In [38]:
def evalueate_applicantion(
    llm: ChatOpenAI,
    prompt: str,
    candidate_application: Application,
    job_description: str,
) -> ApplicationRating:
    """Rate a candidate application against a job description with an LLM.

    Args:
        llm: Chat model that is invoked with the rendered prompt.
        prompt: Jinja2 template text; it is rendered with the variables
            ``candidate_application`` and ``job_description``.
        candidate_application: The applicant's material inserted into the
            template.
        job_description: The job description inserted into the template.

    Returns:
        The JSON-decoded ``arguments`` of the ``function_call`` found in the
        model response's ``additional_kwargs``.

    Raises:
        KeyError: If the response carries no ``function_call`` entry.
    """
    template = PromptTemplate(
        template=prompt,
        template_format="jinja2",
        input_variables=["candidate_application", "job_description"],
    )
    template_values = {
        "candidate_application": candidate_application,
        "job_description": job_description,
    }
    rendered_prompt = template.invoke(template_values)

    response = llm.invoke(rendered_prompt)

    # The structured rating arrives as a JSON string inside the function call.
    function_call = response.additional_kwargs['function_call']
    return json.loads(function_call['arguments'])

In [39]:
# Jinja template text used as the evaluation prompt. The encoding is spelled
# out so decoding does not depend on the machine's locale settings.
PROMPT_TEMPLATE = Path('/Users/caiopavesi/Code/bmw-job-matcher/data/prompt.jinja').read_text(encoding="utf-8")
# Folder holding the applicant's PDF documents.
APPLICANT_DOCUMENTS_FILES_PATH = Path('/Users/caiopavesi/Library/Mobile Documents/com~apple~CloudDocs/0/Work/Job applications/Templates/Latest')

In [40]:
# Load the applicant documents: files in the documents folder that match the
# "*.pdf" glob, each one read with PyPDFLoader.
loader = DirectoryLoader(
    APPLICANT_DOCUMENTS_FILES_PATH,
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
docs_content = loader.load()

In [41]:
# JSON-schema type of every field of the assessment, in output order.
_assessment_field_types = {
    "match_score": "number",
    "match_strengths": "string",
    "match_weaknesses": "string",
    "match_improvement_points": "string",
    "match_conclusions": "string",
    "match_possible_interview": "boolean",
}

# Function definition handed to the chat model; every field is required.
function_schema = {
    "name": "assess_candidate_fit",
    "description": "Evaluates candidate fit against job description and returns structured assessment.",
    "parameters": {
        "type": "object",
        "properties": {
            field_name: {"type": json_type}
            for field_name, json_type in _assessment_field_types.items()
        },
        "required": list(_assessment_field_types),
    },
}

In [42]:
# Chat model used for the evaluations. model_kwargs pass the function schema
# and name "assess_candidate_fit" as the function to call.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0.0,
    api_key=OPENAI_API_KEY,
    model_kwargs={
        "functions": [function_schema],
        "function_call": {"name": "assess_candidate_fit"},
    },
)

In [43]:
# Jobs that still need a rating: id and description of every row in `jobs`
# whose job_portal_id does not appear in `matches`.
# NOTE(review): NOT IN returns no rows when the subquery yields a NULL —
# confirm matches.job_portal_id can never be NULL.
FILTER_QUERY = """SELECT job_portal_id, job_description FROM jobs
WHERE job_portal_id NOT IN
(SELECT job_portal_id FROM matches);"""

In [44]:
# Parameterized insert of one rating row. The seven `?` placeholders are bound
# positionally, in the column order listed in the statement.
INSERT_QUERY = """INSERT INTO MATCHES (
    job_portal_id,
    match_score,
    match_strengths,
    match_weaknesses,
    match_improvement_points,
    match_conclusions,
    match_possible_interview
) VALUES (?, ?, ?, ?, ?, ?, ?)
"""

In [45]:
# Rating fields in the column order INSERT_QUERY expects after job_portal_id.
_MATCH_COLUMNS = (
    "match_score",
    "match_strengths",
    "match_weaknesses",
    "match_improvement_points",
    "match_conclusions",
    "match_possible_interview",
)


def query_search(conn):
    """Return an iterator over the rows selected by FILTER_QUERY.

    Args:
        conn: Open database connection passed to ``pandas.read_sql_query``.

    Returns:
        The ``DataFrame.itertuples()`` iterator; every item unpacks as
        ``(index, job_portal_id, job_description)``.
    """
    return pd.read_sql_query(FILTER_QUERY, conn).itertuples()


def query_params(id, rate):  # noqa: A002 - parameter name kept for callers
    """Build the positional parameter tuple for INSERT_QUERY.

    Values are looked up by column name rather than taken in the order the
    keys happen to appear in ``rate``, so a rating whose keys arrive in a
    different order is still written to the right columns.

    Args:
        id: The job_portal_id of the rated job.
        rate: Mapping that holds a value for each rating column.

    Returns:
        A tuple ``(id, match_score, match_strengths, match_weaknesses,
        match_improvement_points, match_conclusions,
        match_possible_interview)``.

    Raises:
        KeyError: If ``rate`` lacks one of the rating columns.
    """
    return (id, *(rate[column] for column in _MATCH_COLUMNS))

In [46]:
from contextlib import closing

# Upper bound on evaluations per run; keeps a runaway loop from spending
# OpenAI credits.
MAX_JOBS_PER_RUN = 20

# Using the sqlite3 connection itself as a context manager never closes it
# (and does nothing at all when autocommit is True), so closing() is used to
# release the database handle when the loop ends or an evaluation raises.
with closing(sqlite3.connect('../data/jobs.db', autocommit=True)) as connection:
    for index, job_id, job_description in query_search(connection):
        rate = evalueate_applicantion(llm, PROMPT_TEMPLATE, docs_content, job_description)
        # With autocommit enabled each rating is stored as soon as it is
        # inserted, so finished evaluations survive a later failure.
        connection.execute(INSERT_QUERY, query_params(job_id, rate))

        try:
            stop_loop(index, MAX_JOBS_PER_RUN)
        except StopIteration:
            break