https://python.langchain.com/docs/tutorials/rag/
<br>https://python.langchain.com/docs/integrations/text_embedding/
<br>https://platform.openai.com/settings/organization/billing/overview

In [73]:
import json
import sqlite3
from pathlib import Path
import pandas as pd
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_core.prompts.chat import PromptTemplate
from langchain_openai import ChatOpenAI
from settings import OPENAI_API_KEY
from types_ import Application, ApplicationRating

In [74]:
def evalueate_applicantion(
        llm: ChatOpenAI,
        prompt: str,
        applicant: Application,
        job_description: str,
    ) -> ApplicationRating:
    """
    Evaluate an applicant's documents against a job description using a language model.

    The Jinja2 template in ``prompt`` is rendered with the applicant documents
    bound to ``context`` and the job description bound to ``question``. The
    rendered prompt is sent to ``llm`` and the reply is decoded as JSON.

    Args:
        llm (ChatOpenAI): The language model to use for evaluation.
        prompt (str): Jinja2 template source; it may reference the variables
            ``context`` and ``question``.
        applicant (Application): The applicant's documents, either a single Document or a list of Documents.
        job_description (str): The job description to evaluate against.
    Returns:
        ApplicationRating: The object decoded from the model's JSON reply. Its
            fields are whatever the prompt template instructs the model to emit.
    Raises:
        json.JSONDecodeError: If the reply (after removing an optional Markdown
            code fence) is not valid JSON.
    """

    # Use a separate name so the template source passed in as `prompt` is not shadowed.
    rendered_prompt = PromptTemplate(
        template=prompt,
        template_format="jinja2",
        input_variables=["context", "question"],
    ).invoke({
        "context": applicant,
        "question": job_description,
    })

    answer = llm.invoke(rendered_prompt)

    raw = answer.content
    if isinstance(raw, str):
        raw = raw.strip()
        # Chat models frequently wrap JSON in a Markdown fence (``` or ```json),
        # which json.loads rejects. Drop the opening fence line and the closing fence.
        if raw.startswith("```"):
            _, _, raw = raw.partition("\n")
            raw = raw.rsplit("```", 1)[0].strip()

    return json.loads(raw)

In [76]:
# Source text of the evaluation prompt template, read once when this cell runs.
# The encoding is pinned to UTF-8 so decoding does not depend on the machine's locale.
PROMPT_TEMPLATE = Path('/Users/caiopavesi/Code/bmw-job-matcher/data/prompt.jinja').read_text(encoding='utf-8')
# Directory containing the applicant's documents that are loaded for evaluation.
APPLICANT_DOCUMENTS_FILES_PATH = Path('/Users/caiopavesi/Library/Mobile Documents/com~apple~CloudDocs/0/Work/Job applications/Templates/Latest')

In [77]:
# Load the files matching "*.pdf" in the applicant documents directory,
# parsing each one with PyPDFLoader.
loader = DirectoryLoader(
    APPLICANT_DOCUMENTS_FILES_PATH,
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
docs_content = loader.load()

In [78]:
# Chat model client shared by every evaluation; configured with temperature 0.
llm = ChatOpenAI(model="gpt-4.1", temperature=0, api_key=OPENAI_API_KEY)

In [79]:
# Selects the jobs whose job_portal_id does not yet appear in the matches table.
filter_query = (
    "SELECT job_portal_id, job_description FROM jobs\n"
    "WHERE job_portal_id NOT IN\n"
    "(SELECT job_portal_id FROM matches);"
)

In [99]:
# Parameterized INSERT for one row of the matches table: seven columns,
# seven positional placeholders, in the order listed below.
insert_query = (
    "INSERT INTO MATCHES (\n"
    "    job_portal_id,\n"
    "    match_score,\n"
    "    match_strengths,\n"
    "    match_weaknesses,\n"
    "    match_improvement_points,\n"
    "    match_conclusions,\n"
    "    match_possible_interview\n"
    ") VALUES (?, ?, ?, ?, ?, ?, ?)\n"
)

In [None]:
# Rate every job that has no entry in the matches table yet and store the result.
# sqlite3's connection context manager only commits or rolls back; it does not
# close the connection, so the connection is closed explicitly in `finally`.
connection = sqlite3.connect('../data/jobs.db')
try:
    # The pending jobs are fully read into a DataFrame before any insert happens.
    for _, job in pd.read_sql_query(filter_query, connection).iterrows():
        rate = evalueate_applicantion(llm, PROMPT_TEMPLATE, docs_content, job['job_description'])
        # NOTE(review): this relies on the model returning its JSON keys in the same
        # order as the INSERT columns after job_portal_id; confirm against the prompt.
        query_params = (job['job_portal_id'], *rate.values())
        # Commit per job so ratings already obtained survive a failure on a later job
        # and are skipped by filter_query on the next run.
        with connection:
            connection.execute(insert_query, query_params)
finally:
    connection.close()