In [17]:
import os
from typing import TypedDict, Annotated, List


In [18]:
# from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.pydantic_v1 import BaseModel
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document
from langchain_community.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma


from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END

from langchain import hub

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
from tavily import TavilyClient

In [21]:
from IPython.display import Markdown, display

In [22]:
from nodes import *

In [23]:
RESUME_PATH = "./data/Resume.md"
PERSISTENT_DIRECTORY = './docs/'


In [24]:
headers_to_split_on = [
    ("#", "Section"),
    ("##", "Job position"),
]
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on, 
                                               strip_headers=True, 
                                            #    return_each_line=True
                                               )


# separators = ['- ', '\n\n', '\n', ' ', '']
# chunk_size = 250
# chunk_overlap = 30
# text_splitter = RecursiveCharacterTextSplitter(
#     separators=separators,
#     chunk_size=chunk_size, 
#     chunk_overlap=chunk_overlap
# )


In [25]:
loader = TextLoader(RESUME_PATH)
docs = loader.load()
txt = ' '.join([d.page_content for d in docs])

In [26]:
# splitted_docs = text_splitter.split_documents(markdown_splitter.split_text(txt))
splitted_docs = markdown_splitter.split_text(txt)


In [27]:
splitted_docs

[Document(page_content='Jakub Drdak  \n+420 728 533 859  \njak.drd@gmail.com  \nlinkedin.com/in/jakub-drdak'),
 Document(metadata={'Section': 'Bio'}, page_content='Machine Learning and AI Specialist with 6+ years of experience having driven advancements in advertising and recommender systems at Seznam.cz, a leading Czech tech company. I’ve led research teams, tackled large-scale data projects, and initiated and driven complex strategic initiatives. I’m also a seasoned speaker and event organiser, with a few co-authored papers in top conferences such as RECSYS and SIGIR. I highly value and foster teamwork and cross-functional collaboration and enjoy seeking ways to present complex problems and findings in a clear and understandable manner for everyone.'),
 Document(metadata={'Section': 'Experience', 'Job position': 'Study and Travel'}, page_content='- Language Proficiency: Traveled and studied to achieve proficiency in English, enabling effective collaboration in international teams. En

In [28]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [29]:
model = ChatOpenAI(model='gpt-4o-mini', temperature=0)


In [30]:
# https://learn.deeplearning.ai/courses/langchain-chat-with-your-data/lesson/5/retrieval
metadata_field_info = [
    AttributeInfo(
        name="Section",
        description="The section of the resume",
        type="string",
    ),
    AttributeInfo(
        name="Job Position",
        description="The job position",
        type="string",
    ),
]

In [31]:
embedding_function = OpenAIEmbeddings(model='text-embedding-3-small')


In [32]:
vectordb = Chroma.from_documents(documents=splitted_docs, persist_directory=PERSISTENT_DIRECTORY, embedding=embedding_function)


In [33]:
document_content_description = 'Resume'
retriever = SelfQueryRetriever.from_llm(
    model,
    vectordb,
    document_content_description,
    metadata_field_info,
    verbose=True
)

In [34]:
# retriever = vectordb.as_retriever()

In [35]:
from langchain.tools.retriever import create_retriever_tool


In [36]:
tool = create_retriever_tool(retriever, 
                      "lookup-the-resume",
                      "Look up and return passages from the resume")
tools = [tool]

In [37]:
# from langchain.memory import ConversationBufferMemory
# memory_QA = ConversationBufferMemory(
#     memory_key="chat_history",
#     return_messages=True
# )

In [38]:
# from langchain.chains import ConversationalRetrievalChain

In [39]:
memory = SqliteSaver.from_conn_string(":memory:")

In [40]:
file = open("./data/job_description.txt", "r")
content = file.read()

agent_state = AgentState()
agent_state['job_description'] = content 

In [41]:
agent_state['model'] = model

In [42]:
requirements = job_description_analyst_node(agent_state)

In [43]:
# class RequirementFulfilled(BaseModel):
#     fulfilled: bool
    
# retriever=vectordb.as_retriever()
# # TODO deprecated https://python.langchain.com/v0.1/docs/use_cases/question_answering/chat_history/
# qa = ConversationalRetrievalChain.from_llm(
#     model, 
#     # model.with_structured_output(RequirementFulfilled),
#     retriever=retriever,
#     memory=memory_QA
# )

In [44]:
agent_state['tools'] = tools
agent_state['job_requirements'] = requirements['job_requirements']
requirements_checker_node(agent_state)
        



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `lookup-the-resume` with `{'query': 'Doctorate in Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, Computer Science, or related field'}`


[0m[36;1m[1;3m- Master’s degree in Knowledge Engineering
- Bachelor’s degree in Information Systems and Management

- Master’s degree in Knowledge Engineering
- Bachelor’s degree in Information Systems and Management

- Master’s degree in Knowledge Engineering
- Bachelor’s degree in Information Systems and Management

- Master’s degree in Knowledge Engineering
- Bachelor’s degree in Information Systems and Management[0m[32;1m[1;3mNEED MORE INFO: Do you have any additional qualifications or certifications related to Data Science, Mathematics, Statistics, Econometrics, Economics, Operations Research, or Computer Science?[0m

[1m> Finished chain.[0m
<<<<<<<>>>>>>>


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `l