Define the text (it will be used later on):

In [1]:
# Raw checklist text; it is wrapped in a Document and fed to the LLM graph transformer further down.
text = """
getlanded.com | getlanded.etsy.com | getlanded@gmail.com
(With page numbers where you can find more info in the included “Guide to Landing the Job”)
#1. CREATE AN UNFORGETTABLE RESUME
c Focus your resume around your target job /6
c Include keywords from the job posting(s) to prove you’re a great match /10
c Figure out what’s important to the employer /13
c Demonstrate how you can provide value based on what the employer values /16
c Create bullets based on your achievements, not just your duties /19
c Quantify those achievement-based bullets with numbers /23
c Start your bullets with a Success Verb /27
c Make sure your formatting is consistent (fonts, bullets, periods, etc.) /32
c Shorten your LinkedIn URL /32
c Use past and present tense correctly /33
c Avoid third person /33
c Prioritize your information /33
c PROOFREAD your resume!! /33
c Make sure your email address is professional /33
c Include your location: just city and state is best /33
c Create a killer Professional Profile, since it gets read first /34
c Put your job title first, not the company name /37
Quick Tip CHECKLIST
getlanded.com | getlanded.etsy.com | getlanded@gmail.com
#2. ENSURE IT ACTUALLY GETS SEEN
c Apply to jobs you are at least an 80% match for /41
c Include keywords from the job posting so your resume actually gets chosen /10
c Apply using less popular job sites to maximize your chances of standing out /41
c If you apply using Indeed, use your own uploaded resume, not their version /42
c Fill out every single field of the job application /42
c When you upload to job postings, make sure the resume they spit back at you is
correct, or copy/paste yourself from your Word/Pages file /42
c Upload to job postings as a PDF file, NOT a Word/Pages file /42
c Create a unique cover letter to entice the reader to read your resume /43
c Spend HALF your time networking! Get referred to the job by someone you
know, or connect with an actual person at the company /47
Quick Tip CHECKLIST
"""

In [7]:
import os
from langchain.graphs import Neo4jGraph
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain_openai.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain.agents import create_react_agent, AgentExecutor, Tool
from langchain import hub
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.chains import GraphCypherQAChain
# Load variables from a local .env file into the process environment. The Neo4j and OpenAI
# clients in this notebook are created without explicit credentials, so presumably they
# pick them up from the environment -- TODO confirm the expected variable names.
load_dotenv()

True

# 1. Neo4j Intro: Manual Graph Creation

Here, we will use LangChain's Neo4j interface to add nodes and relationships, and to clear the database.

In [8]:
# Class that can write nodes, relationships, and clear the graph
class Neo4jDatabase:
    """Thin convenience wrapper around a ``Neo4jGraph`` connection.

    Provides three write operations: wiping the whole graph, creating
    ``Person`` nodes, and linking existing ``Person`` nodes to each other.
    """

    def __init__(self):
        # Built without arguments; connection settings are presumably read
        # from the environment -- TODO confirm.
        self.graph = Neo4jGraph()

    def clear_database(self):
        """Remove every node together with all relationships attached to it."""
        wipe_statement = "MATCH (n) DETACH DELETE n"
        self.graph.query(wipe_statement)

    def add_nodes_to_graph(self, nodes):
        """Create one ``Person`` node for each entry in ``nodes``.

        Every entry must be subscriptable with the keys ``'id'`` and ``'name'``
        (a missing key raises ``KeyError``). One ``CREATE`` statement is sent
        per entry, in iteration order.
        """
        create_statement = "CREATE (n:Person {id: $id, name: $name})"
        for entry in nodes:
            params = {field: entry[field] for field in ("id", "name")}
            self.graph.query(create_statement, params)

    def add_relationship_to_graph(self, relationships):
        """Create one directed relationship for each entry in ``relationships``.

        Every entry must provide ``'start_id'``, ``'end_id'``, ``'type'``,
        ``'since'`` and ``'strength'``. The two ``Person`` nodes are looked up
        by id. The relationship label is always ``RELATIONSHIP_TYPE``; the
        entry's ``'type'`` is stored as a property on it, next to ``since``
        and ``strength``.
        """
        link_statement = """
                MATCH (a:Person {id: $start_id}), (b:Person {id: $end_id})
                CREATE (a)-[:RELATIONSHIP_TYPE {type: $type, since: $since, strength: $strength}]->(b)
                """
        fields = ("start_id", "end_id", "type", "since", "strength")
        for entry in relationships:
            self.graph.query(link_statement, {field: entry[field] for field in fields})

In [9]:
# Connect to Neo4j and start from an empty graph
neo4j_db = Neo4jDatabase()
neo4j_db.clear_database()

# Three Person nodes with consecutive ids starting at 1
nodes = [
    {"id": person_id, "name": person_name}
    for person_id, person_name in enumerate(("Alice", "Bob", "Charlie"), start=1)
]
neo4j_db.add_nodes_to_graph(nodes)
print("Nodes added successfully!")

# Two directed relationships: 1 -> 2 and 2 -> 3
relationships = [
    dict(start_id=1, end_id=2, type="KNOWS", since="2020", strength="high"),
    dict(start_id=2, end_id=3, type="FRIENDS_WITH", since="2019", strength="medium"),
]
neo4j_db.add_relationship_to_graph(relationships)
print("Relationships added successfully!")

Nodes added successfully!
Relationships added successfully!


# 2. LLM-Based Graph Creation from Documents

Here we will automatically create a graph database (in Neo4j) from a document. Then, we will restrict the extracted node and relationship types in LangChain.

In [10]:
# Chat model shared by both graph transformers in this cell
llm = ChatOpenAI()

# Wrap the raw text in a single Document and let the LLM extract graph documents
# from it, without any restriction on node or relationship types
documents = [Document(page_content=text)]
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

# Show what the unrestricted extraction produced
print("Unfiltered nodes:")
for node in graph_documents[0].nodes:
    print(node)
print("\n")

# Second extraction over the same documents, this time limited to the
# node labels and relationship types listed here
allowed_nodes = ["Resume tip", "Job advice"]
allowed_relationships = ["JOB_APPLICATION_TIP", "RESUME_TIP"]
llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
)
graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(documents)

# Show what the restricted extraction produced
print("Filtered nodes:")
for node in graph_documents_filtered[0].nodes:
    print(node)
print("\n")

# Empty the Neo4j database, then upload the *unfiltered* graph documents
graph = Neo4jGraph()
graph.query("MATCH (n) DETACH DELETE n")
graph.add_graph_documents(graph_documents)
print("Successfully uploaded!")
# The graph now exists in Neo4j, but it has no semantic understanding (embeddings) yet

Unfiltered nodes:
id='Resume' type='Document'
id='Email' type='Communication'
id='Linkedin' type='Platform'
id='Professional Profile' type='Information'
id='Location' type='Information'
id='Cover Letter' type='Document'


Filtered nodes:
id='Create_An_Unforgettable_Resume' type='Resume tip'
id='Ensure_It_Actually_Gets_Seen' type='Resume tip'


Successfully uploaded!


In [11]:
# Build a vector index on top of the graph that already lives in Neo4j: the selected
# text properties of the nodes with the given label are embedded and uploaded to the
# online database. This is what supports user queries when an LLM is used over the graph.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    node_label="Information",
    text_node_properties=["id"],
    embedding_node_property="info",  # node attribute that will hold the embedding
    index_name="vector3",  # one index name must keep one vector dimension, so stick to the same embedding model for it
)
# NOTE: the returned vector index has no awareness of graph structure; it is simply a
# vector embedding built from the 'text_node_properties' of each node.



### 2.1. Similarity Search

Here we will perform a similarity search between a **query** and the stored **embeddings**. This is not yet RAG, as we are not actually using an LLM for generation (in a standard RAG application, the output of this step would be the context passed to the LLM).

In [12]:
# Perform similarity search (on the vector index)
response = vector_index.similarity_search(
    "What info is good?"
)
# Each hit's page_content is expected to look like "<property>: <value>" (see the output
# below). Split on the FIRST colon only, so a value that itself contains ':' is not cut
# short, and take the last piece so a hit without any colon yields its full text instead
# of raising IndexError.
print("Information: ", [r.page_content.split(':', 1)[-1].strip() for r in response])

# Perform Cypher query (on graph database)
response = graph.query(
    "MATCH (t:Platform) RETURN count(*)"
)
print("Platforms: ", response)

Information:  ['Professional Profile', 'Location']
Platforms:  [{'count(*)': 1}]


### 2.2. RAG (for vector-index chain)


Here we will perform RAG. For that, we need to initialise a new LLM. The retrieval output (the kind we saw in the previous step) is used as context for a new pass through the LLM. **Keep in mind that this only has access to the index, not the graph database structure!!!**

In [14]:
# Retrieval-QA chain: documents fetched through the vector index are "stuffed"
# into the prompt of a chat model
retriever = vector_index.as_retriever()
vector_qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(),
    retriever=retriever,
    chain_type="stuff",
)

# Run RAG: send one question through the retrieval + generation chain
question = "What platforms can I use for job search based on the knowledge graph?"
response = vector_qa.invoke(question)
print("Answer: ", response)

Answer:  {'query': 'What platforms can I use for job search based on the knowledge graph?', 'result': 'Based on the knowledge graph, you can use platforms like LinkedIn, Indeed, Glassdoor, Monster, and CareerBuilder for job searches.'}


Another way to create a vector-QA chain (this is the newer approach):

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
# `hub` and `ChatOpenAI` are already imported in the import cell at the top of the notebook.

# Get the prompt for retrieval creation
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Create the chain
combine_docs_chain = create_stuff_documents_chain(
    ChatOpenAI(), retrieval_qa_chat_prompt
)
# Keep this chain under its OWN name. It must be invoked with a dict ({"input": ...}),
# whereas the RetrievalQA chain bound to `vector_qa` is later handed to the agent as a
# tool and called with a plain string. Rebinding `vector_qa` here would break that tool
# whenever this cell is executed (e.g. on Restart & Run All).
vector_qa_retrieval_chain = create_retrieval_chain(
    retriever=vector_index.as_retriever(),
    combine_docs_chain=combine_docs_chain,
)

# Perform RAG (on the retrieval chain)
response = vector_qa_retrieval_chain.invoke({"input":
    "What platforms can I use for job search based on the knowledge graph?"
})
print("Answer: ", response)

### 2.3. LLM-Cypher Chain

Here we will create another chain. This one will only have access to the graph database structure.

In [15]:
# Re-read the schema from the database so the locally cached copy is the latest version
graph.refresh_schema()

# Two-step chain (keep in mind: this is a chain, not an agent):
#   1. cypher_llm translates the user question into a Cypher query
#      (a more advanced model is used for this step)
#   2. qa_llm answers the question, using the output of that Cypher query as its context
cypher_llm = ChatOpenAI(temperature=0, model_name='gpt-4')
qa_llm = ChatOpenAI(temperature=0)
cypher_chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=cypher_llm,
    qa_llm=qa_llm,
    verbose=True,
)

In [17]:
# Ask the Cypher chain a lookup-style question
question = "What platforms exist?"
answer = cypher_chain.invoke(question)
print("ANSWER 1", answer)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Platform) RETURN p.id[0m
Full Context:
[32;1m[1;3m[{'p.id': 'Linkedin'}][0m

[1m> Finished chain.[0m
ANSWER 1 {'query': 'What platforms exist?', 'result': 'Linkedin'}


In [18]:
# Ask the Cypher chain a counting-style question
question = "How many information tips are there?"
answer = cypher_chain.invoke(question)
print("ANSWER 2: ", answer)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Document)-[:CONTENT]->(i:Information) RETURN COUNT(i)[0m
Full Context:
[32;1m[1;3m[{'COUNT(i)': 2}][0m

[1m> Finished chain.[0m
ANSWER 2:  {'query': 'How many information tips are there?', 'result': 'There are 2 information tips.'}


### 2.4. LLM Agent with Cypher Tool (Final Product of KG-based RAG)

Now we will combine the last two chains. We will create an **agent** that has access both to the graph database chain (for structure) and to the vector-index chain (for the embeddings).

In [22]:
# Tools the agent can choose from. The ReAct agent picks a tool by reading its
# description, so each description must describe THIS knowledge graph (resume and
# job-application tips) rather than some other domain.
tools = [
    Tool(
        name="Information",
        func=vector_qa.invoke,
        description="""Useful when you need to answer questions about the content of the
        resume and job-application information stored in the knowledge graph.
        Not useful for counting or any other aggregation.
        Use full question as input.
        """,
        handle_tool_error=True  # A ToolException raised inside the tool is reported back to the agent instead of aborting the run
    ),
    Tool(
        name="Graph",
        func=cypher_chain.invoke,
        description="""Useful when you need to answer questions about which entities
        (documents, platforms, information, ...) exist in the knowledge graph
        and how they are related. Also useful for any sort of
        aggregation like counting the number of nodes of a given kind.
        Use full question as input.
        """,
        handle_tool_error=True
    ),
]

# Prompt
prompt_agent = hub.pull("hwchase17/react")
# NOTE: this rebinds the notebook-level `llm` (earlier the default ChatOpenAI used for
# graph extraction) to the model that drives the agent.
llm = ChatOpenAI(temperature=0.2, model_name='gpt-4')

# Init the agent
agent = create_react_agent(
    llm,
    tools,
    prompt_agent
)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools
)

In [23]:
# First user question, answered through the agent
query = "What platforms exist?"
response = agent_executor.invoke(dict(input=query))
print(response)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Platform) RETURN p.id[0m
Full Context:
[32;1m[1;3m[{'p.id': 'Linkedin'}][0m

[1m> Finished chain.[0m
{'input': 'What platforms exist?', 'output': 'The platform that exists is Linkedin.'}


In [24]:
# Second user question, answered through the agent
query = "How many information tips are there?"
response = agent_executor.invoke(dict(input=query))
print(response)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Document)-[:CONTENT]->(i:Information) RETURN COUNT(i)[0m
Full Context:
[32;1m[1;3m[{'COUNT(i)': 2}][0m

[1m> Finished chain.[0m
{'input': 'How many information tips are there?', 'output': 'There are 2 information tips available.'}
