We saw in the last notebook that pushing summaries into PGVector removes a lot of information from the text. Here we'll build a collection from the original text instead, to see if it does a better job of preserving context.

In [12]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_postgres.vectorstores import PGVector
from langchain_core.runnables import chain
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import uuid

In [3]:
# Read the book from disk, then cut it into overlapping chunks for embedding.

CHUNK_SIZE = 2000     # upper bound on the size of each chunk
CHUNK_OVERLAP = 200   # amount of text shared by neighbouring chunks

loader = TextLoader('data/HP1.txt', encoding='utf-8')
raw_doc = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splitted_doc = splitter.split_documents(raw_doc)

In [5]:
# Peek at the first chunk to sanity-check the split (content + metadata).
# NOTE(review): the recorded preview shows mojibake (e.g. 'youâ€™d') even though the
# file is loaded with encoding='utf-8' — the text on disk may be double-encoded; verify.
splitted_doc[0]

Document(metadata={'source': 'data/HP1.txt'}, page_content='M r. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people youâ€™d expect to be involved in anything strange or mysterious, because they just didnâ€™t hold with such nonsense.\n\nMr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.\n\nThe Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didnâ€™t think they could bear it if anyone found out about the Potters. Mrs

In [7]:
# Embed each chunk and store it in a PGVector collection.

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
connection = 'postgresql+psycopg://langchain:langchain@localhost:6024/langchain'
collection_name = "Harry_Potter_Complete"

# No ids are passed, so every call inserts fresh rows. Without
# pre_delete_collection, re-running this cell (or Restart & Run All) would
# append another full copy of the book and the retriever would start returning
# duplicate chunks. Rebuild the collection from scratch on each run instead.
db = PGVector.from_documents(
    documents=splitted_doc,
    embedding=embedding_model,
    collection_name=collection_name,
    connection=connection,
    pre_delete_collection=True,
)

In [8]:
# Smoke-test the store: fetch the 2 chunks most similar to a generic query.
db.similarity_search("Harry Potter", k=2)

[Document(id='ee02dcc6-f879-44aa-910f-b3075c6f4e55', metadata={'source': 'data/HP1.txt'}, page_content="â€œYeah,â€\x9d said Hagrid in a very muffled voice, â€œIâ€™ll be takinâ€™ Sirius his bike back. Gâ€™night, Professor McGonagall â€” Professor Dumbledore, sir.â€\x9d\n\nWiping his streaming eyes on his jacket sleeve, Hagrid swung himself onto the motorcycle and kicked the engine into life; with a roar it rose into the air and off into the night.\n\nâ€œI shall see you soon, I expect, Professor McGonagall,â€\x9d said Dumbledore, nodding to her. Professor McGonagall blew her nose in reply.\n\nDumbledore turned and walked back down the street. On the corner he stopped and took out the silver Put-Outer. He clicked it once, and twelve balls of light sped back to their street lamps so that Privet Drive glowed suddenly orange and he could make out a tabby cat slinking around the corner at the other end of the street. He could just see the bundle of blankets on the step of number four.\n\nâ€œG

In [10]:
# Shared building blocks for question answering: retriever, prompt and LLM.
retriever = db.as_retriever()

prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context : {context} 
    question: {question}
""")

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Deliberately NOT named `chain`: that name is the decorator imported from
# langchain_core.runnables and is applied as `@chain` further down. Rebinding it
# here makes those cells fail on a fresh top-to-bottom run.
qa_chain = prompt | llm

# One-off manual run: retrieve, stuff the raw chunks into the prompt, ask the LLM.
sample_question = "What company does Mr. Dursley work for?"
docs = retriever.invoke(sample_question)
context = '\n\n'.join(d.page_content for d in docs)

qa_chain.invoke({"context": context, "question": sample_question})

AIMessage(content='Mr. Dursley works for a firm called Grunnings, which makes drills.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 1828, 'total_tokens': 1846, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-ClWoeknsG7JclG2X1wWrcBdL9yeb4', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--eb7d1041-051a-4c7c-a1cc-db13596ed4f3-0', usage_metadata={'input_tokens': 1828, 'output_tokens': 18, 'total_tokens': 1846, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [29]:
@chain
def summarize_docs(docs):
    """Summarize each document with an LLM while preserving named entities.

    Args:
        docs: Iterable of ``Document`` objects (e.g. what the retriever returns).

    Returns:
        A list with one ``Document`` per input, in the same order. Each has the
        summary text as ``page_content`` and the originating document's id
        under ``metadata["id"]``.
    """
    docs = list(docs)
    if not docs:
        # Nothing to summarize: avoid building the chain / touching the API.
        return []

    summarizer_prompt = ChatPromptTemplate.from_template(
        "Summarize this doc while preserving named entities:\n\n{doc}"
    )
    # No temperature is set here, so summaries of the same chunk can differ
    # between calls.
    summarizer_llm = ChatOpenAI(model='gpt-3.5-turbo')
    summarizer_chain = summarizer_prompt | summarizer_llm | StrOutputParser()

    # batch() sends the per-document requests together instead of waiting on
    # one round-trip at a time; results come back in input order.
    summaries = summarizer_chain.batch([{"doc": doc.page_content} for doc in docs])

    return [
        Document(page_content=summary, metadata={"id": doc.id})
        for doc, summary in zip(docs, summaries)
    ]

In [30]:
@chain
def get_answer(question):
    """Answer ``question`` from retrieved chunks and return summarized context.

    The answer is generated from the raw text of the retrieved chunks; the
    summaries are produced afterwards and are only returned for inspection.

    Args:
        question: The question string to answer.

    Returns:
        dict with ``"answer"`` (the LLM's answer text) and ``"context"``
        (the retrieved documents after passing through ``summarize_docs``).
    """
    retrieved = retriever.invoke(question)

    # Raw chunk text, blank-line separated, is what the LLM sees as context.
    context_text = '\n\n'.join(chunk.page_content for chunk in retrieved)
    llm_input = prompt.invoke({"context": context_text, "question": question})

    reply = llm.invoke(llm_input)
    # Chat models return a message object; fall back to the value itself otherwise.
    reply_text = getattr(reply, "content", reply)

    return {
        "answer": reply_text,
        "context": summarize_docs.invoke(retrieved),
    }

In [31]:
# Single-fact lookup (who said it). Returns the answer text plus LLM summaries of the retrieved chunks.
get_answer.invoke("Who told Professor Dumbledore she liked his new earmuffs?")

{'answer': 'Madam Pomfrey told Professor Dumbledore she liked his new earmuffs.',
 'context': [Document(metadata={'id': 'c274dcc8-47e4-4fdd-885c-139922edac56'}, page_content='The document is a conversation between Professor McGonagall and Dumbledore about using the name Voldemort instead of "You-Know-Who." McGonagall is anxious to discuss rumors about why Voldemort has disappeared and what finally stopped him, but Dumbledore avoids the topic. Dumbledore acknowledges that Voldemort had powers he never will, but McGonagall admires him for being too noble to use them. McGonagall expresses concern about the rumors circulating and is waiting for Dumbledore to confirm the truth.'),
  Document(metadata={'id': 'd1625180-0519-42df-9745-778c6cf91e30'}, page_content='A man uses a magic device called the Put-Outer to extinguish all the streetlights on a deserted street before revealing himself to be Dumbledore. He then interacts with Professor McGonagall, who was briefly disguised as a cat, discus

In [32]:
# Numeric-fact lookup. The answer is generated from the raw chunks; note that the
# first recorded context summary no longer contains the actual count.
get_answer.invoke("How many presents did Dudley get for his birthday?")

{'answer': 'Dudley got thirty-nine presents for his birthday.',
 'context': [Document(metadata={'id': 'b2db2913-a51b-4eee-a309-c46d4ec9b6a4'}, page_content='The document describes a scene in the Dursley household where Harry is preparing breakfast while Uncle Vernon and Dudley are present. Dudley is disappointed with the number of presents he received for his birthday and Aunt Petunia tries to calm him down. In the end, Dudley agrees to be satisfied with the number of presents he received. Uncle Vernon finds the situation amusing.'),
  Document(metadata={'id': '06a54161-518d-4e2f-909a-cc1d9bcd2707'}, page_content="Aunt Petunia, Uncle Vernon, Dudley, and Harry are celebrating Dudley's birthday. However, Mrs. Figg, who usually looks after Harry while the Dursleys take Dudley out, has broken her leg and can't watch Harry. Aunt Petunia is angry and worried about what to do with Harry. Uncle Vernon suggests calling Marge or Yvonne, but both options are not feasible. The Dursleys speak about

In [33]:
# Small-detail lookup (a smell); answered from raw chunk text, not from the summaries returned alongside it.
get_answer.invoke("What did Mrs. Figg’s, the lady who would watch Harry on Dudley’s birthdays, house smell of?")

{'answer': 'Cabbage',
 'context': [Document(metadata={'id': '06a54161-518d-4e2f-909a-cc1d9bcd2707'}, page_content="Aunt Petunia, Uncle Vernon, and Dudley celebrate Dudley's birthday with a lot of gifts. However, when Aunt Petunia receives a call that Mrs. Figg can't take care of Harry, the Dursleys are left with a problem. They consider calling Aunt Marge or Yvonne, but both options are unavailable. Harry is secretly relieved that he won't have to spend his day with Mrs. Figg and her cats."),
  Document(metadata={'id': 'ad77f03d-d0e7-4933-a507-6a16d18a74d0'}, page_content='Harry declines Dudleyâ€™s attempt to shove his head down a toilet before Dudley goes off to buy his school uniform. While Dudley is getting his uniform, Harry spends time at Mrs. Figg’s house, where he watches TV and eats old chocolate cake. When Dudley returns in his extravagant uniform, the Dursleys are impressed, but Harry struggles not to laugh. The next morning, Aunt Petunia is dyeing Dudley’s old clothes gray f

In [34]:
# List-style lookup: several names are expected in one answer.
get_answer.invoke("Name Mrs. Figg’s cats")

{'answer': 'Tibbles, Snowy, Mr. Paws, and Tufty',
 'context': [Document(metadata={'id': 'd1625180-0519-42df-9745-778c6cf91e30'}, page_content='A man named Dumbledore used a Put-Outer to extinguish street lights, revealing the presence of a woman named Professor McGonagall, who was disguised as a cat. The two engaged in a conversation about a missed celebration.'),
  Document(metadata={'id': '1683eb5e-c65b-4f36-a93b-e62fc844882b'}, page_content='A tawny owl flies past a window unnoticed by the Dursley family in the morning. Mr. Dursley leaves for work, struggling to say goodbye to his tantrum-throwing son, Dudley. On his way to work, Mr. Dursley sees a cat reading a map, but dismisses it as a trick of the light. Despite the odd sighting, he focuses on his work responsibilities for the day.'),
  Document(metadata={'id': '06a54161-518d-4e2f-909a-cc1d9bcd2707'}, page_content="Aunt Petunia tells Uncle Vernon that Mrs. Figg has broken her leg and can't take Harry on Dudley's birthday outing.

In [35]:
# Single-fact lookup (name of a school).
get_answer.invoke("What school was Harry going to attend before he got accepted to Hogwarts?")

{'answer': 'Stonewall High',
 'context': [Document(metadata={'id': '8f80a2e7-b32d-49f7-a066-6daef70a2cc6'}, page_content='The document outlines a list of supplies needed for Hogwarts School of Witchcraft and Wizardry, including uniforms, course books, and other equipment. Hagrid helps Harry navigate through London to purchase these items.'),
  Document(metadata={'id': 'f48f6f6d-51b8-4c49-81bc-b497db190bf5'}, page_content='In the document, Hagrid confronts Vernon Dursley about hiding a letter from Harry, revealing to Harry that he is a wizard and has been accepted to Hogwarts School of Witchcraft and Wizardry. The letter from Hogwarts, signed by Minerva McGonagall, informs Harry of his acceptance and provides a list of necessary books and equipment. Harry is left with questions about what it means for Hogwarts to await his owl.'),
  Document(metadata={'id': '4701cd36-2a03-4189-ad4e-ce8a658c6fd8'}, page_content="Hagrid helps Harry onto the train back to the Dursleys, giving him his Hogwa

In [36]:
# Multi-attribute lookup (the colors of a uniform).
get_answer.invoke("What colors were on Dudley’s new Smeltings uniform?")

{'answer': 'Dudley’s new Smeltings uniform was maroon tailcoats, orange knickerbockers, and flat straw hats called boaters.',
 'context': [Document(metadata={'id': 'ad77f03d-d0e7-4933-a507-6a16d18a74d0'}, page_content="Harry refused Dudley's disparaging comments towards him and his appearance and received bleak treatment at Mrs. Figg's house. Harry watched Dudley parade in his new Smeltings uniform, prompting emotional reactions from the Dursleys. Additionally, Aunt Petunia attempted to dye Dudley's old clothes gray to pass off as Harry's new school uniform."),
  Document(metadata={'id': '5b0fcd8d-3ff6-4f35-a772-92858e8053c7'}, page_content="Aunt Petunia is dyeing Dudley's old clothes gray for Harry to wear to school. Harry receives a letter addressed to him, Mr. H. Potter, from Hogwarts School of Witchcraft and Wizardry. The letter is thick and heavy, made of yellow parchment with emerald-green ink. Harry is shocked as no one has ever written to him before."),
  Document(metadata={'id

In [37]:
# Single-fact lookup (color of a Christmas present).
get_answer.invoke("What color was Weasley jumper that Harry got as a Christmas present?")

{'answer': 'Harry got an emerald green Weasley jumper as a Christmas present.',
 'context': [Document(metadata={'id': 'f8d48142-dc0a-42dd-a18e-c37a2f05e2c5'}, page_content="Harry discovers an invisibility cloak left by his father, which makes him completely invisible when he wears it. A mysterious note reveals that it belonged to his father and is now being returned to him. Harry is unsure of who sent it and feels strange about the cloak's origins. He hides it from his friends, Fred and George, who give him a Christmas sweater."),
  Document(metadata={'id': '46dbb50c-5b60-4c56-b426-c91ddaa74789'}, page_content='Ron, George, Percy, and Harry are spending Christmas together. Ron is forced to wear a maroon sweater, while Percy is given a sweater with a "P for prefect" on it. The twins, Gred and Forge, insist they all wear the sweaters and Percy is dragged away from the prefects\' table to sit with his family. The Christmas dinner is extravagant, with plenty of food and wizard crackers tha

In [38]:
# Ordering question (first student sorted into a given house).
get_answer.invoke("During the sorting ceremony who was the first student to be sorted into Gryffindor?")

{'answer': 'Lavender Brown was the first student to be sorted into Gryffindor.',
 'context': [Document(metadata={'id': '11f169ea-0240-4037-8201-0d7240788af2'}, page_content='Professor McGonagall led Harry and the other first years to a small chamber before the start-of-term banquet at Hogwarts. She explained that they would be sorted into the houses of Gryffindor, Hufflepuff, Ravenclaw, and Slytherin, where they would spend their time at the school. Each house competes for points throughout the year, with the house cup awarded to the one with the most points. The Sorting Ceremony would take place shortly, and the students were advised to look presentable. Harry, Ron, and Neville nervously waited to be sorted into their respective houses, unsure of the process.'),
  Document(metadata={'id': '9f6c9e93-e836-4323-be4c-a608303d8c34'}, page_content="The document describes the beginning of the Sorting Ceremony at Hogwarts, where Professor McGonagall leads the first years into the Great Hall l

In [39]:
# Two-item lookup. The name is spelled "Voldemort" so the query matches the
# spelling used in the embedded text.
get_answer.invoke("What two interesting phenomena does the Muggle news report on the day the wizarding world is celebrating Voldemort’s downfall?")

{'answer': "The Muggle news reports on flocks of owls and shooting stars on the day the wizarding world is celebrating Voldemort's downfall.",
 'context': [Document(metadata={'id': 'cb5d9cac-4de1-4ee2-b594-21da58fba8e2'}, page_content='The document features a conversation between Professor McGonagall and Dumbledore about recent events, including the disappearance of You-Know-Who. McGonagall is frustrated that people are being careless and sharing rumors, while Dumbledore remains calm. They discuss how even Muggles have noticed strange occurrences. McGonagall expresses concern about people discovering their world. Dumbledore reassures her and offers her a lemon drop.'),
  Document(metadata={'id': 'c622e609-b734-413a-a5a0-125730163238'}, page_content="Dumbledore announces a change of decoration at Hogwarts, transforming Slytherin symbols into Gryffindor ones. Harry enjoys the celebration, feeling that life will return to normal next year. Despite their worries, all three friends pass the

In [40]:
# List-style lookup (five items expected).
# NOTE(review): in the recorded output the first retrieved chunk is the Gringotts
# visit rather than Hagrid's first meeting with Harry, so the listed items may be
# drawn from the wrong scenes — verify against the text.
get_answer.invoke("Name five things that Hagrid had in his coat pockets when he met Harry for the first time")

{'answer': "1. A grubby little package wrapped up in brown paper\n2. A parchment envelope containing Harry's letter\n3. A list of everything Harry needed for Hogwarts\n4. A handsome, leather-covered book full of wizard photographs\n5. Loads of Chocolate Frogs",
 'context': [Document(metadata={'id': '9c44048c-2261-451d-999c-af2d77ed9495'}, page_content="The document describes Harry's visit to Vault 713 at Gringotts bank, which had no keyhole and was opened by Griphook, a Gringotts goblin. Inside the vault, they find a mysterious package that Hagrid hides in his coat. After a wild cart ride back to the surface, Harry is left with a bag full of money from his vault. Hagrid suggests getting a uniform at Madam Malkin's shop, where Harry meets a friendly witch. He learns that another student is also being fitted for Hogwarts robes."),
  Document(metadata={'id': '8f80a2e7-b32d-49f7-a066-6daef70a2cc6'}, page_content='The document outlines the required equipment and supplies for first-year stud

In [41]:
# Two-item lookup.
# NOTE(review): the recorded answer names Hagrid, who per the retrieved context
# arrives at the pub with Harry — arguably not someone Harry "meets" there; verify.
get_answer.invoke("Name two wizards Harry meets the first time he enters the Leaky Cauldron.")

{'answer': 'Hagrid and Professor Quirrell',
 'context': [Document(metadata={'id': '9c6f1997-988b-47c0-967f-35faaf807c10'}, page_content='Harry and Hagrid navigate a bustling street filled with ordinary Muggles, searching for a hidden magical world. They eventually arrive at the Leaky Cauldron, a mysterious and seemingly invisible pub to most passersby. Hagrid is a familiar figure there, greeted warmly by the regulars as they prepare for their next adventure.'),
  Document(metadata={'id': 'a1536812-3e6d-4b71-928e-ee42a884c0c6'}, page_content="The document describes Harry Potter's return to the Leaky Cauldron, where he is greeted by various individuals including Hagrid, Dedalus Diggle, Doris Crockford, and Professor Quirrell. The bartender recognizes Harry and everyone in the bar seems excited to meet him. Professor Quirrell nervously introduces himself as one of Harry's future teachers at Hogwarts."),
  Document(metadata={'id': 'e8811123-2436-4caf-a2e2-53c6ddbae3e6'}, page_content='Harr

In [42]:
# Single-fact lookup (vault number).
get_answer.invoke("What vault was the sorcerer’s stone stored in in Gringotts?")

{'answer': 'Vault seven hundred and thirteen.',
 'context': [Document(metadata={'id': '3c01812d-51f4-4d99-9944-c527c3a0a2fd'}, page_content='Harry and Hagrid visit a vault filled with gold coins and valuable items that belong to Harry. Hagrid explains the different types of currency and helps Harry gather some of the treasure. They then proceed to vault seven hundred and thirteen, which has no keyhole and is opened by Griphook melting the door.'),
  Document(metadata={'id': '9c44048c-2261-451d-999c-af2d77ed9495'}, page_content="Vault 713, which had no keyhole, was opened by the goblin Griphook who revealed that the door would trap anyone but a goblin who tried to open it. Inside the vault, Harry found a mysterious package that Hagrid pocketed without explanation. After leaving Gringotts with a bag full of money, Hagrid left Harry to get his Hogwarts uniform while he went to the Leaky Cauldron for a drink. Harry entered Madam Malkin's shop where he was greeted by a smiling witch dressed

In [43]:
# Location lookup (the shop where the owl was bought).
get_answer.invoke("Where did Harry and Hagrid buy Hedwig?")

{'answer': 'They bought Hedwig at Eeylops Owl Emporium.',
 'context': [Document(metadata={'id': '16abad6c-96ab-4375-9481-5b41cfd7f0ba'}, page_content='The document describes a visit to Hogwarts where Harry Potter and Hagrid purchase school books at Flourish and Blotts. Hagrid almost has to drag Harry away from a book on curses. Hagrid advises Harry not to use magic in the Muggle world and they continue shopping for potion ingredients and a wand. Harry realizes he still needs a birthday present.'),
  Document(metadata={'id': '5d6fd8c0-4f15-4b3f-bd7c-0c1304d723fb'}, page_content='Summary: Hagrid accompanies Harry to get his wand and a birthday present. Hagrid decides to get Harry an owl, which Harry is grateful for. They then go to Ollivanders to get Harry a wand, where they are greeted by a mysterious atmosphere and a soft-spoken voice.'),
  Document(metadata={'id': '9c6f1997-988b-47c0-967f-35faaf807c10'}, page_content="Harry and Hagrid explore an ordinary street filled with Muggle shop

In [44]:
# Location lookup (the shop for school robes).
get_answer.invoke("Where did Harry buy his school robes?")

{'answer': "Harry bought his school robes at a shop called Madam Malkin's.",
 'context': [Document(metadata={'id': '8f80a2e7-b32d-49f7-a066-6daef70a2cc6'}, page_content='The document discusses Harry Potter receiving a letter from Hagrid about attending Hogwarts School of Witchcraft and Wizardry. The letter lists the uniform, course books, and other equipment Harry will need, including a wand and a cauldron. Hagrid takes Harry to London to buy the supplies, but struggles with navigating the city.'),
  Document(metadata={'id': '16abad6c-96ab-4375-9481-5b41cfd7f0ba'}, page_content='Harry and Hagrid go shopping for Hogwarts school supplies at Flourish and Blotts and the Apothecary. They see a variety of magical books and potion ingredients, including cursed items. Hagrid reminds Harry not to use magic in the Muggle world except for special circumstances and helps him pick out a wand as a birthday present.'),
  Document(metadata={'id': '7aba7b56-d143-4337-9a8b-1e3b7d64c617'}, page_content="

In [45]:
# Three-part question (wood, core, length) answered in a single call.
get_answer.invoke("What wood is Harry’s wand made out of, what is at its core, and how long is it?")

{'answer': "Harry's wand is made out of holly wood, with a core of phoenix feather, and it is eleven inches long.",
 'context': [Document(metadata={'id': '38af9fd6-e9be-493b-9caf-a47006b1071f'}, page_content='Hagrid and Mr. Ollivander discuss wands with Harry Potter, with Ollivander explaining the unique qualities of Ollivander wands. Ollivander measures Harry for a wand and presents him with different options, each with different core materials. Harry tries several wands, but none seem to be the right fit until Ollivander suggests an ebony and unicorn hair wand for him to try.'),
  Document(metadata={'id': 'd71f2f29-d8b5-43cf-9e32-b21cf02806a8'}, page_content="The document describes Harry Potter's visit to Ollivander's wand shop. Mr. Ollivander helps Harry find the perfect wand, which happens to have a phoenix feather that is related to the feather used in Voldemort's wand that gave Harry his scar. Ollivander mentions that the wand chooses the wizard and expects great things from Harr