In [None]:

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma # vector store
import os

from dotenv import load_dotenv

load_dotenv('../.env')


loader = PyPDFLoader('~/Books/algorithms_to_live_by.pdf')
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=224)

document = loader.load()
document = text_splitter.split_documents(document)

db = Chroma.from_documents(document, OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY')))

  db = Chroma.from_documents(document, OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY')))


In [3]:
query = 'Who are the authors of the book'
result = db.similarity_search(query=query)
result

[Document(metadata={'producer': 'calibre 3.42.0 [https://calibre-ebook.com]', 'source': '/home/bitcot/Books/algorithms_to_live_by.pdf', 'creator': 'calibre 3.42.0 [https://calibre-ebook.com]', 'title': 'Algorithms to Live By', 'page': 2, 'total_pages': 384, 'page_label': '3', 'creationdate': '2019-07-04T07:03:28+00:00', 'author': 'Brian Christian'}, page_content='Begin\tReading\nTable\tof\tContents\nAbout\tthe\tAuthors\nCopyright\tPage\n\t\nThank\tyou\tfor\tbuying\tthis\nHenry\tHolt\tand\tCompany\tebook.\n\t\nTo\treceive\tspecial\toffers,\tbonus\tcontent,\nand\tinfo\ton\tnew\treleases\tand\tother\tgreat\treads,\nsign\tup\tfor\tour\tnewsletters.\n\t\nOr\tvisit\tus\tonline\tat\nus.macmillan.com/newslettersignup\n\t\nFor\temail\tupdates\ton\tBrian\tChristian,\tclick\t\nhere\n.\nFor\temail\tupdates\ton\tTom\tGriffiths,\tclick\t\nhere\n.'),
 Document(metadata={'creator': 'calibre 3.42.0 [https://calibre-ebook.com]', 'producer': 'calibre 3.42.0 [https://calibre-ebook.com]', 'page_label': '35

In [4]:
result[0]

Document(metadata={'producer': 'calibre 3.42.0 [https://calibre-ebook.com]', 'source': '/home/bitcot/Books/algorithms_to_live_by.pdf', 'creator': 'calibre 3.42.0 [https://calibre-ebook.com]', 'title': 'Algorithms to Live By', 'page': 2, 'total_pages': 384, 'page_label': '3', 'creationdate': '2019-07-04T07:03:28+00:00', 'author': 'Brian Christian'}, page_content='Begin\tReading\nTable\tof\tContents\nAbout\tthe\tAuthors\nCopyright\tPage\n\t\nThank\tyou\tfor\tbuying\tthis\nHenry\tHolt\tand\tCompany\tebook.\n\t\nTo\treceive\tspecial\toffers,\tbonus\tcontent,\nand\tinfo\ton\tnew\treleases\tand\tother\tgreat\treads,\nsign\tup\tfor\tour\tnewsletters.\n\t\nOr\tvisit\tus\tonline\tat\nus.macmillan.com/newslettersignup\n\t\nFor\temail\tupdates\ton\tBrian\tChristian,\tclick\t\nhere\n.\nFor\temail\tupdates\ton\tTom\tGriffiths,\tclick\t\nhere\n.')

In [10]:
# using lanceDB
from langchain_community.vectorstores import LanceDB

emb = OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY'))
lance_db = LanceDB(
    uri='./lancedb',
    embedding=emb
)
vector_store = lance_db.from_documents(document, emb)

In [13]:
resutl = vector_store.similarity_search('summerize the first chapter')
result

[Document(metadata={'producer': 'calibre 3.42.0 [https://calibre-ebook.com]', 'source': '/home/bitcot/Books/algorithms_to_live_by.pdf', 'creator': 'calibre 3.42.0 [https://calibre-ebook.com]', 'title': 'Algorithms to Live By', 'page': 2, 'total_pages': 384, 'page_label': '3', 'creationdate': '2019-07-04T07:03:28+00:00', 'author': 'Brian Christian'}, page_content='Begin\tReading\nTable\tof\tContents\nAbout\tthe\tAuthors\nCopyright\tPage\n\t\nThank\tyou\tfor\tbuying\tthis\nHenry\tHolt\tand\tCompany\tebook.\n\t\nTo\treceive\tspecial\toffers,\tbonus\tcontent,\nand\tinfo\ton\tnew\treleases\tand\tother\tgreat\treads,\nsign\tup\tfor\tour\tnewsletters.\n\t\nOr\tvisit\tus\tonline\tat\nus.macmillan.com/newslettersignup\n\t\nFor\temail\tupdates\ton\tBrian\tChristian,\tclick\t\nhere\n.\nFor\temail\tupdates\ton\tTom\tGriffiths,\tclick\t\nhere\n.'),
 Document(metadata={'creator': 'calibre 3.42.0 [https://calibre-ebook.com]', 'producer': 'calibre 3.42.0 [https://calibre-ebook.com]', 'page_label': '35

## Retriever and Chain
using LLM along with prompt to query vector store - using lancedb 

In [None]:
from langchain_community.llms import OpenAI

