In [9]:
import os
from utils import LLMModels
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnableParallel,RunnablePassthrough


In [3]:
temp_file_path = os.path.join('BooksBuckets','Rich Dad Poor Dad.pdf')


pdf_loader = PyMuPDFLoader(temp_file_path)

text_documents = pdf_loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
    chunk_overlap=500)
documents = text_splitter.split_documents(text_documents)

In [24]:
prompt_template = """
Context: {context}

Question: {question}

Instructions: Provide a clear, concise, and accurate answer to the question based on the context. Make sure to consider all relevant details from the context provided.
"""
prompt = ChatPromptTemplate.from_template(prompt_template)
# prompt.format(
#     context = documents,
#     question= "who is Marry?"
# )

In [5]:
given_models = {
    "1": "gpt-3.5-turbo",
    "2": "mixtral:8x7b",
    "3": "llama3.1:8b",
}
model_name = given_models['1']
llm_models = LLMModels(model_name=model_name)
model , embeddings = llm_models.get_llm_model()


In [15]:
vector_db = DocArrayInMemorySearch.from_documents(documents, embeddings)
retriever = vector_db.as_retriever()

In [25]:
# vector_db.similarity_search_with_score(query="who was mike")

In [26]:
# retriever.invoke("who was mike",k=2)

In [27]:
setup = RunnableParallel(
    context = retriever,
    question= RunnablePassthrough()
)
# setup.invoke("who was mike")


In [28]:
parser = StrOutputParser()
# setup_prompt = {
#     "context": retriever,
#     "question": RunnablePassthrough(),
# }

chain = setup | prompt | model | parser
# 
# response_message = chain.invoke({
#     "question" :"How poor work for money?"
# })
response_message = chain.invoke("How poor work for money?")
response_message

'The poor and the middle class work for money by trading their time and skills for a paycheck. They typically rely on traditional employment to earn a living, often working for others in exchange for a salary or wages. This stands in contrast to the rich, who have money work for them through investments and passive income streams.'

In [5]:
response_message

AIMessage(content="Marry is Suannana's sister.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 56, 'total_tokens': 65}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-1db0ca71-63c1-404f-8d7f-1655fe5c2def-0', usage_metadata={'input_tokens': 56, 'output_tokens': 9, 'total_tokens': 65})

In [14]:
response_message.input

AttributeError: 'str' object has no attribute 'input'

In [39]:
documents[0:10]

[Document(metadata={'source': 'BooksBuckets/Rich Dad Poor Dad.pdf', 'file_path': 'BooksBuckets/Rich Dad Poor Dad.pdf', 'page': 1, 'total_pages': 253, 'format': 'PDF 1.4', 'title': 'Rich Dad Poor Dad', 'author': 'Robert T. Kiyosaki', 'subject': '', 'keywords': '', 'creator': 'calibre (5.20.0) [http://calibre-ebook.com]', 'producer': 'calibre (5.20.0) [http://calibre-ebook.com]', 'creationDate': "D:20210619182417+00'00'", 'modDate': "D:20210619182420+00'00'", 'trapped': ''}, page_content='“Rich Dad Poor Dad is a starting point for anyone\nlooking to gain control of their ﬁnancial future.”\n– USA TODAY'),
 Document(metadata={'source': 'BooksBuckets/Rich Dad Poor Dad.pdf', 'file_path': 'BooksBuckets/Rich Dad Poor Dad.pdf', 'page': 3, 'total_pages': 253, 'format': 'PDF 1.4', 'title': 'Rich Dad Poor Dad', 'author': 'Robert T. Kiyosaki', 'subject': '', 'keywords': '', 'creator': 'calibre (5.20.0) [http://calibre-ebook.com]', 'producer': 'calibre (5.20.0) [http://calibre-ebook.com]', 'creation

In [2]:
from app.adoptors.pinecone_client import PineconeClient
from tqdm.autonotebook import tqdm
pinecone = PineconeClient()
pinecone.create_index(index_name="testing")

index name : testing Index is already in Pinecone 
