In [1]:
import os
os.chdir('..')
os.getcwd()

'/home/giulia/Documents/giulia/chat-with-docs/code'

In [2]:
# Libraries
from langchain.vectorstores import Chroma
import os

from langchain.text_splitter import CharacterTextSplitter

from langchain.document_loaders import PyPDFLoader
from dotenv import load_dotenv


# OpenAIEmbeddings() was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. That's why I use langchain_openai instaed
import langchain_openai
import logging


from utilities.customprompt import PROMPT

from langchain.chains import ConversationalRetrievalChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT

from langchain.chains.llm import LLMChain

from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# Define the llm model
from langchain_openai import ChatOpenAI

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [4]:
api_key = os.getenv("OPENAI_API_KEY")
k = 4
load_dotenv()


SYSTEM_TEMPLATE = """
Answer the user's questions based on the below context. 
If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know":

<context>
{context}
</context>
"""

question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            SYSTEM_TEMPLATE,
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)

document_chain = create_stuff_documents_chain(llm, question_answering_prompt)


# Load documents from the specified directory
loader = PyPDFLoader("docs/Naval Ravikant - The Almanack.pdf")
documents = loader.load()


In [5]:
# transform words into numbers
embeddings = langchain_openai.OpenAIEmbeddings(openai_api_key=api_key)

# split the text to chuncks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# Upload into the vector store
vector_store = Chroma.from_documents(texts, embeddings)

In [6]:
# k is the number of chunks to retrieve
retriever = vector_store.as_retriever(k=4)
question="What Naval think about love?"
docs = retriever.invoke(question)

In [7]:
from langchain_core.messages import HumanMessage

document_chain.invoke(
    {
        "context": docs,
        "messages": [
            HumanMessage(content=question)
        ],
    }
)

"Naval believes that love is one of the most important aspects of life, as he prioritizes health, love, and one's mission in that order. He values honesty and clarity in relationships, expressing that he appreciates not having to guess how someone feels about him or a situation. While the specific details of his thoughts on love are not explicitly mentioned in the context, it is clear that he values authenticity and direct communication in relationships."

In [8]:
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT, verbose=False)

doc_chain = load_qa_with_sources_chain(llm, chain_type="stuff", verbose=False, prompt=PROMPT)

chain = ConversationalRetrievalChain(
    retriever=vector_store.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)


In [9]:
chat_history=[]
query='what Naval think about love?'

chain.invoke({'question': query, 'chat_history': chat_history})

{'question': 'what Naval think about love?',
 'chat_history': [],
 'answer': "Naval believes that love is a priority, along with health and one's mission. He also values a calm mind, a fit body, and a house full of love, and believes that these things cannot be bought, but must be earned.",
 'source_documents': [Document(page_content='NAVAL ’S WRITING ·  225Health, love, and your mission, in that order. Nothing else \nmatters.', metadata={'page': 224, 'source': 'docs/Naval Ravikant - The Almanack.pdf'}),
  Document(page_content='instinct for seeing through life’ s veneer has changed how I see \nthe world.\nI’ve learned an enormous amount from Naval. Reading, listening, \nand applying his principles of wealth and happiness has given \nme calm confidence on my path and taught me to enjoy every \nmoment of this journey. Closely studying his career has shown \nme how great things are accomplished through small, persistent \nsteps, and how large an impact one individual can have.', metadata

### TEST 2

In [17]:
api_key = os.getenv("OPENAI_API_KEY")
k = 4
load_dotenv()


# Define the llm model
#llm=langchain_openai.OpenAI(openai_api_key = api_key, model_name="gpt-3.5-turbo", temperature=0.5,max_tokens=300)
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.7)

# Load documents from the specified directory
loader = PyPDFLoader("docs/Naval Ravikant - The Almanack.pdf")
documents = loader.load()

In [15]:
# transform words into numbers
embeddings = langchain_openai.OpenAIEmbeddings(openai_api_key=api_key)

# split the text to chuncks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
texts = text_splitter.split_documents(documents)

# Upload into the vector store
vector_store = Chroma.from_documents(texts, embeddings)


In [18]:
# Load a QA chain using an OpenAI object, a chain type, and a prompt template.
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT, verbose=False)

doc_chain = load_qa_with_sources_chain(llm, chain_type="stuff", verbose=False, prompt=PROMPT)

chain = ConversationalRetrievalChain(
    retriever=vector_store.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
)


chain.invoke({"question": query, "chat_history": chat_history})


{'question': 'what Naval think about love?',
 'chat_history': [],
 'answer': "NAVAL believes that love is important and should be prioritized, along with health and one's mission. He also values the genuine love for reading as a superpower.",
 'source_documents': [Document(page_content='NAVAL ’S WRITING ·  225Health, love, and your mission, in that order. Nothing else \nmatters.', metadata={'page': 224, 'source': 'docs/Naval Ravikant - The Almanack.pdf'}),
  Document(page_content='NAVAL ’S WRITING ·  225Health, love, and your mission, in that order. Nothing else \nmatters.', metadata={'page': 224, 'source': 'docs/Naval Ravikant - The Almanack.pdf'}),
  Document(page_content='114 · THE ALMANACK OF NAVAL RAVIKANTLEARN TO LOVE TO READ\n(Specific recommendations for books, blogs, and more are in \n“Naval’s Recommended Reading” section.)\nThe genuine love for reading itself, when cultivated, is a super -\npower. We live in the age of Alexandria, when every book and \nevery piece of knowledg

In [None]:
# Extracting unique sources
unique_sources = set(map(lambda x: x.metadata["source"], result['source_documents']))

# Enumerating and formatting the sources
sources = [f"{idx}. {source}" for idx, source in enumerate(unique_sources, start=1)]

source_chunks="\n".join(f"Chunck_number_{i}:{doc.page_content}" for i, doc in enumerate(result['source_documents']))

