### Config

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ['LANGCHAIN_PROJECT'] = "BuffetBot"
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

### Knowledge Base (Don't touch)

In [2]:
import os
import numpy as np
from langchain.embeddings import OllamaEmbeddings 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS



pdf_files = ["charlie_munger.pdf", "intelligent_investor.pdf", "buffet_essays.pdf"]

documents = []

for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    documents.extend(loader.load())


NameError: name 'PyPDFLoader' is not defined

In [3]:
from uuid import uuid4

index = faiss.IndexFlatL2(len(OpenAIEmbeddings().embed_query("hello world")))

vector_store = FAISS.load_local(
    "knowledge_base", embeddings, allow_dangerous_deserialization=True
)


uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, id=uuids)


NameError: name 'FAISS' is not defined

In [59]:
vector_store.save_local("knowledge_base")

### Playground

In [4]:
import os
import numpy as np
from langchain.embeddings import OllamaEmbeddings 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

In [6]:
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY') ### PLACE YOUR OPENAI API KEY HERE
embeddings = OpenAIEmbeddings()

In [7]:
embeddings = OpenAIEmbeddings()

vector_store = FAISS.load_local(
    "knowledge_base", embeddings, allow_dangerous_deserialization=True
)
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 5})


### Naive RAG

In [8]:

from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

# Prompt
prompt = hub.pull("rlm/rag-prompt")

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
naive_rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

  prompt = loads(json.dumps(prompt_object.manifest))


#### RAG Fusion

In [9]:
from langchain.prompts import ChatPromptTemplate

#------------------------------
# RAG-Fusion Question Generating Prompt
template = """You are a financial assistant that takes questions for Warren Buffet and generates multiple queries, so that Warren understands them properly \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
#------------------------------

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [10]:

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_queries = (
    prompt_rag_fusion 
    | ChatOpenAI(temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [11]:
generate_queries.invoke("how to invest like you")

['1. What investment strategies does Warren Buffet recommend for long-term success?',
 '2. How does Warren Buffet evaluate potential investment opportunities?',
 '3. What are the key principles that Warren Buffet follows when making investment decisions?',
 "4. How can individual investors apply Warren Buffet's value investing approach to their own portfolios?"]

In [12]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60, n=5):

    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results[:5]

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

In [13]:
from langchain_core.runnables import RunnablePassthrough

#------------------------------
# Prompt for Generating Answer from RAG-Fusion. Keep {context} and {question} to not break it.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""
#------------------------------
prompt = ChatPromptTemplate.from_template(template)

rag_fusion_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": RunnablePassthrough()} 
    | prompt
    | llm
    | StrOutputParser()
)

#### HyDE

In [14]:
from langchain.prompts import ChatPromptTemplate

#------------------------------
# Prompt for generating HyDE paragraph
template = """You are Warren Buffet. Answer this question with a passage using your principles: {question}
Passage:"""
#------------------------------


prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

generate_docs_for_retrieval = (
    prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser() 
)

# Run
question = "how to invest like you?"
generate_docs_for_retrieval.invoke({"question":question})

"Investing like Warren Buffet requires a disciplined approach and a long-term perspective. One of my key principles is to invest in companies with strong competitive advantages and a proven track record of success. I believe in focusing on the fundamentals of a business, rather than trying to time the market or chase short-term gains.\n\nAnother important aspect of investing like me is to do thorough research and analysis before making any investment decisions. I always look for companies with a solid balance sheet, consistent earnings growth, and a management team that is aligned with shareholders' interests.\n\nI also believe in the power of patience and staying true to your investment thesis, even when the market is volatile or uncertain. By taking a long-term view and staying focused on the fundamentals, I have been able to achieve consistent returns for my shareholders over the years.\n\nIn conclusion, to invest like Warren Buffet, focus on companies with strong competitive advant

In [15]:
# Retrieve
retrieval_chain = generate_docs_for_retrieval | retriever 
retireved_docs = retrieval_chain.invoke({"question":question})
retireved_docs

#------------------------------
# Prompt for Generating Answer from HyDE. Keep {context} and {question} to not break it.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""
#------------------------------

prompt = ChatPromptTemplate.from_template(template)

hyde_rag_chain = (
    {"context": retrieval_chain, 
     "question": RunnablePassthrough()} 
    |prompt
    | llm
    | StrOutputParser()
)

### Testing

In [84]:
question = "how do I invest like you?"

In [86]:
naive_rag_chain.invoke(question)

'To invest like an intelligent investor, focus on controlling what you can, such as minimizing costs, managing risk, and maintaining realistic expectations. Adopt a long-term strategy by consistently investing in a total stock-market index fund and avoid reacting to market fluctuations. Lastly, prioritize self-discipline and continuous learning to improve your investment decisions over time.'

In [87]:
rag_fusion_chain.invoke(question)

'To invest like Warren Buffett, consider the following key principles based on his investment philosophy:\n\n1. **Understand the Business**: Focus on companies that you can easily understand. Buffett emphasizes investing in businesses with strong economic characteristics and a clear business model.\n\n2. **Evaluate Management**: Assess the quality of the management team. Look for leaders who have a track record of making wise decisions, allocating capital effectively, and prioritizing shareholder interests over personal gain.\n\n3. **Look for "Franchise" Companies**: Invest in companies with strong consumer brands, robust financial health, and a competitive edge in their market. These are often companies that can sustain growth over the long term.\n\n4. **Margin of Safety**: Always seek a margin of safety in your investments. This means buying stocks at a price that is significantly below their intrinsic value to minimize risk.\n\n5. **Long-Term Perspective**: Adopt a long-term investm

In [92]:
hyde_rag_chain.invoke({"question":question})

'To invest like the intelligent investors discussed in the provided context, consider the following principles:\n\n1. **Control the Controllable**: Focus on aspects of investing that you can manage, such as:\n   - **Brokerage Costs**: Trade rarely and choose low-cost options.\n   - **Ownership Costs**: Avoid mutual funds with high annual expenses.\n   - **Expectations**: Set realistic return expectations based on historical data rather than fantasies.\n   - **Risk Management**: Decide how much of your total assets to invest in the stock market, diversify your investments, and regularly rebalance your portfolio.\n   - **Tax Efficiency**: Hold investments for at least one year, and ideally five years, to minimize capital gains taxes.\n   - **Behavior**: Maintain discipline and avoid emotional reactions to market fluctuations.\n\n2. **Long-Term Perspective**: Adopt a long-term investment horizon (25-30 years). Consider investing in a total stock-market index fund and contribute regularly,

In [None]:
#cicamaca