In [1]:
import os
import openai
import sys
# Make modules two directories up importable from this notebook.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
# Read the local .env file into the process environment.
# The return value is intentionally not bound to `_`: in IPython, assigning
# `_` yourself stops the shell from updating its previous-output variables.
load_dotenv(find_dotenv())

# Indexing os.environ (instead of .get) raises KeyError right here if the
# key is missing, rather than failing later on the first API call.
openai.api_key = os.environ['OPENAI_API_KEY']

In [2]:
# Chat model identifier; passed to ChatOpenAI when the LLM is created below.
llm_name = "gpt-3.5-turbo"


In [3]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# Location of the Chroma store opened below.
persist_directory = 'data/chroma/'

# Create the embedding function exactly once. Previously a
# "text-embedding-3-small" instance was built and then immediately replaced
# by a default OpenAIEmbeddings(), so only the default instance was ever used.
# NOTE(review): the query-time embedding model should match the model used
# when the store was built -- confirm the library default is the right one.
embedding = OpenAIEmbeddings()

# Open the vector store from persist_directory, embedding queries with `embedding`.
bookdb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

In [4]:
# Sanity check: number of records in the collection backing the vector store.
# NOTE(review): `_collection` is a private attribute, so this may break
# across library versions.
bookdb._collection.count()

79

In [5]:
# Retrieve the five stored chunks closest to the question and display them.
question = "why lean?"
docs = bookdb.similarity_search(query=question, k=5)
docs

[Document(page_content='One, two, three…\nHow does a project get to be a  year beh ind schedule? One day at a time.\n—Fred Brooks, software engineer and computer scientist', metadata={'page': 6, 'source': 'data/getting-real/getting-real-02-starting-line.pdf'}),
 Document(page_content='“We Don’t Want a Thousand Features”\nSteve Jobs gave a small private presentation about the iTunes Music Store to some independent\nrecord label people. My favorite line of the day was when people kept raising their hand saying,\n“Does it do [x]?”, “Do you plan to add [y]?”. Finally Jobs said, “Wait wait — put your hands\ndown. Listen: I know you have a thousand ideas for all the cool features iTunes could have. So\ndo we. But we don’t want a thousand features. That would be u gly. Innovation is not about\nsaying yes to everything. It’s about saying NO to all but the most crucial features.”\n—-Derek Sivers, president and programmer, CD Baby and HostBaby (from Say NO by default)', metadata={'page': 2, 'sou

In [6]:
from langchain_openai import ChatOpenAI
# temperature=0 keeps sampling as low-variance as the API allows.
llm = ChatOpenAI(
    model=llm_name,
    temperature=0,
)

In [7]:
from langchain.chains import RetrievalQA

In [8]:
# Retrieval QA chain with the library's default prompt.
# "stuff" is the default chain type, spelled out here for clarity.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=bookdb.as_retriever(),
)

In [9]:
# Run the chain on the current question and show only the answer text.
result = qa_chain.invoke(input={"query": question})
result["result"]

'Lean methodology is often favored in software development and other industries because it emphasizes efficiency, reducing waste, and focusing on delivering value to the customer. By eliminating unnecessary steps, features, or processes, lean practices aim to streamline production and improve overall quality. This approach can lead to faster development cycles, reduced costs, and increased customer satisfaction.'

In [10]:
from langchain.prompts import PromptTemplate

# Build the custom QA prompt.
# The template has two placeholders, {context} and {question}. Its
# instructions tell the model to admit when it does not know, to answer in at
# most three sentences, and to end with "thanks for asking!".
template = """
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# Wrap the raw string in a PromptTemplate so it can be handed to the chain.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [11]:
# Rebuild the chain with the custom prompt, and ask for the retrieved
# documents to be returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=bookdb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

# Ask the same question again and show the answer text.
result = qa_chain.invoke(input={"query": question})
result["result"]

'Lean methodology is important in software development because it focuses on delivering the most crucial features efficiently, avoiding unnecessary complexity and waste. By saying no to non-essential features, lean development helps teams stay focused and deliver value to customers faster. Thanks for asking!'

In [12]:
# Documents returned with the answer (available because the chain was built
# with return_source_documents=True).
result["source_documents"]

[Document(page_content='One, two, three…\nHow does a project get to be a  year beh ind schedule? One day at a time.\n—Fred Brooks, software engineer and computer scientist', metadata={'page': 6, 'source': 'data/getting-real/getting-real-02-starting-line.pdf'}),
 Document(page_content='“We Don’t Want a Thousand Features”\nSteve Jobs gave a small private presentation about the iTunes Music Store to some independent\nrecord label people. My favorite line of the day was when people kept raising their hand saying,\n“Does it do [x]?”, “Do you plan to add [y]?”. Finally Jobs said, “Wait wait — put your hands\ndown. Listen: I know you have a thousand ideas for all the cool features iTunes could have. So\ndo we. But we don’t want a thousand features. That would be u gly. Innovation is not about\nsaying yes to everything. It’s about saying NO to all but the most crucial features.”\n—-Derek Sivers, president and programmer, CD Baby and HostBaby (from Say NO by default)', metadata={'page': 2, 'sou

In [13]:
# Switch back to a plain retrieval chain: default prompt, no source documents.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=bookdb.as_retriever(),
)

In [14]:
# First question of a two-question exchange against the stateless chain.
question = "Is lean a book topic?"
result = qa_chain.invoke(input={"query": question})
result["result"]

'Lean is not specifically mentioned as a book topic in the provided context. However, the principles of lean methodology, which focus on efficiency, reducing waste, and continuous improvement, are often discussed in books related to project management, software development, and business strategy. If you are interested in learning more about lean principles, there are many books available on the topic.'

In [15]:
# Follow-up question: this chain keeps no memory of the previous question,
# so "that" cannot be resolved from the earlier exchange.
question = "why that is needed?"
result = qa_chain.invoke(input={"query": question})
result["result"]

'It seems like you\'re asking why the concepts and philosophies discussed in the book "Getting Real" are needed. The book emphasizes building, launching, and iterating on software products quickly, focusing on real-world results rather than theoretical concepts. This approach is beneficial for entrepreneurs, designers, programmers, and marketers working on innovative ideas in today\'s fast-paced digital landscape. By following the principles outlined in the book, individuals can adapt to agile development practices, create successful products, and stay ahead of the competition.'

In [16]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer stored under the "chat_history" key, returned as
# message objects rather than a single string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [17]:
from langchain.chains import ConversationalRetrievalChain
# Conversational chain: same LLM and vector store, plus the memory buffer.
retriever = bookdb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [18]:
# First turn of the conversation, run on the chain that has `memory` attached.
question = "Is lean a book topic?"
result = qa.invoke(input={"question": question})
result["answer"]

'Lean is not specifically mentioned as a book topic in the provided context. However, the principles of lean methodology, which focus on maximizing value while minimizing waste, are often discussed in books related to project management, software development, and business practices. If you are interested in learning more about lean methodology, there are many books available on the subject that you may find helpful.'

In [19]:
# Second turn: the same follow-up as before, now asked on the chain that has
# `memory` attached.
question = "why that is needed?"
result = qa.invoke(input={"question": question})
result["answer"]

'Learning more about lean methodology is essential because it emphasizes the importance of focusing on the most crucial features and saying no to unnecessary ones. This approach helps in streamlining processes, reducing waste, and delivering value to customers efficiently. By understanding lean methodology, individuals and organizations can improve their decision-making, prioritize effectively, and ultimately create better products or services.'