In [38]:
import os
from dotenv import load_dotenv
# Run python-dotenv's loader before the os.getenv() lookups below.
load_dotenv()

# Read the LangChain tracing settings and the OpenAI key from the process
# environment. A variable that is not set yields None.
(
    langchain_tracing_v2,
    langchain_endpoint,
    langchain_api_key,
    langchain_project,
    openai_api_key,
) = (
    os.environ.get(variable_name)
    for variable_name in (
        'LANGCHAIN_TRACING_V2',
        'LANGCHAIN_ENDPOINT',
        'LANGCHAIN_API_KEY',
        'LANGCHAIN_PROJECT',
        'OPENAI_API_KEY',
    )
)

In [40]:
import os
import fitz  # PyMuPDF
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate

In [41]:
# Path to the PDF on the local machine; change this to point at your own file.
local_pdf_path = "../data/Rich-Dad-Poor-Dad.pdf"

# Only load when the file is present; otherwise stop right away with a clear error.
if os.path.exists(local_pdf_path):
    # Read the PDF with PyPDFLoader.
    loader = PyPDFLoader(local_pdf_path)
    docs = loader.load()
else:
    raise FileNotFoundError(f"The file at {local_pdf_path} was not found.")

In [42]:
# Split the loaded documents into overlapping chunks.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
splits = text_splitter.split_documents(docs)

In [43]:
# Sanity check (debug): how many chunks did the splitter produce?
print("Number of splits:", len(splits))

Number of splits: 539


In [44]:
# Embed
# Open the persisted Chroma collection and only embed the chunks when it is
# still empty. Calling Chroma.from_documents() with the same persist_directory
# on every run would embed every chunk again and append the same chunks to the
# on-disk collection, so repeated runs would return duplicated results.
# Delete the directory to force a rebuild (for example after changing the PDF
# or the splitter settings).
# To pick a specific embedding model: OpenAIEmbeddings(model="text-embedding-3-small")
chroma_persist_dir = "../db/chroma_db"
embeddings = OpenAIEmbeddings()

# Note: the Chroma constructor takes `embedding_function`, while
# Chroma.from_documents() takes `embedding`.
vectorstore = Chroma(
    persist_directory=chroma_persist_dir,
    embedding_function=embeddings,
)

# get(limit=1) is a cheap way to ask "is there anything stored yet?".
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(splits)


In [45]:
# Debug: list the attribute names available on the vector store object.
vectorstore_attributes = dir(vectorstore)
print(vectorstore_attributes)

['_Chroma__query_collection', '_LANGCHAIN_DEFAULT_COLLECTION_NAME', '__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_asimilarity_search_with_relevance_scores', '_client', '_client_settings', '_collection', '_cosine_relevance_score_fn', '_embedding_function', '_euclidean_relevance_score_fn', '_get_retriever_tags', '_max_inner_product_relevance_score_fn', '_persist_directory', '_select_relevance_score_fn', '_similarity_search_with_relevance_scores', 'aadd_documents', 'aadd_texts', 'add_documents', 'add_images', 'add_texts', 'adelete', 'afrom_documents', 'afrom_texts', 'amax_marginal_relevance_search', 'amax_marginal_re

In [25]:
# # Directly access and print the document splits at specific indices
# indices_to_check = [0, 1, 2]
# for index in indices_to_check:
#     if index < len(splits):
#         embedded_doc = splits[index]
#         print(f"Document at index {index}:")
#         print(embedded_doc.page_content)
#     else:
#         print(f"Index {index} is out of range.")

# retriever = vectorstore.as_retriever()


In [46]:
# Wrap the vector store in a retriever, using as_retriever()'s default settings.
retriever = vectorstore.as_retriever()

In [47]:
# Debug: list the attribute names available on the retriever object.
retriever_attributes = dir(retriever)
print(retriever_attributes)

['Config', 'InputType', 'OutputType', '__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__or__', '__orig_bases__', '__parameters__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__', '__repr_str__', '__rich_repr__', '__ror__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__try_update_forward_refs__', '__validators__', '__weakref__', '_abatch_with_confi

In [48]:
# Display the retriever's repr to check its type, tags and backing vector store.
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7ef5f5accc70>)

In [49]:
# Prompt for the question-answering chain. It has two placeholders:
# {question} (the user's question) and {context} (the retrieved text).
_qa_template_text = (
    "You are an assistant for question-answering tasks. Only use the following pieces of retrieved context to answer the question. "
    "Do not use any outside knowledge. If you don't know the answer based on the context, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:"
)

# The whole prompt is sent as a single human message.
_qa_prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=_qa_template_text,
)
_qa_human_message = HumanMessagePromptTemplate(prompt=_qa_prompt)

prompt_template = ChatPromptTemplate(
    input_variables=['context', 'question'],
    messages=[_qa_human_message],
)


In [51]:
# LLM
# Chat model used by the question-answering chain; temperature is fixed at 0 here.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects that expose a ``page_content`` string.

    Returns:
        One string with the contents in their original order, or an empty
        string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Chain
def create_rag_chain(retriever, prompt_template, llm):
    """Compose the retrieval-augmented question-answering pipeline.

    The input given to the chain is used twice: it is piped through
    ``retriever`` and ``format_docs`` to fill ``context``, and it is passed
    through unchanged as ``question``. The resulting mapping then goes through
    ``prompt_template``, ``llm`` and a ``StrOutputParser``, in that order.

    Args:
        retriever: Runnable piped into ``format_docs`` to produce the context.
        prompt_template: Prompt that consumes ``context`` and ``question``.
        llm: Model that receives the rendered prompt.

    Returns:
        The composed chain.
    """
    chain_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    prompted = chain_inputs | prompt_template
    answered = prompted | llm
    return answered | StrOutputParser()

# Assemble the chain from the retriever, prompt and model defined above.
rag_chain = create_rag_chain(
    retriever=retriever,
    prompt_template=prompt_template,
    llm=llm,
)

# Ask a sample question and print the answer.
sample_question = "who is Robert"
response = rag_chain.invoke(sample_question)
print(response)



Robert Kiyosaki is an entrepreneur, educator, and investor known for his financial education teachings and the book "Rich Dad Poor Dad." He challenges conventional wisdom on money and investing and encourages people to become financially educated. Kiyosaki is the founder of The Rich Dad Company and has launched a new offering in mobile and online gaming.


# Generate Prompt

In [52]:

# Build the retriever used by the prompt-generation section. When the PDF is
# missing, retriever is set to None and generate_optimized_prompts() then
# skips the retrieval step.
# NOTE: no persist_directory is passed here, so this store is separate from the
# one written to ../db/chroma_db earlier and every chunk is embedded again.
if os.path.exists(local_pdf_path):
    # Load PDF document. Use load() rather than load_and_split():
    # load_and_split() already runs a text splitter, so following it with
    # split_documents() would split the text twice and produce chunks that
    # differ from the ones built by the first pipeline above.
    loader = PyPDFLoader(local_pdf_path)
    docs = loader.load()

    # Split
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Embed
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

    retriever = vectorstore.as_retriever()
else:
    retriever = None

In [61]:
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
#### PROMPT GENERATION ####

def generate_optimized_prompts(query, num_prompts=2):
    """Ask the chat model for several optimized rewrites of ``query``.

    When the module-level ``retriever`` is available, the documents it returns
    for ``query`` are appended to the request as context. When there is no
    retriever, or nothing is retrieved, no context section is sent at all.

    Args:
        query: The user's original prompt text.
        num_prompts: Number of independent completions to request.

    Returns:
        A list of ``num_prompts`` strings, each the stripped content of one
        model response.
    """
    # Initialize ChatOpenAI
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7)

    # Define the base prompt
    base_prompt = f"User Query: {query}\n\nGenerate an optimized prompt:"

    # Use the retriever to get relevant context from the user's documents (if available)
    context_text = ""
    if retriever is not None:
        # invoke() is the Runnable entry point already used elsewhere in this
        # notebook; get_relevant_documents() is deprecated.
        context = retriever.invoke(query)
        if context:
            context_text = "\n\n".join(doc.page_content for doc in context)

    # Only add a context section when there is context to show; an empty
    # "Context:" trailer gives the model nothing to work with.
    user_content = base_prompt
    if context_text:
        user_content += "\n\nContext: " + context_text

    # The request is identical for every sample, so build it once. The
    # instructions are sent as a system message: they tell the model how to
    # behave and are not something the assistant said earlier.
    messages = [
        SystemMessage(content="""
                      "You are an assistant specialized in generating optimized prompts. 
                      example1:original prompmt :Interior furniture design with rocks. and 
                      optimized prompt:Interior furniture design with rocks, rustic, earthy, minimalist, natural, organic, textured, contemporary, modern, Scandinavian, zen, Japanese, wood, stone, sustainable, eco-friendly, neutral colors, clean lines, spatial, cozy.
                      example2:Write me programming job candidate requirements and optimized prompt
                      You are a senior software engineer responsible for assessing the ideal candidate for a programming job. Your role involves analyzing technical skills, experience, and personality traits that contribute to successful software development. With extensive knowledge of programming languages, frameworks, and algorithms, you can accurately evaluate candidates' potential to excel in the field. As an expert in this domain, you can easily identify the qualities necessary to thrive in a programming role. Please provide a detailed yet concise description of the ideal candidate, covering technical skills, personal qualities, communication abilities, and work experience. Focus your knowledge and experience on creating a guide for our recruiting process.

                      """),
        HumanMessage(content=user_content),
    ]

    # Generate multiple optimized prompts using ChatOpenAI with the retrieved context
    generated_prompts = []
    for _ in range(num_prompts):
        # invoke() replaces the deprecated llm(messages=...) call form.
        response = llm.invoke(messages)
        generated_prompts.append(response.content.strip())
    return generated_prompts

In [63]:
# Example usage: `queries` holds the single query string to optimize.
queries = "write a python program that adds two integer numbers"
# Other queries to try:
# queries = "Write me programming job candidate requirements"
# queries = ' what does it mean poor dad and rich dad in the books of rebort'

# Generate multiple optimized prompts using RAG
generated_prompts = generate_optimized_prompts(queries)

# Show the query, then each generated prompt followed by a blank line.
print("Query:", queries)
for prompt in generated_prompts:
    print("Optimized Prompt:", prompt, end="\n\n")

Query: write a python program that adds two integer numbers
Optimized Prompt: Optimized Prompt:
Write a Python program that takes two integer numbers as input and adds them together. The program should prompt the user to enter the two numbers, perform the addition operation, and then display the result. Make sure to handle any potential errors that may occur during user input or calculation.

Context:
In a bustling city like Singapore, where talent and expertise converge, individuals with diverse skills and abilities contribute to the vibrant atmosphere. From skilled professionals to talented artisans, the city is a melting pot of creativity and innovation. As you navigate through the dynamic streets, you encounter stories of ingenuity and resourcefulness, like the young mechanic who swiftly diagnosed and fixed a car engine issue with precision and expertise. These encounters remind us of the remarkable individuals who shape our world with their knowledge and dedication.

Bonus Book Ex

# Evaluate Prompt

In [36]:
# TODO: add the prompt-evaluation code here