In [33]:
import openai
import langchain
from langchain_community import document_loaders
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate
)

from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA


In [38]:
import os 
import dotenv
from dotenv import load_dotenv

# Copy variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Fail fast with a readable message instead of letting a missing key surface
# later as an opaque error from the first OpenAI-backed call.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to a .env file "
        "before running the rest of this notebook."
    )

In [18]:
# Location of the paper to index. The original absolute path stays the default;
# set PAPER_PDF_PATH to run the notebook on another machine.
PDF_PATH = os.getenv(
    "PAPER_PDF_PATH",
    "/home/biniyam/Prompt-Generation/data/1706.03762.pdf",
)
CHUNK_SIZE = 250  # chunk size handed to the text splitter

loader = PyPDFLoader(PDF_PATH)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)

# Use `load()` rather than `load_and_split()`: the latter applies its own
# default splitter first, which would split every page twice.
doc = text_splitter.split_documents(loader.load())
if not doc:
    raise ValueError(f"No text could be extracted from {PDF_PATH!r}")

# NOTE(review): this store is not referenced by any later cell visible here,
# and it embeds the whole corpus a second time — confirm it is still needed.
vectorstore = Chroma.from_documents(doc, OpenAIEmbeddings())


In [25]:
persist_directory = 'db'
embedding = OpenAIEmbeddings()

# Calling `Chroma.from_documents` against a directory that already holds an
# index adds the same chunks again and pays for their embeddings again, so a
# re-run would fill the store with duplicates. Reuse the index when it exists;
# delete the directory to force a rebuild (e.g. after changing the PDF).
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=embedding)
else:
    vectordb = Chroma.from_documents(documents=doc,
                                     embedding=embedding,
                                     persist_directory=persist_directory)

In [26]:
# Write the index out to `persist_directory`, then drop the in-memory handle so
# the following cell has to re-open the store from disk.
# NOTE(review): newer Chroma releases may persist automatically — confirm
# whether this explicit call is still required for the installed version.
vectordb.persist()
vectordb = None

In [27]:
# Re-open the on-disk collection, supplying the same embedding function that
# was used to build it.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

### Make a retriever

In [31]:
TOP_K = 3  # number of chunks requested from the vector store per query

retriever = vectordb.as_retriever(search_kwargs={'k': TOP_K})

# `invoke` is the Runnable entry point for retrievers; it replaces the
# deprecated `get_relevant_documents`.
documents = retriever.invoke("what is the paper talking about?")

In [32]:
# Number of chunks the retriever returned for the probe query.
len(documents)

3

### Make a chain to answer questions

In [35]:
# Question-answering chain: the retriever supplies context chunks and the
# 'stuff' chain type passes them to the LLM together with the query.
# Source documents are not included in the response.
ans_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    chain_type='stuff',
    llm=OpenAI(),
    return_source_documents=False,
)

In [40]:
def process_response(llm_response):
    """Print the answer from a chain response and, when present, its sources.

    Args:
        llm_response: Mapping returned by the QA chain. Must contain a
            ``'result'`` key. May contain ``'source_documents'``, a sequence
            of objects exposing a ``metadata`` mapping.

    Returns:
        None. The answer (and any sources) are written to stdout.

    Raises:
        KeyError: If ``llm_response`` has no ``'result'`` key.
    """
    print(llm_response['result'])

    # Sources are optional: when the key is absent or empty, the output is
    # just the answer.
    sources = llm_response.get("source_documents") or []
    if sources:
        print("\n\nSources")
        for source in sources:
            print(source.metadata.get('source', '<unknown source>'))

In [43]:
# The request sent to the retrieval chain, split across lines for readability;
# adjacent literals concatenate into the same single string.
query = (
    "can you generate a prompt that i am going to use in another llm session "
    "that will help me better understand the paper. your job here is to "
    "generate a list of prompts that i will use to query an llm with, since "
    "human language is so simple and might not get me better results."
)

# `invoke` replaces the deprecated direct call `ans_chain(query)`; the chain's
# input key is "query" and the response dict still carries 'result'.
llm_response = ans_chain.invoke({"query": query})
process_response(llm_response)

 Some possible prompts could be:
- Can you explain the concept of "input and output modalities" and how they relate to the Transformer model?
- How do local, restricted attention mechanisms differ from traditional attention mechanisms in handling large inputs and outputs?
- Can you provide an example of a task where the Transformer has been used successfully?
- How does the Transformer model handle tasks such as reading comprehension and summarization?
- Can you explain the significance of task-independent sentence representations and how they are learned in the Transformer model?


In [11]:
# NOTE: disabled experiment, kept for reference. `chat` is not defined in any
# cell shown above (presumably a ChatOpenAI instance — confirm before
# re-enabling this cell).
# messages = [
#     SystemMessage(
#         content="You are a very helpful assistant that generates multiple prompts that the user copies and uses to get better results by prompting llms by taking in user intent and context as input and then provide the user with multiple prompt candidates. For example if i have a simple prompt, your job is to give the me a superhuman version of the original prompt that can give me the best results when use that prompt against you or any other llm. The prompts you give are superior and more mature and are prompts that can generate as better output than my original prompt. "
#     ),

#     HumanMessage(
#         content="How do i use pydantic in python"
#     )
# ]
# chat(messages)

AIMessage(content='1. "What are the key features and benefits of using Pydantic in Python?"\n2. "Can you provide a step-by-step guide on how to install and set up Pydantic in a Python project?"\n3. "What are some best practices for using Pydantic effectively in Python?"\n4. "How does Pydantic handle data validation and type checking in Python?"\n5. "Can you give examples of how to define and use Pydantic models in Python?"\n6. "What are some advanced techniques or use cases for leveraging Pydantic in Python?"\n7. "Are there any performance considerations or optimizations to keep in mind when using Pydantic in Python?"\n8. "How does Pydantic integrate with other Python libraries or frameworks, such as FastAPI or Django?"\n9. "What are some common pitfalls or challenges when working with Pydantic in Python, and how can they be overcome?"\n10. "Can you provide a comparison between Pydantic and other similar libraries in Python, such as dataclasses or marshmallow?"')