In [None]:
from typing import *


from langchain.schema.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load every file under the examples directory that matches the `**/*.txt`
# glob, reading each one with TextLoader.
loader = DirectoryLoader("../prompts/examples", glob="**/*.txt", loader_cls=TextLoader)
docs = loader.load()

# Peek at the text of the first loaded example.
print(docs[0].page_content)

==== EXAMPLE APP DESCRIPTION ====

Create a FastKafka application using localhost broker for testing, staging.airt.ai for staging and prod.airt.ai for production. Use the default port number. It should consume messages from 'receive_name' topic and the message will be a JSON encoded object with only one attribute: user_name. For each consumed message, construct a new message object and append 'Hello ' in front of the name attribute. Finally, publish the consumed message to 'send_greetings' topic.

==== EXAMPLE SKELETON CODE ====

from typing import *
from pydantic import BaseModel, Field
from fastkafka import FastKafka


class Greetings(BaseModel):
    user_name: str = Field(..., description="Name of the user.")

kafka_brokers = {
    "localhost": {
        "url": "localhost",
        "description": "local development kafka broker",
        "port": 9092,
    },
    "staging": {
        "url": "staging.airt.ai",
        "description": "staging kafka broker",
        "port": 9092,
    },

In [None]:
# Disabled alternative: a RecursiveCharacterTextSplitter with chunk_size=5000,
# no overlap, and a list of fallback separators starting with the skeleton-code
# marker. Nothing in this cell executes; the active splitter is the
# CharacterTextSplitter configured in the next cell.
# text_splitter = RecursiveCharacterTextSplitter(
#     chunk_size=5000,
#     chunk_overlap=0,
#     separators=["==== EXAMPLE SKELETON CODE ====","\n\n", "\n", "(?<=\. )", " ", ""]
# )
# chunks = text_splitter.split_documents(docs)
# chunks[0].page_content

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Split each example file on the skeleton-code marker. Judging by the recorded
# output, the app description ends up in its own chunk.
text_splitter = CharacterTextSplitter(
    separator="==== EXAMPLE SKELETON CODE ====",
    chunk_size=2000,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(docs)

# Show the text of the first chunk.
chunks[0].page_content

"==== EXAMPLE APP DESCRIPTION ====\n\nCreate a FastKafka application using localhost broker for testing, staging.airt.ai for staging and prod.airt.ai for production. Use the default port number. It should consume messages from 'receive_name' topic and the message will be a JSON encoded object with only one attribute: user_name. For each consumed message, construct a new message object and append 'Hello ' in front of the name attribute. Finally, publish the consumed message to 'send_greetings' topic."

In [None]:
# Embed every chunk with OpenAIEmbeddings and build a FAISS index from them.
# NOTE(review): OpenAIEmbeddings() is created without arguments, so it
# presumably picks up its API key from the environment — confirm.
db = FAISS.from_documents(chunks, OpenAIEmbeddings()) # type: ignore
# Persist the index to disk; a later cell reloads it from this same path.
db.save_local("../tmp_db/faiss_index")

In [None]:
# Free-text application description used as the search query.
query = """
Develop a FastKafka application using localhost broker for testing, staging.example.ai for staging and prod.example.ai for production. It should consume messages from 'course_updates' topic where the message is a JSON encoded object including two attributes: course_name and new_content. If new_content attribute is set, then construct a new message appending 'Updated: ' before the course_name attribute. Finally, publish this message to the 'notify_updates' topic. The application should use SASL_SSL with SCRAM-SHA-512 for authentication.
"""

# Reload the index saved earlier (this rebinds `db`) and run a
# max-marginal-relevance search for the query with k=3 and fetch_k=4.
db = FAISS.load_local("../tmp_db/faiss_index", OpenAIEmbeddings()) # type: ignore
results = db.max_marginal_relevance_search(query, k=3, fetch_k=4)

In [None]:
# Display the third document returned by the max-marginal-relevance search.
results[2]

Document(page_content='==== EXAMPLE APP DESCRIPTION ====\n\nDevelop a new FastKafka application that consumes JSON-encoded objects from the "receive_order" topic. These objects include attributes like "name" and "quantity." Upon consumption, enhance the message by adding a "location" attribute set to "Zagreb." Subsequently, forward the modified message to the "place_order" topic. After this, send another message to the "update_inventory" topic. This message should include a "quantity" attribute that corresponds to the received quantity value. No authentication is required.', metadata={'source': '../prompts/examples/example-3.txt'})

In [None]:
# Search with an empty query, restricted by metadata to chunks that share the
# source file of the third MMR hit.
# NOTE(review): with an empty query the ordering of the matches is presumably
# arbitrary, so r[0] may not always be the skeleton-code chunk — confirm.
r = db.similarity_search(
    "",
    filter={"source": results[2].metadata["source"]},
)
r[0]

Document(page_content='from typing import *\nfrom pydantic import BaseModel, Field\nfrom fastkafka import FastKafka\n\n\nclass Order(BaseModel):\n    name: str = Field(..., description="Name of the order.")\n    quantity: int = Field(..., description="Quantity of the order.")\n    location: str = Field("Zagreb", description="Location of the order.")\n\nclass InventoryUpdate(BaseModel):\n    quantity: int = Field(..., description="Quantity of the order to update inventory.")\n\nkafka_brokers = {\n    "localhost": {\n        "url": "localhost",\n        "description": "local development Kafka broker",\n        "port": 9092,\n    },\n    "staging": {\n        "url": "staging.airt.ai",\n        "description": "staging Kafka broker",\n        "port": 9092,\n    },\n    "production": {\n        "url": "prod.airt.ai",\n        "description": "production Kafka broker",\n        "port": 9092,\n    }\n}\n\napp_description = "A FastKafka application that consumes JSON-encoded objects from the \'r

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS

In [None]:
# Load the example prompt files again (same directory and glob as the first
# loading cell) so this section starts from the unsplit documents.
loader = DirectoryLoader("../prompts/examples", glob="**/*.txt", loader_cls=TextLoader)
docs = loader.load()

# Display the loaded documents.
docs

[Document(page_content='==== EXAMPLE APP DESCRIPTION ====\n\nCreate a FastKafka application using localhost broker for testing, staging.airt.ai for staging and prod.airt.ai for production. Use the default port number. It should consume messages from \'receive_name\' topic and the message will be a JSON encoded object with only one attribute: user_name. For each consumed message, construct a new message object and append \'Hello \' in front of the name attribute. Finally, publish the consumed message to \'send_greetings\' topic.\n\n==== EXAMPLE SKELETON CODE ====\n\nfrom typing import *\nfrom pydantic import BaseModel, Field\nfrom fastkafka import FastKafka\n\n\nclass Greetings(BaseModel):\n    user_name: str = Field(..., description="Name of the user.")\n\nkafka_brokers = {\n    "localhost": {\n        "url": "localhost",\n        "description": "local development kafka broker",\n        "port": 9092,\n    },\n    "staging": {\n        "url": "staging.airt.ai",\n        "description": 

In [None]:
# ParentDocumentRetriever is not imported by any other cell in this notebook,
# so import it here; without it this cell fails with NameError on a fresh
# kernel (Restart & Run All).
from langchain.retrievers import ParentDocumentRetriever

# This text splitter is used to create the child documents (chunk_size=400).
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)

# The storage layer for the parent documents, kept in memory only.
store = InMemoryStore()

# Wire the retriever to the FAISS index `db`, the in-memory parent store and
# the child splitter.
# NOTE(review): `db` is the index that was loaded from disk earlier and already
# contains the description/code chunks — confirm that adding child chunks to
# that same index (rather than a fresh one) is intended.
retriever = ParentDocumentRetriever(
    vectorstore=db,
    docstore=store,
    child_splitter=child_splitter,
)

In [None]:
# Feed the loaded example documents into the retriever. `ids` is passed
# explicitly as None (presumably so the retriever assigns its own ids —
# confirm against the installed LangChain version).
retriever.add_documents(docs, ids=None)

In [None]:
# Query the retriever and show how many documents it returned.
# NOTE(review): the query text "justice breyer" does not appear related to the
# FastKafka example prompts loaded above — confirm this is the intended query.
retrieved_docs = retriever.get_relevant_documents("justice breyer")
len(retrieved_docs)

2