In [1]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document
import os


In [4]:
# Load one service description with the loader's default settings and
# preview the start of its text.
md_path = "services/stable-diffusion-2.md"
loader = UnstructuredMarkdownLoader(md_path)
data = loader.load()

# Sanity checks: exactly one entry came back, and it is a Document.
assert len(data) == 1
first_document = data[0]
assert isinstance(first_document, Document)

# Keep the full text around and print only its first 250 characters.
readme_content = first_document.page_content
preview = readme_content[:250]
print(preview)

license: openrail++ tags: - stable-diffusion - text-to-image

Stable Diffusion v2 Model Card

This model card focuses on the model associated with the Stable Diffusion v2 model, available here.

This stable-diffusion-2 model is resumed from stable-di


In [14]:
# Under the hood, Unstructured creates different "elements" for different chunks of text

# loader = UnstructuredMarkdownLoader(md_path, mode="elements")

# data = loader.load()
# print(f"Number of documents: {len(data)}\n")

# for document in data[:2]:
#     print(f"{document}\n")

# print(set(document.metadata["category"] for document in data))

Number of documents: 92

page_content='license: openrail++ tags: - stable-diffusion - text-to-image' metadata={'source': '/workspaces/composition-blueprint-engine/vectorstore/services/stable-diffusion-2.md', 'category_depth': 0, 'last_modified': '2024-09-01T07:32:57', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': '/workspaces/composition-blueprint-engine/vectorstore/services', 'filename': 'stable-diffusion-2.md', 'category': 'Title', 'element_id': 'a006eaa8de45bf77f46abffe5820cae9'}

page_content='Stable Diffusion v2 Model Card' metadata={'source': '/workspaces/composition-blueprint-engine/vectorstore/services/stable-diffusion-2.md', 'category_depth': 0, 'last_modified': '2024-09-01T07:32:57', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': '/workspaces/composition-blueprint-engine/vectorstore/services', 'filename': 'stable-diffusion-2.md', 'category': 'Title', 'element_id': 'a54a75c9a9eb3c871babd089c89d4ec5'}

{'Title', 'NarrativeText', 'Unc

In [6]:
# Collect one Document per markdown file found directly inside md_dir.
documents = []
md_dir = "services/"

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# `documents` (and of everything built from it) the same on every run.
for filename in sorted(os.listdir(md_dir)):
    if filename.endswith(".md"):
        md_path = os.path.join(md_dir, filename)
        loader = UnstructuredMarkdownLoader(md_path)
        data = loader.load()
        # Only the first entry of each load is kept; a file whose load result
        # is empty (or whose first entry is not a Document) is skipped.
        if data and isinstance(data[0], Document):
            documents.append(data[0])

print(len(documents))

21


In [7]:
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_core.prompts import PromptTemplate

In [8]:
from dotenv import dotenv_values

# Read the API keys from the .env file one directory up instead of
# hardcoding them in the notebook.
config = dotenv_values("../.env")

# Fail early with a readable message when a key is absent or has no value,
# rather than with a bare KeyError / TypeError from the assignments below.
required_keys = ("LANGCHAIN_API_KEY", "OPENAI_API_KEY")
missing_keys = [key for key in required_keys if not config.get(key)]
if missing_keys:
    raise KeyError(
        "Missing or empty in ../.env: " + ", ".join(missing_keys)
    )

# LangSmith tracing settings.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# Export the validated keys so the client libraries can read them from the
# process environment.
for key in required_keys:
    os.environ[key] = config[key]

In [9]:
# Embed the loaded documents and store them in a Chroma collection that is
# persisted under persist_directory.
persist_directory = "./chroma_local_db"

# Give every document a stable ID. Without explicit ids, each re-run of this
# cell adds the same documents again under fresh random IDs, so the persisted
# collection accumulates duplicates and the retriever starts returning
# repeated results. With stable IDs a re-run overwrites the existing entries.
# The ID is the document's "source" metadata when present, otherwise its
# position in `documents`.
document_ids = [
    doc.metadata.get("source") or f"document-{index}"
    for index, doc in enumerate(documents)
]

vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
    ids=document_ids,
    persist_directory=persist_directory,
)

In [10]:
# Wrap the vector store in a retriever; the chain below pipes it into
# format_docs to produce the prompt's context.
retriever = vectorstore.as_retriever()
# Prompt: a custom template is used; the hub prompt is kept here, disabled, for reference.
# prompt = hub.pull("rlm/rag-prompt")
# Prompt text with two placeholders: {context} (the retrieved service
# descriptions) and {question} (the user's request).
# Adjacent string literals are concatenated with nothing in between, so the
# separators have to be written out: without the explicit newlines the
# instructions, the context, "Question:" and "Helpful Answer:" all run
# together on one line.
template = (
    "You are an AI assistant, expert at requirement decomposition and service composition. "
    "You are provided with various services that might be fit for fulfilling the user's request. "
    "Your job is to break down the user's request and select the appropriate services that will fulfill the decomposed tasks. "
    "If there are not suitable services available to fit the user's requirements, say that it is not possible to do so. "
    "You should give your answer in a structure json output with clear indication of tasks, selected services, and any dependencies (file, values) between these tasks.\n\n"
    "{context}\n\n"
    "Question: {question}\n"
    "Helpful Answer: "
)

# Build the prompt object from the template string; its {context} and
# {question} placeholders are supplied by the chain's input mapping.
prompt = PromptTemplate.from_template(template)


# LLM: temperature=0 asks for the least random sampling. The model name
# carries a date suffix — presumably a pinned snapshot; confirm before changing.
llm = ChatOpenAI(model_name="gpt-4o-mini-2024-07-18", temperature=0)


# Post-processing
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by one
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Chain
# Input mapping for the prompt: "context" is the retriever's results joined
# by format_docs, "question" is the chain's input passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: fill the prompt, call the model, reduce the reply to a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Question
question = "I have a turtlebot with low processing power that is taking low quality pictures. I want to find if there are any ambulances in the pictures. Assume the images are provided to you."

# Left as the cell's last expression so the model's answer is displayed.
rag_chain.invoke(question)

'```json\n{\n  "tasks": [\n    {\n      "task_id": 1,\n      "task_description": "Process the input images to prepare them for object detection.",\n      "dependencies": [],\n      "selected_service": "YOLOS (tiny-sized) model"\n    },\n    {\n      "task_id": 2,\n      "task_description": "Detect objects in the processed images to identify ambulances.",\n      "dependencies": [1],\n      "selected_service": "YOLOS (tiny-sized) model"\n    }\n  ],\n  "selected_services": {\n    "service_1": {\n      "name": "YOLOS (tiny-sized) model",\n      "description": "YOLOS model fine-tuned on COCO 2017 object detection."\n    },\n    "service_2": {\n      "name": "YOLOS (tiny-sized) model",\n      "description": "YOLOS model fine-tuned on COCO 2017 object detection."\n    }\n  },\n  "dependencies": {\n    "task_1": {\n      "input": "images",\n      "output": "processed_images"\n    },\n    "task_2": {\n      "input": "processed_images",\n      "output": "detected_objects"\n    }\n  }\n}\n```'