In [29]:
%%capture

# update or install the necessary libraries
!pip install python-dotenv openai langchain unstructured chromadb tiktoken github

In [6]:
import os
import openai  # for calling the OpenAI API
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail early with an actionable message when the key is missing or empty.
# Writing `None` back into os.environ would otherwise abort with an opaque
# "str expected, not NoneType" TypeError.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file "
        "before running this notebook."
    )

# Load your API key from an environment variable or secret management service
openai.api_key = os.environ["OPENAI_API_KEY"]

# for LangChain: no extra step is needed. The key is already present in
# os.environ at this point, and the LangChain OpenAI wrappers used later in
# this notebook are constructed without an explicit key.

In [9]:
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.text_splitter import CharacterTextSplitter
import github

In [25]:
# Reference: https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/markdown.html
# Read the Serverless Workflow specification markdown file via the
# unstructured-based loader.
loader = UnstructuredMarkdownLoader(
    "/data/serverless_workflow_specification.md"
)
markdown_text = loader.load()

# Cell output: the text of the first loaded document.
markdown_text[0].page_content

'Serverless Workflow Specification\n\nTable of Contents\n\nAbstract\n\nStatus of this document\n\nOverview\n\nWhy we need a specification?\n\nFocus on standards\n\nProject Components\n\nSpecification Details\n\nCore Concepts\n\nWorkflow Definition\n\nWorkflow Instance\n\nWorkflow Model\n\nWorkflow Data\nWorkflow Data Input\nInformation Passing Between States\nWorkflow data output\nState data filters\nAction data filters\nEvent data filters\nUsing multiple data filters\nData Merging\n\nWorkflow Functions\nUsing Functions for RESTful Service Invocations\nUsing Functions for Async API Service Invocations\nUsing Functions for RPC Service Invocations\nUsing Functions for GraphQL Service Invocations\nInvoking a GraphQL Query\nInvoking a GraphQL Mutation\nUsing Functions for OData Service Invocations\nCreating an OData Function Definition\nInvoking an OData Function Definition\nUsing Functions for Expression Evaluation\nDefining custom function types\n\nWorkflow Expressions\n\nWorkflow Defini

In [26]:
# Chunk the text of the first loaded document (chunk_size=200, no overlap).
markdown_splitter = MarkdownTextSplitter(chunk_overlap=0, chunk_size=200)
docs = markdown_splitter.create_documents(
    texts=[markdown_text[0].page_content],
)

# Print the text of every chunk, one after another.
for doc in docs:
    print(doc.page_content)

Serverless Workflow Specification

Table of Contents

Abstract

Status of this document

Overview

Why we need a specification?

Focus on standards

Project Components

Specification Details
Core Concepts

Workflow Definition

Workflow Instance

Workflow Model
Workflow Data
Workflow Data Input
Information Passing Between States
Workflow data output
State data filters
Action data filters
Event data filters
Using multiple data filters
Data Merging
Workflow Functions
Using Functions for RESTful Service Invocations
Using Functions for Async API Service Invocations
Using Functions for RPC Service Invocations
Using Functions for GraphQL Service Invocations
Invoking a GraphQL Query
Invoking a GraphQL Mutation
Using Functions for OData Service Invocations
Creating an OData Function Definition
Invoking an OData Function Definition
Using Functions for Expression Evaluation
Defining custom function types
Workflow Expressions
Workflow Definition Structure
Workflow States
Event State
Operation Stat

In [35]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embedding model used to vectorize the chunks.
embeddings = OpenAIEmbeddings()

# Create the vector store to use as the index over the chunks.
db = Chroma.from_documents(documents=docs, embedding=embeddings)

# Cell output: the vector store object itself.
db

Using embedded DuckDB without persistence: data will be transient


<langchain.vectorstores.chroma.Chroma at 0xffff6c3322c0>

In [37]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Expose the index through a retriever interface: similarity search with k=2.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Build the question-answering chain on top of the retriever; the source
# documents are requested alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [39]:
# Question to put to the retrieval QA chain.
query = "How does operation works?"

# Run the chain on the question; the returned mapping is the cell output.
result = qa(inputs={"query": query})
result

{'query': 'How does operation works?',
 'result': ' Operation defines a set of actions to be performed in sequence or in parallel, and once all actions have been performed, a transition to another state can occur.',
 'source_documents': [Document(page_content='Depending on the function type, the operation property can be:', metadata={}),
  Document(page_content='Operation state defines a set of actions to be performed in sequence or in parallel.\nOnce all actions have been performed, a transition to another state can occur.', metadata={})]}