In [1]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import LanceDB
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate

import lancedb

from IPython.display import Markdown, display

## Utility Functions

In [19]:
from openai import OpenAI
PROMPT_TEMPLATE = PromptTemplate.from_template(
        """
        Take a look at the following source code and consider the description provided.
        Please provide a brief explanation of what the code does.        
        Extract any part of the context AS IS that is relevant to provide the explanation.                 

        It's safe to assume that the code is written in Java.

        > description:
        >>>
        {description}
        >>>
        
        > Source Code:
        >>>
        {code}
        >>>        
        """
    )

def interpret_code(description, code_example):
    message = PROMPT_TEMPLATE.format(description=description, code=code_example)
    client = OpenAI()    
    response = client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            messages=[
                {"role": "system", "content": "You are a service used to interpret code examples."},
                {"role": "user", "content": message}
            ]
        )    
    return response.choices[0].message.content


## Source data

In [3]:
import pandas as pd
from langchain_openai import OpenAI

df = pd.read_parquet('./data/parquet/sources-camel-kafka.parquet', engine='pyarrow')
df.head()

Unnamed: 0,project,path,language,tags,description,source
0,camel,/Users/opiske/code/java/camel/components/camel...,java,,"""Shows how to create a custom check that can d...","if (count.intValue() <= 1) {\n LOG.info(""Coun..."
1,camel,/Users/opiske/code/java/camel/components/camel...,java,,"""Shows how to build a Camel route can pause a ...",return new RouteBuilder(){\n @Override public...
2,camel,/Users/opiske/code/java/camel/components/camel...,java,,"""Shows to create an adapter that is run by Cam...",if (count.intValue() <= 1) {\n return true;\n...
3,camel,/Users/opiske/code/java/camel/components/camel...,java,,"""Shows to create a route that uses the resume ...",return new RouteBuilder(){\n @Override public...


In [4]:
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(df, page_content_column="source")
docs = loader.load()
print("Num docs: ", len(docs))

[print(d) for d in list(df["description"])]

Num docs:  4
"Shows how to create a custom check that can determine whether to pause or continue"
"Shows how to build a Camel route can pause a Kafka consumer when using the circuit breaker pattern"
"Shows to create an adapter that is run by Camel when the resume happens "
"Shows to create a route that uses the resume API"


[None, None, None, None]

In [20]:
# generate an additional description using the LLM

for index, row in df.iterrows():
    interpretation = interpret_code(row["description"], row["source"])
    print(f"{i}: {interpretation} \n")

3: This source code represents a custom check that determines whether to pause or continue based on the value of the "count" variable. If the count is equal to 1, the code logs a message and allows processing to proceed. If the count exceeds a defined threshold (SIMULATED_FAILURES), it again logs a message and allows processing to continue. If neither of these conditions is met, the code logs a message indicating that processing cannot proceed at the moment and returns false. 

3: This code snippet demonstrates the construction of a Camel route that leverages a circuit breaker to pause a Kafka consumer. The `CircuitBreaker` is configured with default settings and assigned the name "pausable". Event handlers are then attached to the circuit breaker to take action on successful or erroneous downstream calls.

The snippet further binds the circuit breaker to the Camel context and defines a route that listens to a Kafka topic. The route includes a pausable consumer, a KafkaConsumerListener

## Retriever setup

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} splits")

# Use `description` as lancedb.embedding and `page_content` as lancedb.text
# this way the lookups will be performed against the description values, but still return the actual sources into the LLM context
embeddings = OpenAIEmbeddings()
db = lancedb.connect("/tmp/lancedb")
table = db.create_table(
    "camel",
    data=[
        {
            "vector": embeddings.embed_query("Hello World"),
            "text": "Hello World",
            "id": "1",
        }
    ],
    mode="overwrite",
)

# manually create the store so we get the embeddings right
lancedb_data = []
for i,doc in enumerate(splits):
    lancedb_data.append(
        {
            "vector": embeddings.embed_query(doc.metadata["description"]),
            "text": doc.metadata["description"] +"\n"+ doc.page_content,
            "id": i,
        }
    )
table.add(lancedb_data)

# create a vectorstore from the pre-populated table
vectorstore = LanceDB(table,embeddings)

# Retrieve and generate using the relevant snippets of the sources found in the parquet files.
retriever = vectorstore.as_retriever()
print(type(vectorstore))

Created 6 splits


In [None]:
# let's take a look at how the retriever actually works
matches = retriever.get_relevant_documents("Can you show me an example of using the resume API in Camel?")
[print(f"{i}:\n {m.page_content}") for i,m in enumerate(matches)]

## The actual Q&A involving the LLM

In [24]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0)

prompt = PromptTemplate.from_template("""
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Please provide end-to-end examples in Java when applicable. 

    Question: {question} 

    Context: {context} 

    Answer:
""")

def format_docs(docs):      
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

questions = [
    "Can you show me an example of using the resume API in Camel?",    
    "How do I determine whether to pause or continue a route? Do you have an example?",        
]

from langchain.globals import set_debug
set_debug(False)

display(Markdown("### Example Questions&Answers"))
for i,q in enumerate(questions):
    display(Markdown("#### "+q))
    result = rag_chain.invoke(q)    
    display(Markdown(result))    
    



### Example Questions&Answers

#### Can you show me an example of using the resume API in Camel?

I'm sorry, but I don't have enough information to provide a specific example of using the resume API in Camel based on the given context.

#### How do I determine whether to pause or continue a route? Do you have an example?

To determine whether to pause or continue a route, you can use a condition based on the count value. If the count is less than or equal to 1, or if it's greater than or equal to a predefined number of simulated failures, you can allow the processing to proceed. Otherwise, you can log that the processing cannot proceed at the moment and return false.

Here's an example in Java:

```java
public class RouteProcessor {
    private static final int SIMULATED_FAILURES = 5;

    public boolean canContinue(int count) {
        if (count <= 1) {
            System.out.println("Count is 1, allowing processing to proceed");
            return true;
        } else if (count >= SIMULATED_FAILURES) {
            System.out.println("Count is " + count + ", allowing processing to proceed because it's greater than retry count " + SIMULATED_FAILURES);
            return true;
        } else {
            System.out.println("Cannot proceed at the moment ... count is " + count);
            return false;
        }
    }

    public static void main(String[] args) {
        RouteProcessor processor = new RouteProcessor();
        int count = 3; // Replace with actual count value
        boolean canContinue = processor.canContinue(count);
        if (canContinue) {
            // Continue with the route
            System.out.println("Route processing continued");
        } else {
            // Pause the route
            System.out.println("Route processing paused");
        }
    }
}
```

In this example, the `canContinue` method takes the count as a parameter and determines whether to pause or continue the route based on the count value. The main method demonstrates how to use the `canContinue` method to make a decision on whether to pause or continue the route.