In [1]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import LanceDB
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate

import lancedb

from IPython.display import Markdown, display

## Source data

In [33]:
import pandas as pd
from langchain_openai import OpenAI

df = pd.read_parquet('./data/parquet/sources-camel-kafka.parquet', engine='pyarrow')

# add some keywords to improve the lookup
keywords = [
    "custom check, pause, resume",
    "pause consumer, curcuit breaker, kafka",
    "resume adapter, resume API",
    "resume API"
]

df.insert(4, "keywords", keywords, True)

df.head()

Unnamed: 0,project,path,language,tags,keywords,description,source
0,camel,/Users/opiske/code/java/camel/components/camel...,java,,"custom check, pause, resume","""Shows how to create a custom check that can d...","if (count.intValue() <= 1) {\n LOG.info(""Coun..."
1,camel,/Users/opiske/code/java/camel/components/camel...,java,,"pause consumer, curcuit breaker, kafka","""Shows how to build a Camel route can pause a ...",return new RouteBuilder(){\n @Override public...
2,camel,/Users/opiske/code/java/camel/components/camel...,java,,"resume adapter, resume API","""Shows to create an adapter that is run by Cam...",if (count.intValue() <= 1) {\n return true;\n...
3,camel,/Users/opiske/code/java/camel/components/camel...,java,,resume API,"""Shows to create a route that uses the resume ...",return new RouteBuilder(){\n @Override public...


In [34]:
from langchain_community.document_loaders import DataFrameLoader
loader = DataFrameLoader(df, page_content_column="source")
docs = loader.load()
print("Num docs: ", len(docs))

[print(d) for d in list(df["description"])]


Num docs:  4
"Shows how to create a custom check that can determine whether to pause or continue"
"Shows how to build a Camel route can pause a Kafka consumer when using the circuit breaker pattern"
"Shows to create an adapter that is run by Camel when the resume happens "
"Shows to create a route that uses the resume API"


[None, None, None, None]

## Retriever setup

In [35]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} splits")

# Use `description` as lancedb.embedding and `page_content` as lancedb.text
# this way the lookups will be performed against the description values, but still return the actual sources into the LLM context
embeddings = OpenAIEmbeddings()
db = lancedb.connect("/tmp/lancedb")
table = db.create_table(
    "camel",
    data=[
        {
            "vector": embeddings.embed_query("Hello World"),
            "text": "Hello World",
            "id": "1",
        }
    ],
    mode="overwrite",
)

# manually create the store so we get the embeddings right
lancedb_data = []
for i,doc in enumerate(splits):
    lancedb_data.append(
        {
            "vector": embeddings.embed_query(doc.metadata["keywords"] + " "+ doc.metadata["description"]),
            "text": doc.metadata["description"] +"\n"+ doc.page_content,
            "id": i,
        }
    )
table.add(lancedb_data)

# create a vectorstore from the pre-populated table
vectorstore = LanceDB(table,embeddings)

# Retrieve and generate using the relevant snippets of the sources found in the parquet files.
retriever = vectorstore.as_retriever()
print(type(vectorstore))

Created 6 splits
<class 'langchain_community.vectorstores.lancedb.LanceDB'>


In [None]:
# let's take a look at how the retriever actually works
matches = retriever.get_relevant_documents("Can you show me an example of using the resume API in Camel?")
[print(f"{i}:\n {m.page_content}") for i,m in enumerate(matches)]

## The actual Q&A involving the LLM

In [36]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0)

prompt = PromptTemplate.from_template("""
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Please provide end-to-end examples in Java when applicable. 

    Question: {question} 

    Context: {context} 

    Answer:
""")

def format_docs(docs):      
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

questions = [
    "Can you show me an example of using the resume API in Camel?",    
    "How do I determine whether to pause or continue a route? Do you have an example?",        
]

from langchain.globals import set_debug
set_debug(False)

display(Markdown("### Example Questions&Answers"))
for i,q in enumerate(questions):
    display(Markdown("#### "+q))
    result = rag_chain.invoke(q)    
    display(Markdown(result))    
    



### Example Questions&Answers

#### Can you show me an example of using the resume API in Camel?

Here's an example of using the resume API in Camel:

```java
import org.apache.camel.builder.RouteBuilder;

public class ResumeExampleRoute extends RouteBuilder {
    private static final int RETRY_COUNT = 3;

    @Override
    public void configure() throws Exception {
        from("kafka:" + SOURCE_TOPIC + "?groupId=KafkaPausableConsumerIT&autoOffsetReset=earliest&keyDeserializer=org.apache.kafka.common.serialization.StringDeserializer" + "&valueDeserializer=org.apache.kafka.common.serialization.StringDeserializer" + "&autoCommitIntervalMs=1000&pollTimeoutMs=1000&autoCommitEnable=true&interceptorClasses=org.apache.camel.component.kafka.MockConsumerInterceptor")
                .pausable(testConsumerListener, o -> canContinue())
                .routeId("pausable-it")
                .process(exchange -> LOG.info("Got record from Kafka: {}", exchange.getMessage().getBody()))
                .to("direct:intermediate");

        from("direct:intermediate")
                .process(exchange -> {
                    LOG.info("Got record on the intermediate processor: {}", exchange.getMessage().getBody());
                    if (getCount() <= RETRY_COUNT) {
                        throw new RuntimeCamelException("Error");
                    }
                })
                .to(KafkaTestUtil.MOCK_RESULT);
    }
}
```

This example demonstrates how to create a Camel route that uses the resume API to pause a Kafka consumer when using the circuit breaker pattern. The `pausable` method is used to pause the consumer, and the `circuitBreaker` method is used to implement the circuit breaker pattern. The `canContinue` method is used to determine if the consumer can resume processing.

#### How do I determine whether to pause or continue a route? Do you have an example?

To determine whether to pause or continue a route, you can use a custom check or the circuit breaker pattern. 

Here's an example using a custom check:

```java
Integer count = getCount(); // Assuming getCount() is a method that retrieves the count
if (count.intValue() <= 1) {
  LOG.info("Count is 1, allowing processing to proceed");
  return true;
}
if (count.intValue() >= SIMULATED_FAILURES) {
  LOG.info("Count is {}, allowing processing to proceed because it's greater than retry count {}",count.intValue(),SIMULATED_FAILURES);
  return true;
}
LOG.info("Cannot proceed at the moment ... count is {}",count.intValue());
return false;
```

And here's an example using the circuit breaker pattern in a Camel route:

```java
CircuitBreaker circuitBreaker = CircuitBreaker.ofDefaults("pausable");
circuitBreaker.getEventPublisher().onSuccess(event -> {
  LOG.info("Downstream call succeeded");
  // Additional logic for success
}).onError(event -> {
  LOG.info("Downstream call error. Starting a thread to simulate checking for the downstream availability");
  // Additional logic for error
});
getCamelContext().getRegistry().bind("pausableCircuit", circuitBreaker);

from("kafka:" + SOURCE_TOPIC + "?groupId=KafkaPausableConsumerCircuitBreakerIT&autoOffsetReset=earliest&keyDeserializer=org.apache.kafka.common.serialization.StringDeserializer"+ "&valueDeserializer=org.apache.kafka.common.serialization.StringDeserializer"+ "&autoCommitIntervalMs=1000&pollTimeoutMs=1000&autoCommitEnable=true&interceptorClasses=org.apache.camel.component.kafka.MockConsumerInterceptor").pausable(new KafkaConsumerListener(), o -> canContinue()).routeId("pausable-it").process(exchange -> LOG.info("Got record from Kafka: {}",exchange.getMessage().getBody())).circuitBreaker().resilience4jConfiguration().circuitBreaker("pausableCircuit").end().to("direct:intermediate");

from("direct:intermediate").process(exchange -> {
  LOG.info("Got record on the intermediate processor: {}",exchange.getMessage().getBody());
  if (getCount() <= SIMULATED_FAILURES) {
    throw new RuntimeCamelException("Error");
  }
}).to(KafkaTestUtil.MOCK_RESULT).end();
```

These examples demonstrate how to determine whether to pause or continue a route using a custom check and the circuit breaker pattern in Java with Apache Camel.