<a href="https://colab.research.google.com/github/hanhanwu/Hanhan_LangGraph_Exercise/blob/main/scaling/try_ray_serve.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## About

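* This notebook tries Ray Serve for building the retriever to see whether it saves time. In this run it did not: about 34 seconds through Ray Serve (including Serve startup) versus about 13 seconds without it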
* Lessons Learned
  * The request-handling method has to be named `__call__()`; otherwise the request fails with a 500 error
  * The method has to be declared `async` if its body uses `await`
  * The return value can't be a custom object. For example, the deployment can return `retriever.weights` (a plain list) but can't return `retriever` itself, because `retriever` is an `EnsembleRetriever` object
* For more examples of using Ray, see https://www.ray.io/

In [1]:
%%capture --no-stderr
%pip install -U --quiet langchain-community tiktoken langchain-openai langchainhub chromadb langchain langgraph langchain-text-splitters playwright unstructured
!playwright install
%pip install -U --quiet rank_bm25 faiss-cpu
%pip install "ray[serve]"

In [2]:
from google.colab import userdata

# Read the API keys through Colab's `userdata` helper so that they are never
# hardcoded in the notebook.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')  # passed explicitly to OpenAIEmbeddings in later cells
LANGSMITH_API_KEY = userdata.get('LANGSMITH_API_KEY')  # used to pull rlm/rag-prompt

In [3]:
import time
import requests
from starlette.requests import Request
from typing import Dict

from langchain_community.document_loaders import PlaywrightURLLoader
from langchain_community.vectorstores import Chroma
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool

from ray import serve

import nltk
# NLTK data packages to fetch before the page loader / retrievers run.
nltk_resources = [
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'stopwords',
    'punkt_tab'
]
for resource in nltk_resources:
    try:
        # By default nltk.download() does not raise on failure: it returns
        # False instead, so the return value has to be checked as well.
        downloaded = nltk.download(resource)
    except Exception as e:
        # Best effort: report the problem and keep going with the next package.
        print(f"Error downloading {resource}: {e}")
    else:
        if not downloaded:
            print(f"Error downloading {resource}: nltk.download() returned False")

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [4]:
start_time = time.time()

urls = [
   "https://langchain-ai.github.io/langgraph/tutorials/introduction/",
]

loader = PlaywrightURLLoader(urls=urls, remove_selectors=["header", "footer"])
docs = await loader.aload()  # returns "Document" type

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=200
)
doc_splits = text_splitter.split_documents(docs)  # split into chunks with overlap

# choose retriever type based on the number of chunks
chunks_ct = len(doc_splits)
if chunks_ct < 30:
  print(chunks_ct, 'choose vectorstore based retriever')
  # use Vectorstore-backed retriever (the simplest retriever in LangChain)
  vectorstore = Chroma.from_documents(
      documents=doc_splits,
      collection_name="rag-chroma",
      embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY),
  )
  retriever = vectorstore.as_retriever()
else:
  print(chunks_ct, 'choose ensemble retriever')
  # use emsemble retriever
  # initialize the bm25 retriever and faiss retriever
  bm25_retriever = BM25Retriever.from_texts(
      [doc.page_content for doc in doc_splits], metadatas=[{"source": 1}] * len(doc_splits)
  )
  bm25_retriever.k = 2
  embedding = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
  faiss_vectorstore = FAISS.from_texts(
      [doc.page_content for doc in doc_splits], embedding, metadatas=[{"source": 2}] * len(doc_splits)
  )
  faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})
  # initialize the ensemble retriever
  retriever = EnsembleRetriever(
      retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
  )

end_time = time.time()
running_time = round(end_time - start_time, 4)
print(f"Running time: {running_time} seconds")

60 choose ensemble retriever
Running time: 13.4381 seconds


In [5]:
retriever

EnsembleRetriever(retrievers=[BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x7e22ab858f90>, k=2), VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7e22d90da950>, search_kwargs={'k': 2})], weights=[0.5, 0.5])

In [6]:
start_time = time.time()

@serve.deployment
class MyAppDeployment:
    """Ray Serve deployment that builds a retriever over ``urls``.

    Each request is answered with the retriever's ensemble weights as a plain
    list, because the response has to be a simple serializable value rather
    than the retriever object itself.

    Args:
        urls: Pages to scrape and index.
        openai_api_key: Key handed to ``OpenAIEmbeddings``.
    """

    def __init__(self, urls: list[str],
                       openai_api_key: str):
        self.urls = urls
        self.openai_api_key = openai_api_key
        # Built on the first request and reused afterwards, so the pages are
        # not re-scraped and re-embedded on every call. Redeploy to refresh.
        self._retriever = None

    async def _build_retriever(self):
        """Scrape ``self.urls``, split the pages into chunks and index them."""
        # Use the URLs given to this deployment, not a notebook-level global.
        loader = PlaywrightURLLoader(urls=self.urls, remove_selectors=["header", "footer"])
        docs = await loader.aload()  # returns "Document" type

        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=200
        )
        doc_splits = text_splitter.split_documents(docs)  # split into chunks with overlap

        # choose retriever type based on the number of chunks
        chunks_ct = len(doc_splits)
        if chunks_ct < 30:
            print(chunks_ct, 'choose vectorstore based retriever')
            # use Vectorstore-backed retriever (the simplest retriever in LangChain)
            vectorstore = Chroma.from_documents(
                documents=doc_splits,
                collection_name="rag-chroma",
                embedding=OpenAIEmbeddings(api_key=self.openai_api_key),
            )
            return vectorstore.as_retriever()

        print(chunks_ct, 'choose ensemble retriever')
        # use ensemble retriever: BM25 and FAISS over the same chunk texts
        texts = [doc.page_content for doc in doc_splits]
        # One metadata dict per chunk, so the documents do not share one object.
        bm25_retriever = BM25Retriever.from_texts(
            texts, metadatas=[{"source": 1} for _ in texts]
        )
        bm25_retriever.k = 2
        embedding = OpenAIEmbeddings(api_key=self.openai_api_key)
        faiss_vectorstore = FAISS.from_texts(
            texts, embedding, metadatas=[{"source": 2} for _ in texts]
        )
        faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})
        return EnsembleRetriever(
            retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
        )

    # the function has to be named as "__call__"
    async def __call__(self, request: Request):
        """Handle one HTTP request and return the retriever weights.

        Returns:
            The ensemble weights as a list, or an empty list when the
            vectorstore-backed retriever was chosen (it has no ``weights``).
        """
        if self._retriever is None:
            self._retriever = await self._build_retriever()
        # Only the ensemble retriever is built with weights; reading the
        # attribute unconditionally would fail on the vectorstore branch.
        return list(getattr(self._retriever, "weights", []))


urls = [
   "https://langchain-ai.github.io/langgraph/tutorials/introduction/",
]
# Upper bound (seconds) for the HTTP call. The request triggers scraping and
# embedding, so it is slow, but without a timeout a stalled replica would hang
# this cell forever.
REQUEST_TIMEOUT_S = 120

# Deploy the app at "/" and query it once over HTTP.
app = MyAppDeployment.bind(urls, OPENAI_API_KEY)
serve.run(app, route_prefix="/")
try:
    response = requests.get("http://localhost:8000/", timeout=REQUEST_TIMEOUT_S)
    # Check if the request was successful
    response.raise_for_status()
    print(response.json())
except requests.exceptions.RequestException as e:
    # Timeouts are RequestException subclasses, so they are reported here too.
    print(f"Error fetching data: {e}")


# Measured from the `start_time` set at the top of this cell.
end_time = time.time()
running_time = round(end_time - start_time, 4)
print(f"Running time: {running_time} seconds")

2025-02-15 03:28:48,256	INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[36m(ProxyActor pid=3329)[0m INFO 2025-02-15 03:28:59,171 proxy 172.28.0.12 -- Proxy starting on node 7e3c71b0a6ac3b61c02d304a3f648f412fbceaa6ce449036190b6186 (HTTP port: 8000).
[36m(ProxyActor pid=3329)[0m INFO 2025-02-15 03:28:59,257 proxy 172.28.0.12 -- Got updated endpoints: {}.
INFO 2025-02-15 03:28:59,359 serve 509 -- Started Serve in namespace "serve".
[36m(ServeController pid=3328)[0m INFO 2025-02-15 03:28:59,437 controller 3328 -- Deploying new version of Deployment(name='MyAppDeployment', app='default') (initial target replicas: 1).
[36m(ProxyActor pid=3329)[0m INFO 2025-02-15 03:28:59,440 proxy 172.28.0.12 -- Got updated endpoints: {Deployment(name='MyAppDeployment', app='default'): EndpointInfo(route='/', app_is_cross_language=False)}.
[36m(ProxyActor pid=3329)[0m INFO 2025-02-15 03:28:59,451 proxy 172.28.0.12 -- Started <ray

[36m(ServeReplica:default:MyAppDeployment pid=3505)[0m 60 choose ensemble retriever
[0.5, 0.5]
Running time: 33.6831 seconds
