# Astrophysics Chat Application 

In [1]:
from ssec_tutorials import OLMO_MODEL

In [2]:
from pathlib import Path
from qdrant_client import QdrantClient
from uuid import uuid4

In [3]:
import panel as pn
import os
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.llms import LlamaCpp
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.callbacks import CallbackManager
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings

In [4]:
# Shared data directory. The relative path is resolved against the current
# working directory, so this assumes the kernel was started from the
# notebook's own folder -- TODO confirm.
repo_root = Path("../../resources/data/").resolve()

In [5]:
# Display the resolved directory as a quick sanity check.
repo_root

PosixPath('/Users/a42/Code/uw-ssec/tutorials/resources/data')

In [6]:
# Local Qdrant storage location (under the data directory) and the name of
# the collection inside it that get_vector_store() opens.
qdrant_path = repo_root / "qdrant/scipy_qdrant/"
qdrant_collection = "arxiv_astro-ph_abstracts_astropy_github_documentation"

In [7]:
# Embedding model handed to the Qdrant vector store.
# NOTE(review): presumably this must be the same model that was used to build
# the collection -- confirm against the indexing notebook.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

In [8]:
# Load the Panel extension so Panel components can render in the notebook.
pn.extension()

In [9]:
# Fail fast, and say which path is missing, if the local Qdrant storage is not
# where we expect it. qdrant_path is already a pathlib.Path, so use its own
# exists() rather than going through os.path.
assert qdrant_path.exists(), f"Qdrant storage path not found: {qdrant_path}"

In [10]:
# OLMo model location exported by ssec_tutorials; get_chain() passes it to LlamaCpp.
model_path = OLMO_MODEL

In [11]:
@pn.cache
def get_vector_store():
    """Open the local Qdrant collection as a LangChain vector store.

    Reads the module-level ``qdrant_path``, ``qdrant_collection`` and
    ``embedding``. No existence check or collection creation happens here;
    the collection is expected to be present on disk already.

    Returns:
        Qdrant: vector store backed by a ``QdrantClient`` opened on
        ``qdrant_path``.
    """
    return Qdrant(
        client=QdrantClient(path=str(qdrant_path)),
        collection_name=qdrant_collection,
        embeddings=embedding,
    )

In [12]:
# Open the vector store once at module level; get_chain() reads this global.
db = get_vector_store()

In [13]:
# Show the store's repr to confirm it was constructed.
db

<langchain_community.vectorstores.qdrant.Qdrant at 0x10ca693d0>

In [53]:
def get_chain(callbacks):
    """Assemble the retrieval-augmented chain used to answer a chat message.

    The chain retrieves documents from the module-level ``db`` vector store,
    stuffs them into a prompt rendered from the model's own chat template,
    and sends that prompt to a llama.cpp-backed LLM loaded from ``model_path``.

    Args:
        callbacks: list of LangChain callback handlers. The same list is given
            to the retriever and wrapped in the LLM's ``CallbackManager``.

    Returns:
        The runnable built by ``create_retrieval_chain``.
    """
    # NOTE(review): a new LlamaCpp instance is constructed on every call, i.e.
    # once per chat message -- confirm this is acceptable for load time.
    # NOTE(review): max_tokens=12 caps generations at a very short length;
    # confirm this is intended and not a leftover debugging value.
    llm = LlamaCpp(
        model_path=str(model_path),
        # Routing callbacks through a manager is what enables token-wise streaming.
        callback_manager=CallbackManager(callbacks),
        temperature=0.8,
        n_ctx=4096,
        max_tokens=12,
        verbose=True,
        echo=False,
    )

    # The Jinja2 chat template is read from the loaded model's metadata.
    chat_template = PromptTemplate.from_template(
        template=llm.client.metadata['tokenizer.chat_template'],
        template_format="jinja2",
        partial_variables={"add_generation_prompt": True, "eos_token": "<|endoftext|>"},
    )

    # Single user turn; {context} and {input} stay as literal placeholders so
    # they can be filled in later by the document chain.
    user_turn = {
        "role": "user",
        "content": "You are an astrophysics expert. Please answer the question on astrophysics based on the following context. \
                                Context: {context} \
                                Question: {input}"
    }

    # Render the chat template now, then re-parse the rendered text as a
    # regular template whose variables are {context} and {input}.
    rendered_prompt = chat_template.partial(messages=[user_turn]).format()
    rag_prompt = PromptTemplate.from_template(rendered_prompt)

    # MMR search returning 2 documents per query.
    doc_retriever = db.as_retriever(
        callbacks=callbacks,
        search_type="mmr",
        search_kwargs={"k": 2},
    )
    stuff_chain = create_stuff_documents_chain(llm=llm, prompt=rag_prompt)

    return create_retrieval_chain(doc_retriever, stuff_chain)

In [51]:
async def callback(contents, user, instance):
    """Chat handler: run the retrieval chain on one incoming message.

    Args:
        contents: the message text; passed to the chain as ``"input"``.
        user: name of the sender (unused here).
        instance: the chat interface the Panel callback handler reports to.

    Returns:
        The value stored under ``"context"`` in the chain's output.
        NOTE(review): presumably the retrieved documents -- confirm that
        returning ``"context"`` rather than ``"answer"`` is intended.
    """
    def _ignore_llm_end(response, *args, **kwargs):
        # Deliberate no-op replacement for the handler's on_llm_end.
        return None

    handler = pn.chat.langchain.PanelCallbackHandler(instance, user='OLMo', avatar='🌳')
    # Do not report the result again once generation finishes; per the original
    # author this keeps the model's output from being repeated in the chat.
    handler.on_llm_end = _ignore_llm_end

    result = await get_chain(callbacks=[handler]).ainvoke({"input": contents})
    return result["context"]

In [52]:
# Build the chat UI wired to `callback`, with callback_exception='verbose' for
# error reporting, and mark it servable so it can be deployed as an app.
pn.chat.ChatInterface(callback=callback, callback_exception='verbose').servable()