## Template LLM application with Hugging Face

In [None]:
import os
import requests
import json
import requests

## Downloading some files for the collection

In [None]:
import chromadb
import urllib.request

# Documents that make up the collection. Comment an entry in or out to change
# what gets downloaded.
links = [
    "https://www.worldsquash.org/wp-content/uploads/2021/12/201127_Rules-of-Singles-Squash-2020-V2.pdf",
    # "https://www.tutorialspoint.com/squash/squash_tutorial.pdf",
    "http://aero-comlab.stanford.edu/jameson/squash_rackets.pdf",
]

# Base names of the downloaded files; the download loop appends to this list.
filenames = []

# Download directory. The trailing slash matters: the download loop builds
# its target paths by plain string concatenation.
files_path = "./files/"

In [None]:
# Get or create the download folder.
# Attempting the creation and handling FileExistsError avoids the race
# between testing the path and creating it; makedirs additionally copes with
# a files_path whose parent directories do not exist yet.
try:
    os.makedirs(files_path)
except FileExistsError:
    print("existing folder")
else:
    print("folder created")

In [None]:
# Fetch every document into files_path, recording each file's base name.
for link in links:
    # The last path segment of the URL doubles as the local file name.
    filename = link.rpartition("/")[2]
    print(filename)
    filenames.append(filename)
    urllib.request.urlretrieve(link, filename=f"{files_path}{filename}")

## Custom Query Engine for RAG extraction

In [None]:
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.langchain import LangChainLLM

In [None]:
# Load the downloaded files from files_path so they can be indexed.
documents = SimpleDirectoryReader(input_dir=files_path).load_data()

In [None]:
# Embedding model (BAAI/bge-small-en-v1.5) handed to the index builder.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

In [None]:
# Build a vector index over the loaded documents with the embedding model,
# then expose it as the retriever used by the RAG query engine.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
)
retriever = index.as_retriever()

In [None]:
# ChatML prompt for NousResearch Nous-Hermes-2 Mixtral 8x7B.
# Each turn, including the system turn that carries the retrieved context,
# is closed with <|im_end|> before the next <|im_start|> opens.
# NOTE: the trailing "<|im_start|>assistant " marker is also used by the
# test cell to cut the answer out of the raw completion -- keep it unchanged.
qa_prompt = PromptTemplate(
    "<|im_start|>system \n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "<|im_end|>\n"
    "<|im_start|>user \n "
    "Query: {query_str}\n"
    "<|im_end|> \n"
    "<|im_start|>assistant "
)


class RAGStringQueryEngine(CustomQueryEngine):
    """Query engine that retrieves context and prompts the LLM directly.

    The retrieved node contents are joined into one context string, inserted
    into ``qa_prompt`` together with the query, and the formatted prompt is
    sent to ``llm`` as a plain completion request.
    """

    retriever: BaseRetriever
    # Accepted at construction time; custom_query itself does not use it.
    response_synthesizer: BaseSynthesizer
    llm: LangChainLLM
    # Template with ``{context_str}`` and ``{query_str}`` placeholders.
    qa_prompt: PromptTemplate

    def custom_query(self, query_str: str) -> str:
        """Answer ``query_str`` from retrieved context.

        Args:
            query_str: The user's question.

        Returns:
            The LLM completion converted to ``str``.
        """
        nodes = self.retriever.retrieve(query_str)
        context_str = "\n\n".join(n.node.get_content() for n in nodes)

        # Format the template stored on this engine, not the module-level
        # ``qa_prompt``; otherwise the template passed to the constructor
        # would be silently ignored.
        prompt = self.qa_prompt.format(
            context_str=context_str, query_str=query_str
        )
        return str(self.llm.complete(prompt))

## Testing it out

### Getting the LLM initiated

In [None]:
# Get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token
# The token is read interactively without echo and exported through the
# environment (presumably picked up from there by the Hugging Face client
# created later -- confirm against the client's documentation).

from getpass import getpass

HUGGINGFACEHUB_API_TOKEN = getpass()
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": HUGGINGFACEHUB_API_TOKEN})

In [None]:
from langchain_community.llms import HuggingFaceHub

# Hosted model to query through the Hugging Face Hub.
repo_id = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"

# Wrap the LangChain Hub client so llama_index can use it as its LLM.
llm = LangChainLLM(
    llm=HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs=dict(temperature=0.5, max_new_tokens=1024),
    )
)

In [None]:
# Response synthesizer in "compact" mode, backed by the same LLM.
synthesizer = get_response_synthesizer(llm=llm, response_mode="compact")

# Assemble the custom RAG engine from the pieces built in the earlier cells.
query_engine = RAGStringQueryEngine(
    llm=llm,
    qa_prompt=qa_prompt,
    retriever=retriever,
    response_synthesizer=synthesizer,
)

In [None]:
response = query_engine.query("how large is a single squash court supposed to be?")

# The completion is expected to echo the prompt, in which case the answer is
# the text following the assistant marker. If the marker is missing (for
# example when only the generated text comes back), print the whole response
# instead of failing with IndexError.
assistant_marker = "<|im_start|>assistant "
segments = response.response.split(assistant_marker)
print(segments[1] if len(segments) > 1 else response.response)

In [None]:
# Show the full, unparsed response text (print applies str() itself).
print(response)