In [1]:
import os
import warnings
from time import time

import torch
from dotenv import load_dotenv
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

warnings.filterwarnings('ignore')

In [2]:
# Read the source book from a UTF-8 text file in the working directory.
loader = TextLoader(
    file_path='Piano Tuning.txt',
    encoding='utf-8',
)
documents = loader.load()

In [3]:
# Break the loaded document into chunks of at most 2000 characters, with
# 400 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=400,
)
texts = text_splitter.split_documents(documents)

In [4]:
# Count the chunks the splitter produced from the loaded document.
len(texts)

154

In [5]:
# Eyeball one chunk to sanity-check the split (index 10 looks like an arbitrary pick).
texts[10]

Document(page_content='History gives credit to Italy for the first productions of this kind,\nabout 1600 A.D., when the faculty of music was beginning to manifest\nitself more boldly. Scientists saw that wonderful developments were\npossible, and we have reason to believe that experiments were made in\nEngland, France, Germany and all civilized countries about this time,\nfor the production of the instrument which we call, in this day, a\nPianoforte. (_Piano e forte_: soft and loud.)\n\nAt this time communication between the different countries was, of\ncourse, slow and uncertain, and experiments of this kind were probably\nunknown outside of the immediate neighborhood in which they were\ntried; therefore, much valuable and interesting history has not come\nto light. However, from the specimens which we have had the pleasure\nof seeing, and some of which we have had the opportunity to work on,\nwe infer that about the same line of difficulties presented themselves\nto all of these earl

In [None]:
# Load variables from a .env file (if one is found) into os.environ so the
# API token never has to be written into the notebook. The return value is
# deliberately not bound to `_`, which IPython reserves for the last output.
load_dotenv()

# Indexing os.environ raises a KeyError naming the variable when the token is missing.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    # zephyr-7b-beta is a text-generation model: the answer length is capped
    # with `max_new_tokens`. `max_length` is not the text-generation length
    # parameter and, where honoured, would count the long retrieval prompt too.
    model_kwargs={"temperature": 0.2, "max_new_tokens": 256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

In [None]:
# BAAI/bge-* checkpoints are BGE models, not INSTRUCTOR models, so they are
# loaded through LangChain's dedicated BGE wrapper (which applies the BGE
# query instruction) instead of HuggingFaceInstructEmbeddings.
# Use the GPU when one is present, otherwise fall back to CPU instead of failing.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# The variable keeps its original name because later cells reference it.
# NOTE: vectors from an index persisted earlier with a different embedding
# class are not comparable with these; rebuild the vector store after switching.
instructor_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": embedding_device},
    # Normalised vectors are what the BGE model card recommends for similarity search.
    encode_kwargs={"normalize_embeddings": True},
)

In [None]:
%%time
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

In [None]:
# Retriever over the Chroma index that returns the 2 closest chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

# Smoke-test retrieval with an actual question. The previous probe string was
# a pasted file name with a stray `')`, which is not a meaningful query.
docs = retriever.get_relevant_documents("How is a piano tuned?")

In [None]:
# Number of documents the smoke-test query returned (the retriever was configured with k=2).
len(docs)

In [None]:
# Question-answering chain: retrieved chunks are "stuffed" into a single
# prompt for the LLM, and the chunks used are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
def process_llm_response(qa_chain, query):
    """Run ``query`` through ``qa_chain`` and print the answer, latency and source metadata.

    Args:
        qa_chain: Callable taking the query string and returning a mapping with
            a ``'result'`` entry and, optionally, a ``'source_documents'``
            sequence whose items expose a ``.metadata`` attribute.
        query: The question to ask.

    Returns:
        None. Everything is reported through ``print``.
    """
    print(f"Query: {query}\n")

    started = time()
    llm_response = qa_chain(query)
    elapsed = time() - started

    print(f"Inference time: {round(elapsed, 3)} sec.")
    print("\nResult:", llm_response['result'])

    # The chain may return no sources (empty retrieval, or a chain built without
    # return_source_documents); report that instead of raising after the LLM
    # call has already completed.
    source_documents = llm_response.get('source_documents') or []
    if source_documents:
        print("\nmetadata:", source_documents[0].metadata)
    else:
        print("\nmetadata: <no source documents returned>")

In [None]:
# Ask for the tuning procedure. The spelling is corrected ("tuning", "are")
# because the text is embedded for retrieval and sent verbatim to the LLM.
query = "What are the steps of piano tuning?"
process_llm_response(qa_chain, query)

In [None]:
# Ask about the instrument's overall construction.
query = "What is the general construction of the piano?"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask for the list of defects a key can have.
query = "Name all the defects to which the key is subject"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask for one named part of the upright action and its use.
query = "Give 1 Technical Names and Uses of the Parts of the Upright Action? "
process_llm_response(qa_chain, query=query)

In [None]:
# Ask about repairs that do not involve the action.
query = "How to repair faults in a piano aside from the action?"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask for a single example of tuning technique.
query = "1 example of Technique or Modus Operandi in Piano Tuning"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask how the piano should be cleaned.
query = "How to clean the piano?"
process_llm_response(qa_chain, query=query)
# NOTE: the metadata printed for this query is wrong.

In [None]:
# Ask how to repair a leak in the air boards. The original wording
# ("if there is a leak is found") was garbled; the question is embedded for
# retrieval and sent verbatim to the LLM, so it is stated cleanly.
query = "How do you fix a leak found in the air boards?"
process_llm_response(qa_chain, query)

In [None]:
# Ask for two possible causes of a key clicking on release.
query = "When a key snaps or clicks at the instant it is let up, give two conditions that might cause it"
process_llm_response(qa_chain, query=query)

In [None]:
# Ask for two causes of poor damping in a square piano.
query = "Give two causes for defective damping in a square piano"
process_llm_response(qa_chain, query=query)

In [None]:
# Show how the chain's retriever searches and which vector store backs it.
(
    qa_chain.retriever.search_type,
    qa_chain.retriever.vectorstore,
)

In [None]:
# Print the prompt template of the chain's inner LLM chain; print() is used
# (rather than a bare expression) so the template's line breaks render as written.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)