In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import os 
from dotenv import load_dotenv
from time import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
loader = TextLoader('Piano Tuning.txt', encoding='utf-8')
documents = loader.load()

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

In [4]:
len(texts)

1751

In [5]:
texts[10]

Document(page_content='sections have been omitted from the present edition because they were\nout-of-date: Practical Application of Piano Tuning as a Profession,', metadata={'source': 'Piano Tuning.txt'})

In [6]:
_ = load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

llm=HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta", 
    model_kwargs={"temperature":0.2, "max_length":256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
    )

In [7]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="BAAI/bge-base-en-v1.5",
                                                      model_kwargs={"device": "cuda"})

load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
%%time
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

CPU times: total: 6.41 s
Wall time: 10.7 s


In [9]:
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
docs = retriever.get_relevant_documents("Piano Tuning.txt')")

In [10]:
len(docs)

2

In [11]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [12]:
def process_llm_response(qa_chain, query):
    print(f"Query: {query}\n")
    time_1 = time()
    llm_response = qa_chain(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print(f"\nResult:", llm_response['result'])
    print(f"\nmetadata:", llm_response['source_documents'][0].metadata)

In [13]:
query = "What is the steps of piano tunning?"
process_llm_response(qa_chain, query)

Query: What is the steps of piano tunning?

Inference time: 6.077 sec.

Result:  The steps of piano tuning involve adjusting the tension of the piano strings to produce the desired pitch. This is done using a tuning hammer and a tuning fork, which provides a reference tone. The process involves tuning each note in the piano to the correct pitch, as well as adjusting the relationship between notes to ensure they are in tune with each other. The tuner may also make minor adjustments to the action and keyboard to improve the overall performance of the piano. The process can take several hours for a large piano, and requires a high level of skill and attention to detail.

metadata: {'source': 'Piano Tuning.txt'}


In [14]:
query = "What is the general construction of the piano?"
process_llm_response(qa_chain, query)

Query: What is the general construction of the piano?

Inference time: 5.024 sec.

Result:  The piano is a musical instrument that consists of a large wooden cabinet with a row of keys on the front. Inside the cabinet, there are strings that are struck by hammers when the keys are pressed. The strings then vibrate, producing sound that is amplified by the wooden body of the piano. The piano also has pedals that can be pressed to sustain or dampen the sound. The overall design and construction of the piano has evolved over time, with improvements in materials and technology leading to more sophisticated and complex instruments.

metadata: {'source': 'Piano Tuning.txt'}


In [15]:
query = "Name all the defects to which the key is subject"
process_llm_response(qa_chain, query)
#wrong metadata

Query: Name all the defects to which the key is subject

Inference time: 0.351 sec.

Result:  The key is subject to defects such as incompleteness and inaccuracy, as stated in the provided context.

metadata: {'source': 'New Vegetarian Dishes.txt'}


In [16]:
query = "Give 1 Technical Names and Uses of the Parts of the Upright Action? "
process_llm_response(qa_chain, query)

Query: Give 1 Technical Names and Uses of the Parts of the Upright Action? 

Inference time: 2.215 sec.

Result:  One technical name and use of a part of the upright action is the hammer, which strikes the strings when the key is pressed, producing sound.

metadata: {'source': 'Piano Tuning.txt'}


In [None]:
query = "How to repair faults in a piano aside from the action?"
process_llm_response(qa_chain, query)

Query: How to repair faults in a piano aside from the action?



In [None]:
query = "1 example of Technique or Modus Operandi in Piano Tuning"
process_llm_response(qa_chain, query)

In [None]:
query = "How to clean the piano?"
process_llm_response(qa_chain, query)

In [None]:
query = "How to fix if there is a leak is found in the air boards?"
process_llm_response(qa_chain, query)

In [None]:
query = "When a key snaps or clicks at the instant it is let up, give two conditions that might cause it"
process_llm_response(qa_chain, query)

In [None]:
query = "Give two causes for defective damping in a square piano"
process_llm_response(qa_chain, query)

In [None]:
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

In [None]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)