In [29]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.embeddings import SentenceTransformerEmbeddings 
from langchain.vectorstores import Chroma 
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA 
from constants import CHROMA_SETTINGS
#from streamlit_chat import message
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM 
from transformers import pipeline

# Path of the LaMini-T5 checkpoint (presumably a local directory relative to
# the working directory -- confirm before running elsewhere).
checkpoint = "models/LaMini-T5-738M"

# Load the tokenizer and the seq2seq model once at module level; the
# llm_pipeline() cell reads them as globals.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)


In [34]:
def llm_pipeline():
    """Wrap the module-level model and tokenizer in a LangChain LLM.

    Builds a Hugging Face ``text2text-generation`` pipeline around the
    module-level ``base_model`` and ``tokenizer`` with sampling enabled
    (``max_length=256``, ``temperature=0.3``, ``top_p=0.95``).

    Returns:
        The ``HuggingFacePipeline`` wrapper around that pipeline.
    """
    # Generation settings forwarded verbatim to the transformers pipeline.
    generation_settings = {
        'max_length': 256,
        'do_sample': True,
        'temperature': 0.3,
        'top_p': 0.95,
    }
    generator = pipeline(
        'text2text-generation',
        model=base_model,
        tokenizer=tokenizer,
        **generation_settings
    )
    return HuggingFacePipeline(pipeline=generator)

In [35]:
def qa_llm():
    """Assemble the retrieval question-answering chain.

    Creates the LLM via ``llm_pipeline()``, opens the Chroma collection
    ``VectorDB`` persisted under the ``db`` directory using
    SentenceTransformer embeddings, and combines the store's retriever with
    the LLM in a "stuff" ``RetrievalQA`` chain.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with
        ``return_source_documents=True``.
    """
    language_model = llm_pipeline()
    embedder = SentenceTransformerEmbeddings(model_name="models/all-MiniLM-L6-v2")
    vector_store = Chroma(
        persist_directory="db",
        embedding_function=embedder,
        collection_name='VectorDB',
    )
    return RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )

In [36]:
def process_answer(instruction, qa=None):
    """Answer a question with the retrieval-QA chain.

    Args:
        instruction: The question passed to the chain.
        qa: Optional pre-built chain -- any callable that accepts the
            question and returns a mapping with a ``'result'`` key. When
            omitted, a fresh chain is built with ``qa_llm()`` (the original
            behaviour). Passing one in lets repeated questions reuse a
            single chain instead of rebuilding the LLM pipeline, the
            embeddings and the vector store on every call.

    Returns:
        The value stored under ``'result'`` in the chain's output.
    """
    if qa is None:
        # Backward-compatible default: build the chain on demand.
        qa = qa_llm()
    return qa(instruction)['result']

In [38]:
# Ask a sample question; the returned answer string is displayed as this
# cell's output.
process_answer("what is the minimum storypoint for bumblebee pod")

'The minimum storypoint for Bumblebee Pod is 1.'