In [20]:
import os
from dotenv import load_dotenv

# Populate the process environment from a local .env file (if one exists),
# then read the two API credentials. Either value is None when its variable
# is not set.
load_dotenv()
huggingfacehub_api_key = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [32]:
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [33]:
from langchain.document_loaders import DirectoryLoader
# One loader per file type, all rooted at the same docs directory; only the
# glob pattern differs between them.
pdf_loader, txt_loader, csv_loader = (
    DirectoryLoader('../../../docs', glob=pattern)
    for pattern in ("**/*.pdf", "**/*.txt", "**/*.csv")
)

In [34]:
# Run every loader in order and flatten their results into one list of
# documents (PDFs first, then text files, then CSVs).
loaders = [pdf_loader, txt_loader, csv_loader]
documents = [doc for source in loaders for doc in source.load()]

In [35]:
# Sentence-embedding model used to vectorise the document chunks. The model
# name is spelled out (it is the library's documented default) so the
# embedding space does not silently change if that default ever does: a
# persisted index is only valid for the model that built it.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [36]:
# Step 2: Text Splitting
# Cut the loaded documents into chunks targeting 1024 characters each, with a
# 64-character overlap between neighbouring chunks so context is not lost at
# the boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=64,
    chunk_size=1024,
)
texts = text_splitter.split_documents(documents)

In [37]:
# Step 4: Vector Store
# Embed every chunk and index it in a Chroma collection backed by the local
# "db" directory.
# NOTE(review): re-running this cell against an existing "db" directory
# presumably adds the same chunks again; confirm before re-indexing.
db = Chroma.from_documents(texts, embeddings, persist_directory="db")
# In a notebook the kernel keeps the client object alive, so flush to disk
# explicitly instead of relying on persistence at object teardown.
db.persist()

In [38]:
from langchain import HuggingFaceHub

# Text-generation model served through the Hugging Face Hub.
# See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options
repo_id = "databricks/dolly-v2-3b"

# Generation settings are forwarded to the hosted model as-is.
llm = HuggingFaceHub(
    model_kwargs=dict(temperature=0, max_length=64),
    repo_id=repo_id,
)

Found model file at  ../../../models/ggml-gpt4all-j-v1.3-groovy.bin
gptj_model_load: loading model from '../../../models/ggml-gpt4all-j-v1.3-groovy.bin' - please wait ...
gptj_model_load: n_vocab = 50400
gptj_model_load: n_ctx   = 2048
gptj_model_load: n_embd  = 4096
gptj_model_load: n_head  = 16
gptj_model_load: n_layer = 28
gptj_model_load: n_rot   = 64
gptj_model_load: f16     = 2
gptj_model_load: ggml ctx size = 5401.45 MB


gptj_model_load: kv self size  =  896.00 MB
gptj_model_load: ................................... done
gptj_model_load: model size =  3609.38 MB / num tensors = 285


In [29]:
# Retrieval QA chain: look up the 3 closest chunks in the vector store and
# pass them to the LLM using the "stuff" chain type. The retrieved source
# documents are returned together with the answer.
# The chain keeps whichever `llm` and `db` objects exist when this cell runs,
# so re-run it after changing either of them.
qa = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(search_kwargs=dict(k=3)),
    llm=llm,
    chain_type="stuff",
    verbose=False,
    return_source_documents=True,
)

In [39]:
# Ask the chain a question. The chain is built with
# return_source_documents=True, so it has two outputs ("result" and
# "source_documents"); `qa.run(...)` only supports single-output chains and
# raises ValueError here. Call the chain with its input dict instead and pick
# the answer out of the response.
query = "What is Deviare's eCommerce Platform"
response = qa({"query": query})
# The retrieved chunks are available as response["source_documents"].
response["result"]