In [2]:
import os

from datasets import load_dataset
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [5]:
# Load LLaMA Model
#
# This checkpoint lives in a gated repository: the download only succeeds for
# an authenticated account that has been granted access. The access token is
# read from the HF_TOKEN environment variable so that it never appears in the
# notebook; when the variable is unset or empty, `None` is passed and the
# library falls back to whatever login it already has cached.

model_id = "meta-llama/Meta-Llama-3.1-8B"
hf_token = os.environ.get("HF_TOKEN") or None
llama_model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B.
401 Client Error. (Request ID: Root=1-66fbfe85-1cfaaff117aaa21834fa04ea;30c0b527-57c1-4956-8cb4-a97f726877a1)

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/resolve/main/config.json.
Access to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.

In [3]:
# Load the sales-transcripts dataset via `load_dataset`
# (identified by the repo id "gwenshap/sales-transcripts"; no local path or TextLoader is involved)

ds = load_dataset("gwenshap/sales-transcripts")

In [4]:
# Print the first five rows of the 'train' split as a quick sanity check
print(ds['train'][:5])


{'text': ['**Sales Rep**: Hi there! Thank you for taking the time to speak with me today. My name is Jamie, and I’m a sales representative from ModaMart. How are you today?', '', "**Customer**: Hi Jamie. I'm doing well, thanks. How about you?", '', '**Sales Rep**: I’m great, thank you! So, I understand you’ve been browsing our online store. Is there anything specific you’re looking for today?']}


In [5]:
# Wrap the text in Document objects
train_texts = ds['train']['text']
# The 'text' column contains blank separator rows (empty strings) between
# utterances; skip them so no empty Document objects are created.
documents = [
    Document(page_content=text)
    for text in train_texts
    if text and text.strip()
]

In [6]:
# Break the documents into overlapping chunks:
# at most 1000 characters per chunk, with 200 characters shared between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
split_documents = text_splitter.split_documents(documents)

In [7]:
# Sentence-transformers encoder used to vectorise the chunks directly
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Collect the raw text of every chunk produced by the splitter
document_texts = [chunk.page_content for chunk in split_documents]

# Encode all chunk texts in one call
# NOTE(review): `embeddings` is not referenced by any later cell visible here — confirm it is still needed
embeddings = sentence_model.encode(document_texts)



In [None]:
# Embed the documents and initialize Chroma vector store

# HuggingFaceEmbeddings selects its checkpoint through `model_name`;
# it has no `model_id` field, so that keyword is rejected at construction time.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")
# Index every chunk, then flush the collection to the persist directory.
vector_store.add_documents(split_documents)
vector_store.persist()

In [None]:
# Create a Retriever and QA Chain

retriever = vector_store.as_retriever()

# RetrievalQA needs a LangChain LLM, not a bare transformers model. Pair the
# loaded weights with their tokenizer in a text-generation pipeline and wrap
# that pipeline in LangChain's HuggingFacePipeline adapter.
llama_tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ.get("HF_TOKEN") or None,  # same gated repo as the model
)
generation_pipeline = pipeline(
    "text-generation",
    model=llama_model,
    tokenizer=llama_tokenizer,
    max_new_tokens=256,  # leave room for a full answer after the retrieved context
)
llm = HuggingFacePipeline(pipeline=generation_pipeline)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
# Thin helper that sends a question through the RAG chain

def ask_query(query: str) -> str:
    """Run ``query`` through the module-level ``qa_chain`` and return its result.

    Args:
        query: The question to pass to ``qa_chain.run``.

    Returns:
        Whatever ``qa_chain.run`` returns for the given query.
    """
    return qa_chain.run(query)

In [None]:
# Send one sample question through the RAG helper and keep the answer

query = "What is Hugging Face."
response = ask_query(query=query)

In [None]:
# Show the answer returned by ask_query for the sample question
print(response)