In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

In [5]:
# Load LLaMA Model

model_id = "meta-llama/Meta-Llama-3.1-8B"
llama_model = AutoModelForCausalLM.from_pretrained(model_id)

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:  36%|###6      | 1.81G/5.00G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs-us-1.hf.co/repos/59/43/594310891758e12c04540e5fb01589e425e25052bfa08d42a457c38d95b552b6/c28b25e7541751056ee126627e007f8d4288319733285e9f7b17b9ff6eb313f0?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00002-of-00004.safetensors%3B+filename%3D%22model-00002-of-00004.safetensors%22%3B&Expires=1728058886&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyODA1ODg4Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzU5LzQzLzU5NDMxMDg5MTc1OGUxMmMwNDU0MGU1ZmIwMTU4OWU0MjVlMjUwNTJiZmEwOGQ0MmE0NTdjMzhkOTViNTUyYjYvYzI4YjI1ZTc1NDE3NTEwNTZlZTEyNjYyN2UwMDdmOGQ0Mjg4MzE5NzMzMjg1ZTlmN2IxN2I5ZmY2ZWIzMTNmMD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=K72oOFsYgnazPcePNuukjXKM2AhglhBhsay5DkFVxXlDMBEwjJHZ8XiMWTkKcHnj9tIQ4Xlxy4uiD6y-AsGfcHfRTjHUFRmevraJNg9DEHmIwc96uXjibwFI4bxETjMhWN84mnDlHl0kymjNLWb7z%7EhVIfNikd2Qraa6plQKAHuxyFaIVcOOOtRJ2iuFn2ft9TcvxX8GwO2DX7v9sZZ3DuXIwUIoHwYcBll%

model-00002-of-00004.safetensors:  97%|#########7| 4.85G/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs-us-1.hf.co/repos/59/43/594310891758e12c04540e5fb01589e425e25052bfa08d42a457c38d95b552b6/d8e9504dd4e4a146d484c52a97584ec14dac92237c46b064934af67a85e7d383?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00003-of-00004.safetensors%3B+filename%3D%22model-00003-of-00004.safetensors%22%3B&Expires=1728061480&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyODA2MTQ4MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzU5LzQzLzU5NDMxMDg5MTc1OGUxMmMwNDU0MGU1ZmIwMTU4OWU0MjVlMjUwNTJiZmEwOGQ0MmE0NTdjMzhkOTViNTUyYjYvZDhlOTUwNGRkNGU0YTE0NmQ0ODRjNTJhOTc1ODRlYzE0ZGFjOTIyMzdjNDZiMDY0OTM0YWY2N2E4NWU3ZDM4Mz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=Ac5WP4KzA7q13RsxyACG8UeJeM10WZWTEqn7vp2Bhv0TYmIUgkG1Fmk4Ey3uEGuOgI%7EDCQQ4jPjbMFfpwXBHj7JgPPZJ41QIWgoGxeE4eDcA82a2LXkchVzDSccyaOZE2xc%7ENfjuBQ8wszOkTDHLm08LOe2UKaBuHqNQDB5AeUrzz-uMNdBYnEXqqH8CGpS6LVYVaucnP5l%7EQ0W089HAYfc3ZxFeeDxq

model-00003-of-00004.safetensors:  10%|9         | 472M/4.92G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs-us-1.hf.co/repos/59/43/594310891758e12c04540e5fb01589e425e25052bfa08d42a457c38d95b552b6/d8e9504dd4e4a146d484c52a97584ec14dac92237c46b064934af67a85e7d383?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00003-of-00004.safetensors%3B+filename%3D%22model-00003-of-00004.safetensors%22%3B&Expires=1728061480&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyODA2MTQ4MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzU5LzQzLzU5NDMxMDg5MTc1OGUxMmMwNDU0MGU1ZmIwMTU4OWU0MjVlMjUwNTJiZmEwOGQ0MmE0NTdjMzhkOTViNTUyYjYvZDhlOTUwNGRkNGU0YTE0NmQ0ODRjNTJhOTc1ODRlYzE0ZGFjOTIyMzdjNDZiMDY0OTM0YWY2N2E4NWU3ZDM4Mz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=Ac5WP4KzA7q13RsxyACG8UeJeM10WZWTEqn7vp2Bhv0TYmIUgkG1Fmk4Ey3uEGuOgI%7EDCQQ4jPjbMFfpwXBHj7JgPPZJ41QIWgoGxeE4eDcA82a2LXkchVzDSccyaOZE2xc%7ENfjuBQ8wszOkTDHLm08LOe2UKaBuHqNQDB5AeUrzz-uMNdBYnEXqqH8CGpS6LVYVaucnP5l%7EQ0W089HAYfc3ZxFeeDxq

model-00003-of-00004.safetensors:  10%|#         | 514M/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [6]:
# Load Documents from Path using TextLoader

ds = load_dataset("gwenshap/sales-transcripts")

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

In [7]:
# Print the first few examples
print(ds['train'][:5])


{'text': ['**Sales Rep**: Hi there! Thank you for taking the time to speak with me today. My name is Jamie, and I’m a sales representative from ModaMart. How are you today?', '', "**Customer**: Hi Jamie. I'm doing well, thanks. How about you?", '', '**Sales Rep**: I’m great, thank you! So, I understand you’ve been browsing our online store. Is there anything specific you’re looking for today?']}


In [8]:
# Wrap the text in Document objects
train_texts = ds['train']['text']
documents = [Document(page_content=text) for text in train_texts]

In [9]:
# Split large documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_documents = text_splitter.split_documents(documents)

In [13]:
len(split_documents)

995

In [10]:
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Extract the text content from the split documents
document_texts = [doc.page_content for doc in split_documents]

#  Embed the documents
embeddings = sentence_model.encode(document_texts)



In [21]:
# Embed the documents and initialize Chroma vector store

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")
vector_store.add_documents(split_documents)
vector_store.persist()

  vector_store.persist()


In [28]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

llama_pipeline = pipeline(
    "text-generation", 
    model=llama_model, 
    tokenizer="meta-llama/Meta-Llama-3.1-8B",
)

llm = HuggingFacePipeline(pipeline=llama_pipeline)

retriever = vector_store.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:
# Create a Retriever and QA Chain

retriever = vector_store.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llama_model, retriever=retriever)

In [29]:
# Set Up a Simple Query Function to Run RAG

def ask_query(query):
    response = qa_chain.run(query)
    return response

In [None]:
# Query the Model

query = "What is Hugging Face."
response = ask_query(query)

In [None]:
print(response)