In [51]:
import os
from dotenv import load_dotenv
from huggingface_hub import login, InferenceClient
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

In [52]:

load_dotenv('.env.local')
token = os.getenv('HF_TOKEN')
login(token="")

client = InferenceClient(model="meta-llama/Llama-2-7b")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\ASUS\.cache\huggingface\token
Login successful


In [3]:
# Load Documents from Path using TextLoader

# ds = load_dataset("gwenshap/sales-transcripts")
ds = load_dataset("Falah/story44kids_1_prompts")

In [4]:
# Print the first few examples
print(ds['train'][:5])


{'prompts': ['Once upon a time, in a small village nestled on the outskirts of a mystical forest, there lived a poor but content farmer named Ethan. He had a modest cottage and a small plot of land where he grew vegetables to sustain himself. Despite the hardships that came his way, he always wore a smile and greeted everyone with warmth.', "One sunny morning, as Ethan was tending to his crops, he heard a rustling in the bushes nearby. Curiosity piqued, he cautiously approached the sound and discovered a beautiful fox trapped in a hunter's snare. The fox looked at Ethan with pleading eyes, silently asking for help.", 'Without a second thought, Ethan rushed over to free the fox. Using his trusted pocket knife, he carefully cut through the tangled mess until the fox was liberated. Grateful for being saved, the fox introduced herself as Fiona. She explained that she had gotten lost while exploring the depths of the mysterious forest.', "Ethan, being a gentle soul, couldn't leave Fiona alo

In [5]:
# Wrap the text in Document objects
train_texts = [item["prompts"] for item in ds['train']]

# Create Document objects with just the prompt content
documents = [Document(page_content=f"Prompt: {text}") for text in train_texts]


In [6]:
# Split large documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_documents = text_splitter.split_documents(documents)

In [7]:
# Verify the number of document chunks
num_chunks = len(split_documents)
print(f"Number of document chunks: {num_chunks}")

# Print content of the chunks (optional)
for i, doc in enumerate(split_documents):
    print(f"Chunk {i+1}:\n{doc.page_content}\n{'-'*50}")


Number of document chunks: 10
Chunk 1:
Prompt: Once upon a time, in a small village nestled on the outskirts of a mystical forest, there lived a poor but content farmer named Ethan. He had a modest cottage and a small plot of land where he grew vegetables to sustain himself. Despite the hardships that came his way, he always wore a smile and greeted everyone with warmth.
--------------------------------------------------
Chunk 2:
Prompt: One sunny morning, as Ethan was tending to his crops, he heard a rustling in the bushes nearby. Curiosity piqued, he cautiously approached the sound and discovered a beautiful fox trapped in a hunter's snare. The fox looked at Ethan with pleading eyes, silently asking for help.
--------------------------------------------------
Chunk 3:
Prompt: Without a second thought, Ethan rushed over to free the fox. Using his trusted pocket knife, he carefully cut through the tangled mess until the fox was liberated. Grateful for being saved, the fox introduced he

In [8]:
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Extract the text content from the split documents
document_texts = [doc.page_content for doc in split_documents]

#  Embed the documents
embeddings = sentence_model.encode(document_texts)



In [9]:
# Embed the documents and initialize Chroma vector store

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")
vector_store.add_documents(split_documents)
vector_store.persist()

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  vector_store = Chroma(embedding_function=embedding_model, persist_directory="./vector_base")
  vector_store.persist()


In [10]:
# Check the number of embeddings generated
num_embeddings = len(embeddings)
print(f"Number of embeddings generated: {num_embeddings}")


Number of embeddings generated: 10


In [11]:
# Check the number of documents stored in the vector store
stored_embeddings = vector_store._collection.count()
print(f"Number of embeddings in the vector store: {stored_embeddings}")


Number of embeddings in the vector store: 20


In [42]:
def retrieve_documents(query, num_docs=1):
    # Implement your retrieval logic here (e.g., using vector store)
    retrieved_docs = vector_store.similarity_search(query)  
    print("Retrieved Documents:", retrieved_docs[:num_docs])
    return retrieved_docs[:num_docs]

In [48]:
def ask_query(query, model="meta-llama/Llama-2-7b"):
    # Retrieve relevant documents based on the query
    retrieved_docs = retrieve_documents(query)

    # combined_input = f"{query}\nContext: {retrieved_docs}"
    combined_input = f"{query}\nContext: {retrieved_docs}"

    # Generate response using LLM with context
    response = client.text_generation(combined_input, max_new_tokens=50, temperature=0.7)
    return response

In [50]:
# Query the Model

query = "Hi"
response = ask_query(query)
print(response)

Retrieved Documents: [Document(metadata={}, page_content='Prompt: Once upon a time, in a small village nestled on the outskirts of a mystical forest, there lived a poor but content farmer named Ethan. He had a modest cottage and a small plot of land where he grew vegetables to sustain himself. Despite the hardships that came his way, he always wore a smile and greeted everyone with warmth.')]


HfHubHTTPError: 504 Server Error: Gateway Timeout for url: https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b (Request ID: TY3EbyA89FT-mYluOz7f1)

Model meta-llama/Llama-2-7b time out