In [1]:
import os
import nest_asyncio

from llama_index.core import Document, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.vector_stores import VectorStoreQueryResult
from qdrant_client import QdrantClient, AsyncQdrantClient
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.core import Settings
from typing import List
from dotenv import load_dotenv
import json
import os
nest_asyncio.apply()
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

## Setup function to transform JSON into documents.

In [2]:
def create_faq_docs(json_path: str, documents: List[Document]) -> int:
    initial_count = len(documents)  # Track initial number of documents
    
    with open(json_path, mode="r", encoding="utf-8") as file:
        faq_data = json.load(file)
    
    for i, entry in enumerate(faq_data):
        category = entry.get("category", "General").strip()
        question = entry.get("question", "").strip()
        answer = entry.get("answer", "").strip()
        
        if question and answer:
            json_data = {
                "Question": question,
                "Answer": answer,
            }
            
            metadata = {
                "category": category,
                "language": "English",
            }
            
            text_content = json.dumps(json_data, ensure_ascii=False)
            document = Document(text=text_content, metadata=metadata, doc_id=f"faq_{i}")
            documents.append(document)
    
    # Return the number of documents appended
    return len(documents) - initial_count

In [3]:
def print_documents_by_indices(documents: List[Document], indices: List[int]) -> None:
    """
    Print documents at specific indices.

    Parameters:
        documents (List[Document]): The list of documents.
        indices (List[int]): A list of indices of the documents to print.
    """
    for index in indices:
        if 0 <= index < len(documents):  # Check if index is within bounds
            print(f"Document {index}:")
            print(f"  doc_id: {documents[index].doc_id}")
            print(f"  text: {documents[index].text}")
            print(f"  metadata: {documents[index].metadata}")
            print()  # Add a blank line for readability
        else:
            print(f"Index {index} is out of bounds. Please specify a valid index.")

### Show some documents example.

In [4]:
# Example usage:
documents = []
faq_count = create_faq_docs("documents/walmart_faqs.json", documents)

# Specify the list of indices to print
print("Example documents:")
indices_to_print = [0, 10]
print_documents_by_indices(documents, indices_to_print)
print(f"{faq_count} FAQ documents have been appended.")

Example documents:
Document 0:
  doc_id: faq_0
  text: {"Question": "What is Walmart+?", "Answer": "Walmart+ is Walmart’s membership program helping members save more time & money with exclusive benefits. With Walmart+, you get free delivery on groceries & more, gas savings, video streaming + so much more!"}
  metadata: {'category': 'Walmart+ Membership', 'language': 'English'}

Document 10:
  doc_id: faq_10
  text: {"Question": "Which membership types are eligible for Walmart+ Assist?", "Answer": "New members, existing members, canceled members, paused members & trial members are all eligible for Walmart+ Assist. To transfer your membership from Walmart+ to Walmart+ Assist, [click here](https://www.walmart.com/plus/assist) & complete the Walmart+ Assist sign-up process. Note: If your Walmart+ membership is associated with a service you receive through another company (i.e., Amex, TracFone, etc.), you’ll need to end that association before you can sign up for Walmart+ Assist."}
  metad

## Setup Cohear Embedding service

In [5]:
embed_model = CohereEmbedding(
    api_key=os.getenv("COHEAR_API_KEY"),
    model_name=os.getenv("COHEAR_MODEL_ID"),
    input_type="search_document",
    embedding_type="float",
)

Settings.chunk_size = 512

## Innitiates VectorStore database (Qdrant)

In [6]:
# creates a persistant index to disk
client = QdrantClient(url="http://localhost:6334", api_key=os.getenv("QDRANT_API_KEY"),  prefer_grpc=True)

# # delete collection if it exists
if client.collection_exists(os.getenv("QDANT_COLLENCTION_NAME")):
    client.delete_collection(os.getenv("QDANT_COLLENCTION_NAME"))

# create our vector store with hybrid indexing enabled
vector_store = QdrantVectorStore(
    os.getenv("QDANT_COLLENCTION_NAME"),
    client=client,
    enable_hybrid=True,
    batch_size=20,
    prefer_grpc=True,
)

  client = QdrantClient(url="http://localhost:6334", api_key=os.getenv("QDRANT_API_KEY"),  prefer_grpc=True)


## Start embedding process.... into vector database

In [7]:
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents=documents, embed_model=embed_model, storage_context=storage_context,
)


## Try to retrive relavent nodes with question.

In [8]:
embed_model = CohereEmbedding(
    api_key=os.getenv("COHEAR_API_KEY"),
    model_name=os.getenv("COHEAR_MODEL_ID"),
    input_type="search_query",
    embedding_type="float",
)

search_query_retriever = index.as_retriever()

search_query_retrieved_nodes = search_query_retriever.retrieve(
"Do all Walmart locations offer scan & go?"
)

In [9]:
from llama_index.core.response.notebook_utils import display_source_node
for n in search_query_retrieved_nodes:
    display_source_node(n, source_length=2000)

**Node ID:** b87202f8-b170-4d55-897f-0bbd3701fd12<br>**Similarity:** 0.7508881092071533<br>**Text:** {"Question": "Do all Walmart locations offer scan & go?", "Answer": "Yes, mobile scan & go works in all Walmart locations."}<br>

**Node ID:** 6f92da09-9189-46c0-a0dc-dcbb4cdb4cdc<br>**Similarity:** 0.6398637294769287<br>**Text:** {"Question": "How do I get started with mobile scan & go?", "Answer": "First, be sure to allow Location Access when you open the Walmart app for the first time (if you have previously not allowed this, you’ll need to update it in your phone’s settings). Then, open the Walmart app when you’re inside a Walmart store & select mobile scan & go. Simply scan items as you add them to your cart before checking out & paying directly from your phone."}<br>