In [1]:
import os

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Location of the on-disk Chroma database: <current working directory>/db/chroma_db_apple
current_dir = os.getcwd()
db_dir = os.path.join(current_dir, "db")
persistent_directory = os.path.join(current_dir, "db", "chroma_db_apple")

In [3]:
# Step 1: Scrape the content from apple.com using WebBaseLoader
# The loader is given the list of pages to fetch; load() returns them as Document objects
urls = ["https://www.apple.com/"]

# Build the loader for those URLs and fetch them
loader = WebBaseLoader(web_path=urls)
documents = loader.load()

In [4]:
# Inspect the raw scrape result: the list of Document objects (metadata + page_content)
documents

[Document(metadata={'source': 'https://www.apple.com/', 'title': 'Apple', 'language': 'en-US'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\n\nApple\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nApple\n\nAppleStoreMaciPadiPhoneWatch\nVisionAirPodsTV & HomeEntertainmentAccessoriesSupport\n\n\n0+\n\n\n\n\n\n\n\n\xa0\n\t\t\t\t\t\t\t\t\t\n\nThere’s\xa0still\xa0time to\xa0work\xa0your\xa0magic.\nWrap up your holiday gift list.\n\nShop gifts\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\niPhone 16 Pro\nHello, Apple\xa0Intelligence.\n\nLearn more\nBuy\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\niPhone 16\nHello, Apple\xa0Intelligence.\n\nLearn more\nBuy\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\n\niPad Air\nTwo sizes. Faster chip. Does it all.\n\nLearn more\nBuy\n\n\n\nHello, Apple Intelligence.\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\nAirPods\xa0Pro\xa02\nNow with a Hearing\xa0Aid feature.1\n\n\nLearn more\nBuy\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\nApple Watch Series 10\nThinstant classic.\n\nLearn more\nBuy\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\

In [7]:
# Step 2: Split the scraped content into chunks
# CharacterTextSplitter splits the text into smaller chunks of roughly
# chunk_size characters with no overlap between neighbours. A chunk can still
# come out longer than chunk_size (the splitter then emits a
# "Created a chunk of size ..." warning), so 1000 is a target, not a hard cap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Fail fast when the scrape produced nothing to index: otherwise docs[0] below
# raises an opaque IndexError, and an empty list would be handed to the
# vector-store cell further down.
if not docs:
    raise ValueError(
        "No document chunks were produced; the scraped page content appears to be empty."
    )

# Display information about the split documents
print("\n--- Document Chunks Information ---")
print(f"Number of document chunks: {len(docs)}")
print(f"Sample chunk:\n{docs[0].page_content}\n")

Created a chunk of size 1070, which is longer than the specified 1000
Created a chunk of size 1318, which is longer than the specified 1000



--- Document Chunks Information ---
Number of document chunks: 7
Sample chunk:
Apple


Apple

AppleStoreMaciPadiPhoneWatch
VisionAirPodsTV & HomeEntertainmentAccessoriesSupport


0+

 
									

There’s still time to work your magic.
Wrap up your holiday gift list.

Shop gifts

 

iPhone 16 Pro
Hello, Apple Intelligence.

Learn more
Buy


 

iPhone 16
Hello, Apple Intelligence.

Learn more
Buy


 


iPad Air
Two sizes. Faster chip. Does it all.

Learn more
Buy

Hello, Apple Intelligence.


 

AirPods Pro 2
Now with a Hearing Aid feature.1


Learn more
Buy


 

Apple Watch Series 10
Thinstant classic.

Learn more
Buy


 

App Store Awards
Celebrating the most innovative apps and games of 2024.

View the winners


 

Apple Trade In
Get $180–$650 in credit when you trade in iPhone 12 or higher.2

Get your estimate


 

Apple Card
Get up to 3% Daily Cash back with every purchase.

Learn more
Apply now
Apply now


Apple TV+

FAM Gallery

Watch now

Travel-Friendly Workouts

Play now

NBA 

In [None]:
# Step 3: Create the embedding model
# HuggingFaceInferenceAPIEmbeddings needs a Hugging Face access token. Read it
# from the environment rather than hardcoding it, so the secret is never saved
# in (or committed with) the notebook.
hf_api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HF_TOKEN")
if not hf_api_token:
    raise EnvironmentError(
        "Set the HUGGINGFACEHUB_API_TOKEN (or HF_TOKEN) environment variable "
        "to your Hugging Face access token before running this cell."
    )

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=hf_api_token, model_name="sentence-transformers/all-MiniLM-l6-v2"
)

In [9]:
# Step 4: Build the Chroma vector store, or reopen the one already on disk
# Only the presence of the directory is tested: if it exists, the store is
# reopened as-is and the chunks in `docs` are NOT re-embedded.
if os.path.exists(persistent_directory):
    print(f"Vector store {persistent_directory} already exists. No need to initialize.")
    db = Chroma(embedding_function=embeddings, persist_directory=persistent_directory)
else:
    print(f"\n--- Creating vector store in {persistent_directory} ---")
    db = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persistent_directory,
    )
    print(f"--- Finished creating vector store in {persistent_directory} ---")


--- Creating vector store in c:\Users\azeem.rom\OneDrive - Ideagen plc\Desktop\product-recommendation\rag-notebooks\db\chroma_db_apple ---
--- Finished creating vector store in c:\Users\azeem.rom\OneDrive - Ideagen plc\Desktop\product-recommendation\rag-notebooks\db\chroma_db_apple ---


In [10]:
# Step 5: Query the vector store
# The question to answer from the indexed page content
query = "What new products are announced on Apple.com?"

# Retriever over the store: plain similarity search, keeping the 3 closest chunks
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Run the query through the retriever
relevant_docs = retriever.invoke(query)

In [12]:
# Print each retrieved chunk, numbered from 1, followed by its source when metadata is present
print("\n--- Relevant Documents ---")
for idx, doc in enumerate(relevant_docs, start=1):
    print(f"Document {idx}:\n{doc.page_content}\n")
    if not doc.metadata:
        # No metadata on this chunk, so there is no source to report
        continue
    print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")


--- Relevant Documents ---
Document 1:
Apple


Apple

AppleStoreMaciPadiPhoneWatch
VisionAirPodsTV & HomeEntertainmentAccessoriesSupport


0+

 
									

There’s still time to work your magic.
Wrap up your holiday gift list.

Shop gifts

 

iPhone 16 Pro
Hello, Apple Intelligence.

Learn more
Buy


 

iPhone 16
Hello, Apple Intelligence.

Learn more
Buy


 


iPad Air
Two sizes. Faster chip. Does it all.

Learn more
Buy

Hello, Apple Intelligence.


 

AirPods Pro 2
Now with a Hearing Aid feature.1


Learn more
Buy


 

Apple Watch Series 10
Thinstant classic.

Learn more
Buy


 

App Store Awards
Celebrating the most innovative apps and games of 2024.

View the winners


 

Apple Trade In
Get $180–$650 in credit when you trade in iPhone 12 or higher.2

Get your estimate


 

Apple Card
Get up to 3% Daily Cash back with every purchase.

Learn more
Apply now
Apply now


Apple TV+

FAM Gallery

Watch now

Travel-Friendly Workouts

Play now

NBA 2K25 Arcade Edition

Listen now


Today’

In [13]:
# Inspect the raw retriever result: the list of matched Document objects (metadata + page_content)
relevant_docs

[Document(metadata={'language': 'en-US', 'source': 'https://www.apple.com/', 'title': 'Apple'}, page_content='Apple\n\n\nApple\n\nAppleStoreMaciPadiPhoneWatch\nVisionAirPodsTV & HomeEntertainmentAccessoriesSupport\n\n\n0+\n\n\xa0\n\t\t\t\t\t\t\t\t\t\n\nThere’s\xa0still\xa0time to\xa0work\xa0your\xa0magic.\nWrap up your holiday gift list.\n\nShop gifts\n\n\xa0\n\niPhone 16 Pro\nHello, Apple\xa0Intelligence.\n\nLearn more\nBuy\n\n\n\xa0\n\niPhone 16\nHello, Apple\xa0Intelligence.\n\nLearn more\nBuy\n\n\n\xa0\n\n\niPad Air\nTwo sizes. Faster chip. Does it all.\n\nLearn more\nBuy\n\nHello, Apple Intelligence.\n\n\n\xa0\n\nAirPods\xa0Pro\xa02\nNow with a Hearing\xa0Aid feature.1\n\n\nLearn more\nBuy\n\n\n\xa0\n\nApple Watch Series 10\nThinstant classic.\n\nLearn more\nBuy\n\n\n\xa0\n\nApp\xa0Store Awards\nCelebrating the most innovative apps and games of 2024.\n\nView the winners\n\n\n\xa0\n\nApple Trade In\nGet $180–$650 in credit when you trade in iPhone\xa012 or higher.2\n\nGet your esti