In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# initialize the OpenAI client
openai_client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=os.getenv("OPENAI_API_KEY"),
)

model = "text-embedding-3-large"

# define the function to generate embeddings
def get_embedding(text, input_type="document"):
    result = openai_client.embeddings.create(
        model=model,
        input=text,
    )
    return result.data[0].embedding


In [14]:
get_embedding("RAG Techniques")

[-0.010140662081539631,
 -0.027341054752469063,
 -0.031865619122982025,
 0.01603844203054905,
 -0.029154403135180473,
 0.024541810154914856,
 -0.03313320502638817,
 0.012279707938432693,
 0.0067120264284312725,
 0.02684810571372509,
 0.02848540060222149,
 -0.0009198777261190116,
 0.027640344575047493,
 -0.035157814621925354,
 -0.012614208273589611,
 0.033978257328271866,
 -0.007081738207489252,
 -0.009172369726002216,
 -0.0037081195041537285,
 -0.031548723578453064,
 0.03480570763349533,
 -0.00284765986725688,
 -0.024929126724600792,
 0.02536925859749317,
 -0.012860682792961597,
 0.00017206445045303553,
 0.007596693467348814,
 0.0032151706982403994,
 -0.047323089092969894,
 0.012033233419060707,
 0.06394250690937042,
 0.03434797003865242,
 0.0366542674601078,
 -0.003947991877794266,
 -0.02161932736635208,
 -0.025263627991080284,
 0.01229731272906065,
 0.02941848151385784,
 -0.007086139637976885,
 0.002502155490219593,
 0.006439144257456064,
 0.004214272368699312,
 -0.020210901275277138

In [38]:
# data ingestion
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# load the pdf
# loader = PyPDFLoader("https://investors.mongodb.com/node/12236/pdf")
loader = PyPDFLoader("Walmart Earnings Release (FY26 Q3).pdf")
data = loader.load()

# split data into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
documents = text_splitter.split_documents(data)

In [39]:
documents[:2]  # display first 2 chunks

[Document(metadata={'producer': 'Wdesk Fidelity Content Translations Version 014.006.119', 'creator': 'Workiva', 'creationdate': '2025-11-19T19:30:49+00:00', 'author': 'anonymous', 'moddate': '2025-11-19T13:37:10-06:00', 'title': 'Earnings Release (FY26 Q3)', 'source': 'Walmart Earnings Release (FY26 Q3).pdf', 'total_pages': 19, 'page': 0, 'page_label': '1'}, page_content='Walmart reports \nthird quarter results\n• Revenue growth of 5.8%, up 6.0% in constant currency (cc) 1\n• Operating income decreased 0.2%, up 8.0% adjusted (cc) 1\n• eCommerce up 27% globally\n• GAAP EPS of $0.77; Adjusted EPS 1 of $0.62\n• Company raises outlook for FY26\n“\nBENTONVILLE, Ark., November 20, 2025 – Walmart \nInc. (NYSE: WMT) announces third-quarter results'),
 Document(metadata={'producer': 'Wdesk Fidelity Content Translations Version 014.006.119', 'creator': 'Workiva', 'creationdate': '2025-11-19T19:30:49+00:00', 'author': 'anonymous', 'moddate': '2025-11-19T13:37:10-06:00', 'title': 'Earnings Releas

In [40]:
# prepare documents for insertion
docs_to_insert = [{
    "text": doc.page_content,
    "embedding": get_embedding(doc.page_content)
} for doc in documents]

In [41]:
docs_to_insert[:2]  # display first 2 prepared documents

[{'text': 'Walmart reports \nthird quarter results\n• Revenue growth of 5.8%, up 6.0% in constant currency (cc) 1\n• Operating income decreased 0.2%, up 8.0% adjusted (cc) 1\n• eCommerce up 27% globally\n• GAAP EPS of $0.77; Adjusted EPS 1 of $0.62\n• Company raises outlook for FY26\n“\nBENTONVILLE, Ark., November 20, 2025 – Walmart \nInc. (NYSE: WMT) announces third-quarter results',
  'embedding': [-0.022048328071832657,
   -0.0009799256222322583,
   -0.006802945397794247,
   0.04982545226812363,
   0.008838175795972347,
   -0.01761353574693203,
   0.006366375833749771,
   0.051081765443086624,
   0.0014667477225884795,
   0.023091068491339684,
   0.04686054587364197,
   0.0012484629405662417,
   -0.003486274043098092,
   -0.03479992225766182,
   -0.06392130255699158,
   0.030453074723482132,
   0.013543074950575829,
   0.01706075668334961,
   0.041307635605335236,
   0.016621047630906105,
   -0.030553579330444336,
   0.002817286178469658,
   0.013304375112056732,
   0.03773970156908

In [None]:
from pymongo import MongoClient

# connect to your MongoDB deployment
mongo_client = MongoClient(os.getenv("MONGODB_URI"))
collection =  mongo_client["sample_mflix"]["wmt_earnings_pdf"]

# insert documents into the collection
result = collection.insert_many(docs_to_insert)

In [43]:
# query with search index
from pymongo.operations import SearchIndexModel
import time

index_name="vector_index"
search_index_model = SearchIndexModel(
  definition = {
    "fields": [
      {
        "type": "vector",
        "numDimensions": 3072,
        "path": "embedding",
        "similarity": "cosine"
      }
    ]
  },
  name = index_name,
  type = "vectorSearch"
)
collection.create_search_index(model=search_index_model)

# Wait for initial sync to complete
print("Polling to check if the index is ready. This may take up to a minute.")
predicate=None
if predicate is None:
   predicate = lambda index: index.get("queryable") is True

while True:
   indices = list(collection.list_search_indexes(index_name))
   if len(indices) and predicate(indices[0]):
      break
   time.sleep(5)
print(index_name + " is ready for querying.")

Polling to check if the index is ready. This may take up to a minute.
vector_index is ready for querying.


In [44]:
query_embedding = get_embedding("AI Technology")
query_embedding

[-0.014420961029827595,
 -0.010585022158920765,
 -0.01912541501224041,
 -0.025512617081403732,
 0.010015059262514114,
 0.023757493123412132,
 -0.013769575394690037,
 0.03231598064303398,
 -0.008359452709555626,
 0.04635696858167648,
 -0.00997887086123228,
 -0.02857051230967045,
 0.03535578027367592,
 0.010015059262514114,
 -0.03412538766860962,
 0.0003420907014515251,
 -0.0005128533230163157,
 -0.00955366063863039,
 -0.03139318525791168,
 -0.045452263206243515,
 -0.002007309114560485,
 0.006319348234683275,
 -0.027394399046897888,
 -0.012249674648046494,
 0.005111570470035076,
 -0.015461369417607784,
 -0.00691192876547575,
 -0.00016100880748126656,
 -0.03510246425867081,
 0.005265370011329651,
 0.03063323348760605,
 0.005667962599545717,
 0.039227910339832306,
 -0.015325663611292839,
 -0.009481284767389297,
 0.016537966206669807,
 0.018962569534778595,
 0.028118159621953964,
 0.0018863051664084196,
 0.02761152759194374,
 0.03025325946509838,
 -0.016492730006575584,
 -0.0267972946166992

In [45]:
collection.test.aggregate([
  {
    "$vectorSearch": {
      "index": "vector_index",
      "path": "embedding",
      "queryVector":query_embedding,
      "numCandidates":3072 ,
      "limit": 5
    }
  }
])

<pymongo.synchronous.command_cursor.CommandCursor at 0x16aaac7d0>

In [46]:
# Define a function to run vector search queries
def get_query_results(query):
  """Gets results from a vector search query."""

  query_embedding = get_embedding(query, input_type="query")
  pipeline = [
      {
            "$vectorSearch": {
              "index": "vector_index",
              "queryVector": query_embedding,
              "path": "embedding",
              "numCandidates":3072,
              "limit": 5
            }
      }, {
            "$project": {
              "_id": 0,
              "text": 1
         }
      }
  ]

  results = collection.aggregate(pipeline)
  # print(results)

  array_of_results = []
  for doc in results:
      array_of_results.append(doc)
  return array_of_results


In [47]:
# Test the function with a sample query
get_query_results("How was walmart's quarterly performance?")

[{'text': 'Walmart reports \nthird quarter results\n• Revenue growth of 5.8%, up 6.0% in constant currency (cc) 1\n• Operating income decreased 0.2%, up 8.0% adjusted (cc) 1\n• eCommerce up 27% globally\n• GAAP EPS of $0.77; Adjusted EPS 1 of $0.62\n• Company raises outlook for FY26\n“\nBENTONVILLE, Ark., November 20, 2025 – Walmart \nInc. (NYSE: WMT) announces third-quarter results'},
 {'text': 'Business Highlights\nand Strategic Initiatives\nDollars in billions, except as noted. Dollar and percentage changes may not \nrecalculate due to rounding.\nWalmart U.S. Q3 FY26 Q3 FY25 Change\nNet sales $120.7 $114.9 $5.8 5.1%\nComp sales (ex. fuel)2 4.5% 5.3% NP NP\nTransactions 1.8% 3.1% NP NP\nAverage ticket 2.7% 2.1% NP NP\neCommerce contribution to comp ~440 bps ~290 bps NP NP'},
 {'text': 'with strong growth in revenue and adjusted \noperating income. Globally, eCommerce grew 27% \nwith growth in each business segment of more than \n20%. Walmart U.S. comp sales2 up 4.5%, with \nstrength 

In [48]:
from openai import OpenAI

# Specify search query, retrieve relevant documents, and convert to string
query = "How was walmart's quarterly performance?"
context_docs = get_query_results(query)

context_string = " ".join([doc["text"] for doc in context_docs])

# Construct prompt for the LLM using the retrieved documents as the context
prompt = f"""Use the following pieces of context to answer the question at the end.
    {context_string}
    Question: {query}
"""

openai_client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key="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",
)

completion = openai_client.chat.completions.create(
  extra_headers={
    "HTTP-Referer": "<YOUR_SITE_URL>", # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "<YOUR_SITE_NAME>", # Optional. Site title for rankings on openrouter.ai.
  },
  model="openai/gpt-oss-20b:free",
  messages=[
    {
      "role": "user",
      "content": prompt
    }
  ]
)


print(completion.choices[0].message.content)

Walmart’s third‑quarter results were broadly positive:

| Metric | Q3 FY26 | Q3 FY25 | Change |
|--------|---------|---------|--------|
| **Net sales** | $177.8 B | $168.0 B | +5.8 % (≈ 6.0 % in constant currency) |
| **Operating income** | ↓ 0.2 % (GAAP) | – | ↑ 8.0 % (adjusted, constant currency) |
| **e‑commerce** | +27 % globally | – | +20 %+ in every segment |
| **U.S. comp sales** | +4.5 % | – | – |
| **GAAP EPS** | $0.77 | – | – |
| **Adjusted EPS** | $0.62 | – | – |

**Key take‑aways**

* Revenue grew almost 6 % year‑over‑year, with the growth largely driven by e‑commerce, which expanded 27 % worldwide and contributed roughly 440 bps to comp sales.  
* While GAAP operating income slipped slightly, the adjusted operating income – which strips out non‑core items – rose 8 % in constant currency, underscoring stronger core profitability.  
* The company raised its FY26 outlook, projecting net‑sales growth of 4.8 %–5.1 % and adjusted operating‑income growth of 4.8 %–5.5 % in constan