In [None]:
## Fetch HTML from Firestore

In [1]:
import os
from dotenv import load_dotenv
from google.cloud import firestore
from google.oauth2 import service_account

# Load environment variables from .env file
load_dotenv()

def get_firestore_client():
    """
    Initialize a Firestore client using a service account file path
    stored in .env as FIREBASE_SERVICE_ACCOUNT_JSON.
    """
    service_account_path = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
    if not service_account_path or not os.path.exists(service_account_path):
        raise FileNotFoundError(
            f"Service account file not found. Check FIREBASE_SERVICE_ACCOUNT_JSON in .env: {service_account_path}"
        )

    credentials = service_account.Credentials.from_service_account_file(service_account_path)
    db = firestore.Client(credentials=credentials, project=credentials.project_id)
    return db


import os
from google.cloud import storage
from google.oauth2 import service_account
from dotenv import load_dotenv

load_dotenv()  # ensure .env is loaded for FIREBASE_SERVICE_ACCOUNT_JSON


def download_text_from_storage(ticker, service_account_path=None):
    """
    Download cleaned 10-K text content from Firebase Storage for a given ticker.
    Path: company_details/EDGAR (US)/filings/{ticker}_10K.txt
    """

    # Get service account path from .env if not provided
    if service_account_path is None:
        service_account_path = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")

    if not service_account_path or not os.path.exists(service_account_path):
        raise FileNotFoundError(f"Service account file not found at {service_account_path}")

    # Initialize Firebase Storage client
    credentials = service_account.Credentials.from_service_account_file(service_account_path)
    client = storage.Client(credentials=credentials, project=credentials.project_id)

    bucket_name = "funwai-resume.firebasestorage.app"
    file_path = f"company_details/EDGAR (US)/filings/{ticker}_10K.txt"

    bucket = client.bucket(bucket_name)
    blob = bucket.blob(file_path)

    # Check if the text file exists
    if not blob.exists():
        raise FileNotFoundError(f"File not found for ticker '{ticker}' at path: {file_path}")

    # Download the text content
    text_content = blob.download_as_text(encoding="utf-8")

    return text_content



In [3]:
# test that we are able to pull data from firestore + firebase storage
if __name__ == "__main__":
    print("üîç Testing Firebase Storage connection...")

    try:
        # Check environment variable
        print("Service account path:", os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON"))

        # Try to download a known file (update ticker if needed)
        ticker = "AAPL"
        html_content = download_text_from_storage(ticker)
        
        # Print preview
        print(f"‚úÖ Successfully downloaded 10k (as text) for {ticker}")
        print("First 500 characters:\n", html_content[:100])

    except Exception as e:
        print("‚ùå Error:", e)


üîç Testing Firebase Storage connection...
Service account path: C:/Users/hongn/idealy_new/idealy/backend/funwai-resume-firebase-adminsdk-fbsvc-a956eb6362.json
‚ùå Error: File not found for ticker 'AAPL' at path: company_details/EDGAR (US)/filings/AAPL_10K.txt


In [11]:
import os
from dotenv import load_dotenv

from openai import OpenAI
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

# Create a new OpenAI client with your project key
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## Select an embeddings model:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

## Select an LLM model
llm = ChatOpenAI(
    model="gpt-4o",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)


In [27]:
import pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

load_dotenv()

# Pinecone setup
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX_NAME")

print(index_name)

pc = Pinecone(api_key=pinecone_api_key)
print(pc.list_indexes())
index = pc.Index(index_name)
vector_store = PineconeVectorStore(embedding=embeddings, index=index)


10k-text-rag
[{
    "name": "10k-text-rag",
    "metric": "cosine",
    "host": "10k-text-rag-fqh5rav.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 3072,
    "deletion_protection": "disabled",
    "tags": null
}]


In [25]:
## Create index if it does not exist already
import os
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# load env
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_env = os.getenv("PINECONE_ENV")
index_name = os.getenv("PINECONE_INDEX_NAME")
load_dotenv()

pc = Pinecone(api_key=pinecone_api_key, environment=pinecone_env)

# Delete old index if exists
if index_name in pc.list_indexes():
    pc.delete_index(index_name)

index_name = "10k-text-rag"

if not pc.has_index(index_name):
    pc.create_index(
        name="10k-text-rag",
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

index = pc.Index(index_name)

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

In [13]:
## check connection to QDRANT
import numpy as np
import time
import uuid

from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, PointIdsList

client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    timeout=60,  # Optional: Increase timeout for slow connections or large operations
)

# Optional: Check connection by listing collections or getting cluster info
try:
    # Attempt to get cluster info as a basic health check
    cluster_info = client.info()
    print("Successfully connected to Qdrant.")
    print(f"Qdrant cluster info: {cluster_info}")
except Exception as e:
    print(f"Failed to connect to Qdrant or get cluster info: {e}")
    # Depending on the error, you might want to stop execution here
    # raise e

Successfully connected to Qdrant.
Qdrant cluster info: title='qdrant - vector search engine' version='1.15.5' commit='48203e414e4e7f639a6d394fb6e4df695f808e51'


In [11]:
COLLECTION_NAME = "10k-text-rag"  # Renamed for clarity
DIM = 3072
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"

# Ensure the collection doesn't already exist for a clean run
try:
    print(f"Checking if collection '{COLLECTION_NAME}' exists...")
    collection_exists = client.collection_exists(collection_name=COLLECTION_NAME)

    if collection_exists:
        print(f"Collection '{COLLECTION_NAME}' already exists. Deleting it first.")
        client.delete_collection(collection_name=COLLECTION_NAME)
        print(f"Collection '{COLLECTION_NAME}' deleted. Waiting a moment...")
        time.sleep(2)  # Give Qdrant a moment to process the deletion
    else:
        print(f"Collection '{COLLECTION_NAME}' does not exist. Proceeding to create.")

    # Create the collection
    print(f"Creating collection '{COLLECTION_NAME}'...")
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=DIM, distance=Distance.COSINE),
        # Optional: Add optimizers_config, hnsw_config, quantization_config etc. here if needed
        # Example: optimizers_config=models.OptimizersConfigDiff(memmap_threshold=20000),
        # Example: hnsw_config=models.HnswConfigDiff(m=16, ef_construct=100)
    )
    print(f"Collection '{COLLECTION_NAME}' created successfully.")

except Exception as e:
    print(f"An error occurred during collection setup: {e}")
    # raise e

# Verify creation by getting collection info
try:
    collection_info = client.get_collection(collection_name=COLLECTION_NAME)
    print(fmt.format("Collection Info"))
    print(collection_info)
except Exception as e:
    print(f"Error getting collection info for '{COLLECTION_NAME}': {e}")

Checking if collection '10k-text-rag' exists...
Collection '10k-text-rag' already exists. Deleting it first.
Collection '10k-text-rag' deleted. Waiting a moment...
Creating collection '10k-text-rag'...
Collection '10k-text-rag' created successfully.

=== Collection Info                ===



In [15]:
import os
import glob
from dotenv import load_dotenv

from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import OpenAI

load_dotenv()

# -------------------------
# 1. Environment variables
# -------------------------
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "10k-text-rag")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
EMBED_MODEL = "text-embedding-3-large"   # 3072 dimensions

client_openai = OpenAI(api_key=OPENAI_API_KEY)

# -------------------------
# 2. Connect to Qdrant
# -------------------------
qdrant = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY
)

# Create or recreate the collection
qdrant.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=3072,               # embedding dimension
        distance=Distance.COSINE
    )
)

print(f"Created collection: {COLLECTION_NAME}")

# -------------------------
# 3. Load and chunk the 10-K text files
# -------------------------
paths = glob.glob("clean_10k_texts/*.txt")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

documents = []

for path in paths:
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    chunks = text_splitter.split_text(text)

    for i, chunk in enumerate(chunks):
        documents.append({
            "text": chunk,
            "source": os.path.basename(path),
            "chunk": i
        })

print("Total chunks prepared:", len(documents))

  qdrant.recreate_collection(


Created collection: 10k-text-rag


In [21]:
from tqdm import tqdm
import time
from qdrant_client.models import PointStruct

BATCH_SIZE = 128
point_id = str(uuid.uuid4())  # ALWAYS valid for Qdrant

batch_points.append(
    PointStruct(
        id=point_id,
        vector=emb,
        payload=doc
    )
)
all_point_ids.append(point_id)

for start in tqdm(range(0, len(documents), BATCH_SIZE), desc="Uploading chunks"):
    batch_docs = documents[start:start + BATCH_SIZE]
    batch_texts = [d["text"] for d in batch_docs]

    # Retry logic for embedding + upsert
    for attempt in range(3):
        try:
            
            # -----------------------------
            # 4. OpenAI Embed the batch
            # -----------------------------
            response = client_openai.embeddings.create(
                model=EMBED_MODEL,
                input=batch_texts
            )
            embeddings = [item.embedding for item in response.data]

            # -----------------------------
            # 5. Convert to Qdrant points
            # -----------------------------
            batch_points = []

            for doc, emb in zip(batch_docs, embeddings):
                batch_points.append(
                    PointStruct(
                        id=str(point_id),
                        vector=emb,
                        payload=doc
                    )
                )
                all_point_ids.append(point_id)
                point_id += 1

            # -----------------------------
            # 6. Upsert into Qdrant
            # -----------------------------
            qdrant.upsert(
                collection_name=COLLECTION_NAME,
                points=batch_points
            )

            break  # success ‚Üí exit retry loop

        except Exception as e:
            print(f"‚ö†Ô∏è Error uploading batch {start//BATCH_SIZE}, attempt {attempt+1}: {e}")
            time.sleep(2)

print("Done!")
print(f"Total vectors inserted: {len(all_point_ids)}")


Uploading chunks:   0%|                                                                        | 0/730 [00:00<?, ?it/s]

‚ö†Ô∏è Error uploading batch 0, attempt 1: can only concatenate str (not "int") to str
‚ö†Ô∏è Error uploading batch 0, attempt 2: can only concatenate str (not "int") to str


Uploading chunks:   0%|                                                                        | 0/730 [00:07<?, ?it/s]


KeyboardInterrupt: 

In [11]:
## Convert HTML to text file and store
import os

ticker = "ABBV"  # or pass dynamically
text_content = download_text_from_storage(ticker)

output_folder = "C:/Users/hongn/idealy_new/idealy/backend/rag-api/clean_10k_texts"

# Save cleaned text
output_path = os.path.join(output_folder, f"{ticker}_10K.txt")

with open(output_path, "w", encoding="utf-8") as f:
    f.write(text_content)

print(f"‚úÖ Saved cleaned text to: {output_path}")

FileNotFoundError: File not found for ticker 'ABBV' at path: company_details/EDGAR (US)/filings/ABBV_10K.txt

In [29]:
import os
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Path to the folder containing your cleaned text files
TEXT_FOLDER = "./clean_10k_texts"

# Load all .txt files
loader = DirectoryLoader(
    TEXT_FOLDER,
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"}
)

docs = loader.load()

print(f"‚úÖ Loaded {len(docs)} documents.\n")

# Preview first document
print("First document metadata:", docs[0].metadata)
print("\n--- First 500 characters ---\n")
print(docs[0].page_content[:500])


‚úÖ Loaded 124 documents.

First document metadata: {'source': 'clean_10k_texts\\AAPL_2025_10K.txt'}

--- First 500 characters ---

| |   
---|---|---  

UNITED STATES

SECURITIES AND EXCHANGE COMMISSION

Washington, D.C. 20549

| |   
---|---|---  

FORM 10-K

| |   
---|---|---  

(Mark One)

‚òí ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934

For the fiscal year ended September 27, 2025

or

‚òê TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934

For the transition period from  to  .

Commission File Number: 001-36743

| |   
---|---|---  

Apple Inc.


In [29]:
## Load documents
import os
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import DirectoryLoader

# Path to the folder containing your cleaned text files
TEXT_FOLDER = "./clean_10k_texts"  # e.g., output of clean_10k_html()

# Option 1: Load a single file
single_file_path = os.path.join(TEXT_FOLDER, "NFLX_10K.txt")
loader = TextLoader(single_file_path, encoding="utf-8")
docs = loader.load()

print(f"‚úÖ Loaded 1 document with {len(docs[0].page_content)} characters.")
print(docs[0].page_content[:500])

# Option 2: Load all text files in a folder
# loader = DirectoryLoader(TEXT_FOLDER, glob="*.txt", loader_cls=TextLoader, loader_kwargs={"encoding": "utf-8"})
# docs = loader.load()

# print(f"‚úÖ Loaded {len(docs)} documents.")
# print(docs[0].metadata)
# print(docs[0].page_content[:500])


‚úÖ Loaded 1 document with 279148 characters.
| |   
---|---|---  
UNITED STATES SECURITIES AND EXCHANGE COMMISSION

Washington, D.C. 20549

_____________________________________________________________________

FORM 10-K

_____________________________________________________________________

(Mark One)

| | | | |   
---|---|---|---|---|---  
‚òí| ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934  

For the fiscal year ended December 31, 2024

OR

| | | | |   
---|---|---|---|---|---  
‚òê| TRANSITION REPORT P


In [31]:
## Docs to Chunks via textsplitter functions
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=200, 
    add_start_index=True)

chunks = text_splitter.split_documents(docs)
print(f"‚úÖ Split into {len(chunks)} chunks.")

‚úÖ Split into 93427 chunks.


In [33]:
## load chunks into the Pinecone Vector store ('vector_store' defined earlier)
from tqdm import tqdm
import time

BATCH_SIZE = 200  # You can tune this (100‚Äì300 is ideal)
document_ids = []

for i in tqdm(range(0, len(chunks), BATCH_SIZE), desc="Uploading chunks"):
    batch = chunks[i:i + BATCH_SIZE]

    # Retry logic (recommended for Pinecone)
    for attempt in range(3):
        try:
            ids = vector_store.add_documents(documents=batch)
            document_ids.extend(ids)
            break  # break retry loop
        except Exception as e:
            print(f"‚ö†Ô∏è Error uploading batch {i//BATCH_SIZE}, attempt {attempt+1}: {e}")
            time.sleep(2)  # wait + retry


Uploading chunks:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                    | 314/468 [25:49<12:17,  4.79s/it]

‚ö†Ô∏è Error uploading batch 314, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:46:47 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '96', 'x-pinecone-request-id': '7982488119216157449', 'x-envoy-upstream-service-time': '28', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 314, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:46:52 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '66', 'x-pinecone-request-id': '1847917596060236855', 'x-envoy-upstream-service-time': '6', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã                    | 315/468 [26:07<22:43,  8.91s/it]

‚ö†Ô∏è Error uploading batch 315, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:03 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '99', 'x-pinecone-request-id': '6887225196364178439', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 315, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:09 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '163', 'x-pinecone-request-id': '1152302126835574217', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                    | 316/468 [26:26<29:52, 11.79s/it]

‚ö†Ô∏è Error uploading batch 316, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:22 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '74', 'x-pinecone-request-id': '3443433195536649323', 'x-envoy-upstream-service-time': '8', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 316, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '81', 'x-pinecone-request-id': '4455966097111190929', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8,

Uploading chunks:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                    | 317/468 [26:46<36:27, 14.49s/it]

‚ö†Ô∏è Error uploading batch 317, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:43 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '79', 'x-pinecone-request-id': '5074216660346096774', 'x-envoy-upstream-service-time': '8', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 317, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:47:49 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '85', 'x-pinecone-request-id': '4451538800431435109', 'x-envoy-upstream-service-time': '8', 'server': 'envoy'})
HTTP response body: {"code":8,

Uploading chunks:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                   | 318/468 [27:06<40:20, 16.13s/it]

‚ö†Ô∏è Error uploading batch 318, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:04 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '103', 'x-pinecone-request-id': '4980604788259433491', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 318, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:11 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '64', 'x-pinecone-request-id': '3450590172140288414', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé                   | 319/468 [27:27<43:21, 17.46s/it]

‚ö†Ô∏è Error uploading batch 319, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:23 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '107', 'x-pinecone-request-id': '5810758915277509526', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 319, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '110', 'x-pinecone-request-id': '8702142190872492001', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":

Uploading chunks:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç                   | 320/468 [27:45<43:39, 17.70s/it]

‚ö†Ô∏è Error uploading batch 320, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:42 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '141', 'x-pinecone-request-id': '2918533118264848584', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 320, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:48:48 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '115', 'x-pinecone-request-id': '217203292585814881', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå                   | 321/468 [28:04<44:09, 18.02s/it]

‚ö†Ô∏è Error uploading batch 321, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:01 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '95', 'x-pinecone-request-id': '2395756261778790895', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 321, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:07 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '105', 'x-pinecone-request-id': '5033429704561069676', 'x-envoy-upstream-service-time': '3', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã                   | 322/468 [28:23<45:00, 18.50s/it]

‚ö†Ô∏è Error uploading batch 322, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:20 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '63', 'x-pinecone-request-id': '7884871746501029296', 'x-envoy-upstream-service-time': '7', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 322, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:26 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '59', 'x-pinecone-request-id': '4345499180109554876', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8,

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä                   | 323/468 [28:43<45:12, 18.71s/it]

‚ö†Ô∏è Error uploading batch 323, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:40 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '113', 'x-pinecone-request-id': '1783150408390401669', 'x-envoy-upstream-service-time': '7', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 323, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:46 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '91', 'x-pinecone-request-id': '957619681063049265', 'x-envoy-upstream-service-time': '8', 'server': 'envoy'})
HTTP response body: {"code":8,

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ                   | 324/468 [29:02<45:02, 18.77s/it]

‚ö†Ô∏è Error uploading batch 324, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:49:58 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '128', 'x-pinecone-request-id': '7256124030153667348', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 324, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:50:05 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '70', 'x-pinecone-request-id': '7280234316967944236', 'x-envoy-upstream-service-time': '4', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                   | 325/468 [29:20<44:46, 18.79s/it]

‚ö†Ô∏è Error uploading batch 325, attempt 1: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:50:18 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '110', 'x-pinecone-request-id': '7934570794545337204', 'x-envoy-upstream-service-time': '6', 'server': 'envoy'})
HTTP response body: {"code":8,"message":"Request failed. You've reached your write unit limit for the current month (2000000). To continue writing data, upgrade your plan.","details":[]}

‚ö†Ô∏è Error uploading batch 325, attempt 2: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Mon, 17 Nov 2025 18:50:24 GMT', 'Content-Type': 'application/json', 'Content-Length': '166', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '92', 'x-pinecone-request-id': '6497272980675131265', 'x-envoy-upstream-service-time': '5', 'server': 'envoy'})
HTTP response body: {"code":8

Uploading chunks:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà                   | 325/468 [29:37<13:02,  5.47s/it]


KeyboardInterrupt: 

In [None]:
print(len(document_ids))
vector_store.similarity_search("What does Apple say about revenue recognition?", k=2)
# the above returns 2 relevant chunks

In [43]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [45]:
from langchain_openai import ChatOpenAI

# Example using local API key or .env
llm = ChatOpenAI(model="gpt-4.1", temperature=0)

In [47]:
from langchain.agents import create_agent

tools = [retrieve_context]

# System prompt / instructions for the agent
system_prompt = (
    "You are a helpful assistant that answers user queries using the provided context. "
    "Use the retrieval tool when necessary."
    "provide succinct answers and as quantitative answers as available from retrieval"
)

# Create the agent
agent = create_agent(llm, tools=tools, system_prompt=system_prompt)


In [59]:
query = (
    "How does Morgan Stanley make money??\n\n"
    "Once you get the answer, please find some of the products and services they sell and how much money each product made.\n\n"
    "Who are Morgan Stanley's main competitors?"
)

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    msg = event["messages"][-1]

    # Only print the final AI messages
    if getattr(msg, "type", None) == "ai": # do not get human, tool messages, only the AI response
        print(msg.content)




Morgan Stanley makes money primarily through the following business segments:

1. Investment Banking: Revenue from financial advisory and underwriting assignments.
2. Investment Management: Revenue from providing asset management and wealth advisory services.
3. Commissions and Fees: Revenue from executing and clearing client transactions on stock, options, and futures exchanges, as well as over-the-counter (OTC) transactions.
4. Markets: Revenue from trading activities, including equity and fixed income markets.

Products and Services & Revenue (latest available):
- Markets (trading, including equities and fixed income): $19.8 billion in revenue.
  - Equity Markets: Growth driven by cash equities, equity derivatives, and prime services.
  - Fixed Income Markets: Growth in asset-backed financing, securitization, and underwriting fees.
- Investment Banking and Investment Management: Specific revenue figures for these segments are not provided in the retrieved context, but they are maj