In [5]:
import os
import getpass
# Turn on LangSmith tracing for this kernel's process environment.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt when the key is missing *or* present but empty (e.g. a blank
# `export LANGSMITH_API_KEY=`). A plain membership test would accept the
# blank value and leave tracing switched on with an empty key.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass('Enter your Langsmith Api Key')

Enter your Langsmith Api Key ········


In [16]:
# Documents

from langchain_core.documents import Document 

# Two hand-written sample documents; each gets its own metadata dict
# carrying the same 'source' label.
doc = [
    Document(page_content=text, metadata={'source': 'mammal-pets-doc'})
    for text in (
        'Dogs are great companions, known for their loyalty and friendliness.',
        'Cats are independent pets that often enjoy their own space.',
    )
]

In [20]:
# Document loaders: load the whole PDF into a sequence of Document objects.

from langchain_community.document_loaders import PyPDFLoader 

# Relative path to the PDF that the rest of the notebook indexes.
file_path = '../example_data/nke-10k-2023.pdf'

# Build the loader first, then read the file into `docs`.
loader = PyPDFLoader(file_path=file_path)
docs = loader.load()

# Report how many Document objects the loader produced.
print(f'{len(docs)}')

107


In [33]:
# Preview the first 1000 characters of the first loaded document. `end`
# reproduces the two extra newlines plus print's usual trailing newline.
print(docs[0].page_content[:1000], end='\n\n\n')
# Then show the metadata attached to that same document.
print(docs[0].metadata)

Table of Contents
UNITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-K
(Mark One)
☑ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934
FOR THE FISCAL YEAR ENDED MAY 31, 2023
OR
☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934
FOR THE TRANSITION PERIOD FROM                         TO                         .
Commission File No. 1-10635
NIKE, Inc.
(Exact name of Registrant as specified in its charter)
Oregon 93-0584541
(State or other jurisdiction of incorporation) (IRS Employer Identification No.)
One Bowerman Drive, Beaverton, Oregon 97005-6453
(Address of principal executive offices and zip code)
(503) 671-6453
(Registrant's telephone number, including area code)
SECURITIES REGISTERED PURSUANT TO SECTION 12(B) OF THE ACT:
Class B Common Stock NKE New York Stock Exchange
(Title of each class) (Trading symbol) (Name of each exchange on which registered)
SECURITIES REGISTERED PURSUAN

In [27]:
from langchain_text_splitters import RecursiveCharacterTextSplitter 

# Splitter configured with chunk_size=1000 and chunk_overlap=200.
# add_start_index=True is what puts `start_index` into each chunk's metadata.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_overlap=200,
    chunk_size=1000,
)

# Split every loaded document and report the number of resulting chunks.
all_splits = text_splitter.split_documents(documents=docs)
print(f'{len(all_splits)}')

516


In [43]:
# for i,text in enumerate(all_splits): 
#     print(i,text.page_content,'\n\n\n\n\n')

In [44]:
import getpass 
import os 

# Ask for the Gemini key only when the environment holds no non-empty value.
if os.environ.get('GOOGLE_API_KEY', '') == '':
    os.environ['GOOGLE_API_KEY'] = getpass.getpass(
        'Enter Google gemini api key'
    )

In [38]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings 

# Embedding model shared by the rest of the notebook: it embeds the sample
# queries and backs the vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    model='models/gemini-embedding-001',
)

In [56]:
# Embed the first two chunks one after the other, reporting each vector's
# length as soon as it is available.
vector_1 = embeddings.embed_query(all_splits[0].page_content)
print(f'v1 len {len(vector_1)}')
vector_2 = embeddings.embed_query(all_splits[1].page_content)
print(f'v2 len {len(vector_2)}')

# Sanity check: both vectors must have the same length.
assert len(vector_2) == len(vector_1)
# Peek at the first ten components only, not the whole vector.
print(vector_1[:10])

v1 len 3072
v2 len 3072
[0.0028428142, -0.0102483705, 0.0021823694, -0.025355693, -0.010879941, -0.0044366363, 0.012312499, -0.02148518, -0.013332739, 0.010780239]


In [57]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory vector store that uses `embeddings` to embed whatever it indexes
# and whatever it is queried with.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [59]:
import time

def add_documents_with_rate_limit(vector_store, documents, batch_size=10, sleep_time=2):
    """
    Add documents to the vector store in small batches to avoid hitting rate limits.

    Args:
        vector_store: Object exposing ``add_documents(batch)``.
        documents: Sequence of documents; must support ``len()`` and slicing.
        batch_size: Maximum number of documents per ``add_documents`` call.
            Must be at least 1.
        sleep_time: Seconds to pause between two consecutive batches.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A negative step would make ``range`` empty, so nothing would be added
    # while the success message is still printed. Reject it up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    total_docs = len(documents)

    for start in range(0, total_docs, batch_size):
        # Clamp the end so the final, possibly shorter, batch is reported accurately.
        end = min(start + batch_size, total_docs)

        print(f"Adding batch {start} to {end}...")
        vector_store.add_documents(documents[start:end])

        # Pause only when another batch follows; sleeping after the last
        # one would just delay completion.
        if end < total_docs:
            time.sleep(sleep_time)

    print("All documents added successfully!")

# Index every chunk through the throttled helper. This call takes the place
# of a single `ids = vector_store.add_documents(documents=all_splits)` call.
add_documents_with_rate_limit(
    vector_store=vector_store,
    documents=all_splits,
    batch_size=10,
    sleep_time=5,
)

Adding batch 0 to 10...
Adding batch 10 to 20...
Adding batch 20 to 30...
Adding batch 30 to 40...
Adding batch 40 to 50...
Adding batch 50 to 60...
Adding batch 60 to 70...
Adding batch 70 to 80...
Adding batch 80 to 90...
Adding batch 90 to 100...
Adding batch 100 to 110...
Adding batch 110 to 120...
Adding batch 120 to 130...
Adding batch 130 to 140...
Adding batch 140 to 150...
Adding batch 150 to 160...
Adding batch 160 to 170...
Adding batch 170 to 180...
Adding batch 180 to 190...
Adding batch 190 to 200...
Adding batch 200 to 210...
Adding batch 210 to 220...
Adding batch 220 to 230...
Adding batch 230 to 240...
Adding batch 240 to 250...
Adding batch 250 to 260...
Adding batch 260 to 270...
Adding batch 270 to 280...
Adding batch 280 to 290...
Adding batch 290 to 300...
Adding batch 300 to 310...
Adding batch 310 to 320...
Adding batch 320 to 330...
Adding batch 330 to 340...
Adding batch 340 to 350...
Adding batch 350 to 360...
Adding batch 360 to 370...
Adding batch 370 to 3

In [62]:
# vector_store.store

In [63]:
# Similarity search: query the vector store with a natural-language question.
# The question's spelling is corrected ("distribution", "US") because this
# exact text is what gets embedded and matched against the indexed chunks.
results = vector_store.similarity_search(
    'How many distribution centers does Nike have in the US?'
)
# Show only the first returned document.
print(results[0])

page_content='operations. We also lease an office complex in Shanghai, China, our headquarters for our Greater China geography, occupied by employees focused on implementing our
wholesale, NIKE Direct and merchandising strategies in the region, among other functions.
In the United States, NIKE has eight significant distribution centers. Five are located in or near Memphis, Tennessee, two of which are owned and three of which are
leased. Two other distribution centers, one located in Indianapolis, Indiana and one located in Dayton, Tennessee, are leased and operated by third-party logistics
providers. One distribution center for Converse is located in Ontario, California, which is leased. NIKE has a number of distribution facilities outside the United States,
some of which are leased and operated by third-party logistics providers. The most significant distribution facilities outside the United States are located in Laakdal,' metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 

In [64]:
results = await vector_store.asimilarity_search('when was nike incorporated') 
print(results)

[Document(id='c62207f4-295c-478f-9168-d2d8c8988903', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../example_data/nke-10k-2023.pdf', 'total_pages': 107, 'page': 3, 'page_label': '4', 'start_index': 0}, page_content='Table of Contents\nPART I\nITEM 1. BUSINESS\nGENERAL\nNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this "Annual Report"), the terms "we," "us," "our,"\n"NIKE" and the "Company" refer to NIKE, Inc. and its predecessors, subsidiaries and affiliates, collectively, unless the context indicates otherwise.\nOur principal business activ

In [65]:
# Same kind of search, but each hit is unpacked below as a (document, score) pair.
results = vector_store.similarity_search_with_score(
    query="What was Nike's revenue in 2023 ?",
)

# Inspect the first hit only: its score first, then the document itself.
doc, score = results[0]
print(f'score : {score}')
print(doc)

score : 0.7706123611043615
page_content='Table of Contents
FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTSThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:
FISCAL 2023 COMPARED TO FISCAL 2022
• NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively.
The increase was due to higher revenues in North America, Europe, Middle East & Africa ("EMEA"), APLA and Greater China, which contributed approximately 7, 6,
2 and 1 percentage points to NIKE, Inc. Revenues, respectively.
• NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. This
increase was primarily due to higher revenues in Men's, the Jordan Brand, Women's and Kids' which grew 17%, 35%,11% and 10%, respectively, on a wholesale
equivalent basis.' metad

In [66]:
# Embed the question explicitly, then search with the resulting vector
# instead of passing the text.
embedding = embeddings.embed_query(
    "How were Nike's margins impacted in 2023?"
)

results = vector_store.similarity_search_by_vector(embedding=embedding)
# Show only the first returned document.
print(results[0])

page_content='Table of Contents
GROSS MARGIN
FISCAL 2023 COMPARED TO FISCAL 2022
For fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. Gross margin decreased 250 basis points to
43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:
*Wholesale equivalent
The decrease in gross margin for fiscal 2023 was primarily due to:
• Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as
product mix;
• Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in
the prior period resulting from lower available inventory supply;
• Unfavorable changes in net foreign currency exchange rates, including hedges; and
• Lower off-price margin, on a wholesale equivalent basis.
This was partially offset by:'

In [67]:
from typing import List 

from langchain_core.documents import Document 
from langchain_core.runnables import chain 

@chain
def retriever(query: str) -> List[Document]:
    """Search ``vector_store`` for ``query`` and return the result of a ``k=1`` similarity search."""
    top_hits = vector_store.similarity_search(query, k=1)
    return top_hits

# Run both questions through the retriever in a single batch call; the cell
# displays the returned value.
retriever.batch(
    inputs=[
        "How many distribution centers does Nike have in US?",
        "When was Nike incorporated?",
    ],
)

[[Document(id='e8bdb1dc-f8c3-48dd-8644-e1a34bac30bc', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../example_data/nke-10k-2023.pdf', 'total_pages': 107, 'page': 26, 'page_label': '27', 'start_index': 804}, page_content='operations. We also lease an office complex in Shanghai, China, our headquarters for our Greater China geography, occupied by employees focused on implementing our\nwholesale, NIKE Direct and merchandising strategies in the region, among other functions.\nIn the United States, NIKE has eight significant distribution centers. Five are located in or near Memphis, Tennessee, two of which are owned an

In [70]:
# The same retrieval can be obtained from the vector store directly. This
# rebinds `retriever`, replacing the hand-written @chain function.
retriever = vector_store.as_retriever(
    search_kwargs=dict(k=1),
    search_type='similarity',
)

# Run the same two questions through the store-backed retriever; the cell
# displays the returned value.
retriever.batch(
    inputs=[
        "How many distribution centers does Nike have in US?",
        "When was Nike incorporated?",
    ],
)

[[Document(id='e8bdb1dc-f8c3-48dd-8644-e1a34bac30bc', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../example_data/nke-10k-2023.pdf', 'total_pages': 107, 'page': 26, 'page_label': '27', 'start_index': 804}, page_content='operations. We also lease an office complex in Shanghai, China, our headquarters for our Greater China geography, occupied by employees focused on implementing our\nwholesale, NIKE Direct and merchandising strategies in the region, among other functions.\nIn the United States, NIKE has eight significant distribution centers. Five are located in or near Memphis, Tennessee, two of which are owned an