In [None]:
import getpass
import os

try:
    # load environment variables from .env file (requires `python-dotenv`)
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    pass

os.environ["LANGSMITH_TRACING"] = "true"
if "LANGSMITH_API_KEY" not in os.environ:
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass(
        prompt="Enter your LangSmith API key (optional): "
    )
if "LANGSMITH_PROJECT" not in os.environ:
    os.environ["LANGSMITH_PROJECT"] = getpass.getpass(
        prompt='Enter your LangSmith Project Name (default = "default"): '
    )
    if not os.environ.get("LANGSMITH_PROJECT"):
        os.environ["LANGSMITH_PROJECT"] = "default"
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass(
        prompt="Enter your OpenAI API key (required if using OpenAI): "
    )
print(os.environ["OPENAI_API_KEY"])

base_url = os.environ.get("BASE_URL")
model_name = os.environ.get("MODEL_NAME")

print(base_url, model_name)
print("langsmith api key", os.environ["LANGSMITH_API_KEY"])

In [2]:
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain.chat_models import init_chat_model

model = init_chat_model(model_name, model_provider="openai", base_url=base_url)

In [3]:
# 文档
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

In [4]:
# 通过文件加载文档
from langchain_community.document_loaders import PyPDFLoader

file_path = "../files/nke-10k-2023.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

print(len(docs))

107


In [5]:
# 分割文档
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

516

In [24]:
# 向量化处理
import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),  # 如果您没有配置环境变量，请在此处用您的API Key进行替换
    base_url=base_url  # 百炼服务的base_url
)

completion = client.embeddings.create(
    model="text-embedding-v3",
    input='The clothes are of good quality and look good, definitely worth the wait. I love them.',
    dimensions=1024,
    encoding_format="float"
)

print(completion.model_dump_json())

{"data":[{"embedding":[-0.0575992688536644,-0.017321519553661346,-0.10115918517112732,0.037109408527612686,-0.025251852348446846,-0.000966983730904758,-0.004726422019302845,0.06256995350122452,-0.03327704593539238,0.031019367277622223,0.016714412719011307,0.06879279762506485,-0.0160029586404562,0.012028305791318417,0.021969681605696678,0.022880341857671738,0.032271526753902435,-0.05046576261520386,-0.021855847910046577,-0.03308732435107231,-0.037773434072732925,0.007825987413525581,-0.0286858007311821,0.004071884788572788,-0.0026560926344245672,0.05004837363958359,-0.039120450615882874,-0.04439469054341316,0.03762165457010269,-0.0073327128775417805,-0.027604391798377037,0.08704394847154617,0.07281488180160522,-0.03636949881911278,-0.0035169513430446386,-0.03528808802366257,-0.01865905150771141,0.02692139707505703,-0.062228456139564514,0.025327740237116814,-0.0239617507904768,-0.016325483098626137,0.002095230156555772,0.003111422760412097,-0.010055207647383213,-0.05486728623509407,-0.04

In [35]:
from langchain_community.embeddings import DashScopeEmbeddings

# 使用阿里的方法，OPENAI 兼容模式有问题
embeddings = DashScopeEmbeddings(
    model="text-embedding-v3", dashscope_api_key=os.getenv("OPENAI_API_KEY")
)
vector_1 = embeddings.embed_query(all_splits[0].page_content)
vector_2 = embeddings.embed_query(all_splits[1].page_content)

assert len(vector_1) == len(vector_2)
print(f"Generated vectors of length {len(vector_1)}\n")
print(vector_1[:10])

Generated vectors of length 1024

[-0.05425441637635231, -0.016869427636265755, -0.06312180310487747, -0.0004044165834784508, -0.014613688923418522, -0.02983020432293415, 0.025027036666870117, 0.06109941750764847, 0.024365872144699097, 0.07712294161319733]


In [36]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [37]:
ids = vector_store.add_documents(documents=all_splits)

In [38]:
ids

['9948365d-4dca-4aac-9ad4-ad25fc8ae05d',
 '44056d62-c222-42fa-ad73-a8273b3d1754',
 '59e70484-0bfb-4cce-b1ef-9b8b301bc6c1',
 '907ebfdc-2d92-4368-a7de-e001f86bc3f6',
 '0d8288d6-badc-4684-8929-a436fa5e341a',
 'cc4afd85-407e-496c-b11a-423efb3a62ec',
 '6f201767-5031-442d-bd2b-c96d3f4b234a',
 '26727f12-1db3-4dc3-80bb-c0e5da6441a9',
 '63016ccf-c0fa-4467-815d-3522ba35bd66',
 '31ac80be-c7e5-4e43-b032-904653afda6d',
 '30cfe017-782f-48e0-b2ab-aab45bb84b2d',
 'a8c52968-7e2e-456b-85e4-58870854a476',
 '73863b3c-eb44-4d32-b6af-b560231296aa',
 '862faf61-8e13-4302-aa8a-3a1c934dd170',
 '938ef75a-f750-4aea-98ac-9186993e5aa2',
 'c3cda120-59e6-46c2-9b06-6c7dac74e30b',
 '23b52fdc-7619-4dc3-8d5d-80c07675cc96',
 'bd92cd61-9660-4f0f-8abb-52de3fb536af',
 'ade0cbe8-ad4d-410c-80d7-4c11d251ebca',
 'dfaf5e9f-349b-4f47-b3c3-792f5b84edd0',
 'f8352a7c-278c-4a9d-a231-e19e9aeaa93d',
 '8cc08828-617a-4d08-a123-c616e6a8b8c5',
 '438df7a7-7f58-4a38-813c-b9a937a00790',
 '3e55f9c3-1e2b-4bd2-8c81-b1db28d7f8a2',
 'a4ba0614-36a7-

In [39]:
results = vector_store.similarity_search(
    "How many distribution centers does Nike have in the US?"
)

print(results[0])

page_content='direct to consumer operations sell products through the following number of retail stores in the United States:
U.S. RETAIL STORES NUMBER
NIKE Brand factory stores 213 
NIKE Brand in-line stores (including employee-only stores) 74 
Converse stores (including factory stores) 82 
TOTAL 369 
In the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further information.
2023 FORM 10-K 2' metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../files/nke-10k-2023.pdf', 'total_pages': 107, 'page': 4, 'page_label': '5', 'start_index': 3125}


In [40]:
results

[Document(id='dfaf5e9f-349b-4f47-b3c3-792f5b84edd0', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../files/nke-10k-2023.pdf', 'total_pages': 107, 'page': 4, 'page_label': '5', 'start_index': 3125}, page_content='direct to consumer operations sell products through the following number of retail stores in the United States:\nU.S. RETAIL STORES NUMBER\nNIKE Brand factory stores 213 \nNIKE Brand in-line stores (including employee-only stores) 74 \nConverse stores (including factory stores) 82 \nTOTAL 369 \nIn the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further in

In [41]:
results = await vector_store.asimilarity_search("When was Nike incorporated?")

results

[Document(id='31ac80be-c7e5-4e43-b032-904653afda6d', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../files/nke-10k-2023.pdf', 'total_pages': 107, 'page': 3, 'page_label': '4', 'start_index': 0}, page_content='Table of Contents\nPART I\nITEM 1. BUSINESS\nGENERAL\nNIKE, Inc. was incorporated in 1967 under the laws of the State of Oregon. As used in this Annual Report on Form 10-K (this "Annual Report"), the terms "we," "us," "our,"\n"NIKE" and the "Company" refer to NIKE, Inc. and its predecessors, subsidiaries and affiliates, collectively, unless the context indicates otherwise.\nOur principal business activity is 

In [42]:
# Note that providers implement different scores; the score here
# is a distance metric that varies inversely with similarity.

results = vector_store.similarity_search_with_score("What was Nike's revenue in 2023?")
doc, score = results[0]
print(f"Score: {score}\n")
print(doc)

Score: 0.7591022307649886

page_content='Table of Contents
FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTSThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:
FISCAL 2023 COMPARED TO FISCAL 2022
• NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively.
The increase was due to higher revenues in North America, Europe, Middle East & Africa ("EMEA"), APLA and Greater China, which contributed approximately 7, 6,
2 and 1 percentage points to NIKE, Inc. Revenues, respectively.
• NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. This
increase was primarily due to higher revenues in Men's, the Jordan Brand, Women's and Kids' which grew 17%, 35%,11% and 10%, respectively, on a wholesale
equivalent basis.' metad

In [43]:
# 使用 embedded query
embedding = embeddings.embed_query("How were Nike's margins impacted in 2023?")

results = vector_store.similarity_search_by_vector(embedding)
print(results[0])

page_content='Table of Contents
GROSS MARGIN
FISCAL 2023 COMPARED TO FISCAL 2022
For fiscal 2023, our consolidated gross profit increased 4% to $22,292 million compared to $21,479 million for fiscal 2022. Gross margin decreased 250 basis points to
43.5% for fiscal 2023 compared to 46.0% for fiscal 2022 due to the following:
*Wholesale equivalent
The decrease in gross margin for fiscal 2023 was primarily due to:
• Higher NIKE Brand product costs, on a wholesale equivalent basis, primarily due to higher input costs and elevated inbound freight and logistics costs as well as
product mix;
• Lower margin in our NIKE Direct business, driven by higher promotional activity to liquidate inventory in the current period compared to lower promotional activity in
the prior period resulting from lower available inventory supply;
• Unfavorable changes in net foreign currency exchange rates, including hedges; and
• Lower off-price margin, on a wholesale equivalent basis.
This was partially offset by:'

In [44]:
# Retrievers
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    return vector_store.similarity_search(query, k=1)


retriever.batch(
    [
        "How many distribution centers does Nike have in the US?",
        "When was Nike incorporated?",
    ],
)

[[Document(id='dfaf5e9f-349b-4f47-b3c3-792f5b84edd0', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../files/nke-10k-2023.pdf', 'total_pages': 107, 'page': 4, 'page_label': '5', 'start_index': 3125}, page_content='direct to consumer operations sell products through the following number of retail stores in the United States:\nU.S. RETAIL STORES NUMBER\nNIKE Brand factory stores 213 \nNIKE Brand in-line stores (including employee-only stores) 74 \nConverse stores (including factory stores) 82 \nTOTAL 369 \nIn the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further i

In [45]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

retriever.batch(
    [
        "How many distribution centers does Nike have in the US?",
        "When was Nike incorporated?",
    ],
)

[[Document(id='dfaf5e9f-349b-4f47-b3c3-792f5b84edd0', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': '../files/nke-10k-2023.pdf', 'total_pages': 107, 'page': 4, 'page_label': '5', 'start_index': 3125}, page_content='direct to consumer operations sell products through the following number of retail stores in the United States:\nU.S. RETAIL STORES NUMBER\nNIKE Brand factory stores 213 \nNIKE Brand in-line stores (including employee-only stores) 74 \nConverse stores (including factory stores) 82 \nTOTAL 369 \nIn the United States, NIKE has eight significant distribution centers. Refer to Item 2. Properties for further i