# 准备数据

In [24]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
# from qdrant_client.http.models import VectorParams, Distance
from qdrant_client.models import VectorParams, Distance

from langchain_ollama import OllamaEmbeddings

from langchain_community.document_loaders import CSVLoader

embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")

# client = QdrantClient(":memory:")
client = QdrantClient("http://localhost:6333")
collection_name = "mlb_teams_2012"
if not client.collection_exists(collection_name):
    client.create_collection(collection_name, vectors_config=VectorParams(size=768, distance=Distance.COSINE))
vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
)


def load_data():
    file_path = "./data/mlb_teams_2012.csv"
    loader = CSVLoader(file_path,
                       source_column="Team",
                       metadata_columns=("Team", "Payroll (millions)", "Wins"),
                       content_columns=("Team", "Payroll (millions)", "Wins"),
                       )
    return loader.load()


documents = load_data()
ids = vector_store.add_documents(documents)
print(ids)

['67c98d0ff09f4c2cba3035333e5fbb25', 'e60cd782555f41b698e82fb7262345ad', 'e4341965dba440468a4484ff84d2bc52', '1b5a340cd07b46f983c2d354d0a1ba1f', '354c63578dad4c188e60c84febda2eeb', 'a890e2877b4d465e8a3bb533a4cc601f', '79019492743643b883ea4ea8acffd8b9', '9b072fa8cd724e3aac20f37247d1ef26', '93b72feab61e4f1fa5bbc8fe26352ace', '1361766cce5b4387a03bb61a162ba80a', '490e7e35c05748c5b7c85c7112361f5b', '14ab7a6387524922ba95d953df7ebead', 'f52411b3bb5540aeacd9a01391384510', 'b640b9fe71fe4e6686163250ca7884bf', '962faeafe8554dfe93c21ed4126d7ae8', '54cace1e1b0b4a18a54e989b8657cf95', 'f2f80a2a43b54a3cbbe36d7e12816854', 'e3e37bba76984611a7fef4f5966882b7', '2fa17794ef99414da2e30d7b83a40deb', 'ac497c946ff84d7da7a9846e49253cf2', '16e0e6232d384cab901862b96bea1ead', 'a1b09c8952fd4b87aa93125fc5afff6e', '5106ff1fce0c4ff4be5f8e23bc7e8f90', '375aee5157fa4e0cb97ce5026965ebd6', '8653266a1b654c0b8844c29eab0420b7', '571a1a2b0c8c4c26b7e110fd451b00b5', '23610d43fdf1478eb2fe6132f092c670', '033cea96fd1e41758bc600e3c8

In [36]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

res = vector_store.similarity_search("",
                                     k=100,
                                     filter=Filter(
                                         must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
                                     )
                                     )
print(len(res), res, sep="\n")

1
[Document(metadata={'Payroll (millions)': '       78.06', 'Team': 'Rockies', 'Wins': ' 64', 'row': 27, 'source': 'Rockies', '_id': '033cea96-fd1e-4175-8bc6-00e3c82a9a04', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]


In [46]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={
    "k": 100,
    "filter": Filter(
        must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
    )
})
res = retriever.invoke("")
print(len(res), res, sep="\n")
print(retriever.search_kwargs)

1
[Document(metadata={'Payroll (millions)': '       78.06', 'Team': 'Rockies', 'Wins': ' 64', 'row': 27, 'source': 'Rockies', '_id': '033cea96-fd1e-4175-8bc6-00e3c82a9a04', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]
{'k': 100, 'filter': Filter(should=None, min_should=None, must=[FieldCondition(key='metadata.Team', match=MatchValue(value='Rockies'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None)}


In [53]:
retriever = vector_store.as_retriever()

retriever.search_kwargs={
    "k": 100,
    "filter": Filter(
        must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
    )
}

# retriever2 = retriever.bind(search_kwargs={"k":1})
# print(retriever2.search_kwargs)


res = retriever.invoke("")
print(len(res), res, sep="\n")

1
[Document(metadata={'Payroll (millions)': '       78.06', 'Team': 'Rockies', 'Wins': ' 64', 'row': 27, 'source': 'Rockies', '_id': '033cea96-fd1e-4175-8bc6-00e3c82a9a04', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]
