# 准备数据

In [5]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
# from qdrant_client.http.models import VectorParams, Distance
from qdrant_client.models import VectorParams, Distance

from langchain_ollama import OllamaEmbeddings

from langchain_community.document_loaders import CSVLoader

embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")

# client = QdrantClient(":memory:")
client = QdrantClient("http://localhost:6333")
collection_name = "mlb_teams_2012"
if not client.collection_exists(collection_name):
    client.create_collection(collection_name, vectors_config=VectorParams(size=768, distance=Distance.COSINE))
vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
)


def load_data():
    file_path = "./data/mlb_teams_2012.csv"
    loader = CSVLoader(file_path,
                       source_column="Team",
                       metadata_columns=("Team", "Payroll (millions)", "Wins"),
                       content_columns=("Team", "Payroll (millions)", "Wins"),
                       )
    return loader.load()


documents = load_data()
ids = vector_store.add_documents(documents)
print(ids)

['87a0cb5ee364426da1517674a3cc5942', '887f7674220e4e1a9c1d330f10060052', 'a39d82f9ca2b41739b76b2d6bc9eeba6', 'ab8afe7b10424d638c4ee16811c4760d', 'a3e8f42e9105428c80c148c579a6a2e8', 'f6210a6095ac48788e97cced2ca75cc3', '215695f171344732991e406ddbdfcac8', '65233e29927d4eeca594bf256e123037', '66e391ae8cdf47ba8326b03293d1f314', '7cea7031e12844c2ac9e16cbc8e87316', 'de3d064d337446a09962120455546d32', '8b6ecd9b091e46ce9d5a2602d933a8c1', '291a39d654fa4a0895217554a1381174', 'e462bcaa1d474750921cb826bcd5405a', 'd43e6fb687144fe9861c9ea4699a53f6', '45e81736b2a3466282e4a6d28d69d168', 'd6a03e1247a64fefaab905313440fcd5', 'd98a3dbddcd749748dc213d442c24738', 'a4de0700282d44ca9fbd41c7069ffd23', '11dc9411bae74e83944d99488c6a4def', '2f42b7c7cd584603bf96c050fd743ff5', '0ca975f1f74b4cd5875c07114b3260c7', '01b3584adee7444ba8d094909c613466', '58de0906f880494aad3eb6c13b3663cc', '134c81a2cecf4884b0b91716dcdf4f89', '7d8aec3ecd2a41a285b2e048ced32666', 'd0cd95a30c3244c1ba377cd062dd23e8', '9ba240d67a2a40fbaf80de4dd2

In [9]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

client = QdrantClient("http://localhost:6333")
collection_name = "mlb_teams_2012"
if not client.collection_exists(collection_name):
    client.create_collection(collection_name, vectors_config=VectorParams(size=768, distance=Distance.COSINE))
vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embeddings,
)

res = vector_store.similarity_search("",
                                     k=5,
                                     filter=Filter(
                                         must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
                                     )
                                     )
print(len(res), res, sep="\n")
res = vector_store.similarity_search_with_score("",
                                     k=5,
                                     filter=Filter(
                                         must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
                                     )
                                     )
print(len(res), res, sep="\n")

1
[Document(metadata={'source': 'Rockies', 'row': 27, 'Team': 'Rockies', 'Payroll (millions)': '       78.06', 'Wins': ' 64', '_id': '9ba240d6-7a2a-40fb-af80-de4dd28fb051', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]
1
[(Document(metadata={'source': 'Rockies', 'row': 27, 'Team': 'Rockies', 'Payroll (millions)': '       78.06', 'Wins': ' 64', '_id': '9ba240d6-7a2a-40fb-af80-de4dd28fb051', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64'), 0.55020905)]


In [10]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={
    "k": 100,
    "filter": Filter(
        must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
    )
})
res = retriever.invoke("")
print(len(res), res, sep="\n")
print(retriever.search_kwargs)

1
[Document(metadata={'source': 'Rockies', 'row': 27, 'Team': 'Rockies', 'Payroll (millions)': '       78.06', 'Wins': ' 64', '_id': '9ba240d6-7a2a-40fb-af80-de4dd28fb051', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]
{'k': 100, 'filter': Filter(should=None, min_should=None, must=[FieldCondition(key='metadata.Team', match=MatchValue(value='Rockies'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None)}


In [13]:
retriever = vector_store.as_retriever()

retriever.search_kwargs={
    "k": 100,
    "filter": Filter(
        must=[FieldCondition(key="metadata.Team", match=MatchValue(value="Rockies"))],
    )
}

retriever2 = retriever.bind(search_kwargs={"k":1})
print(retriever2.search_kwargs)



res = retriever.invoke("")
print(len(res), res, sep="\n")

{'k': 100, 'filter': Filter(should=None, min_should=None, must=[FieldCondition(key='metadata.Team', match=MatchValue(value='Rockies'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None)}
1
[Document(metadata={'source': 'Rockies', 'row': 27, 'Team': 'Rockies', 'Payroll (millions)': '       78.06', 'Wins': ' 64', '_id': '9ba240d6-7a2a-40fb-af80-de4dd28fb051', '_collection_name': 'mlb_teams_2012'}, page_content='Team: Rockies\nPayroll (millions): 78.06\nWins: 64')]
