Resources used:
- https://python.langchain.com/v0.1/docs/integrations/vectorstores/milvus/
- https://milvus.io/docs/integrate_with_langchain.md

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is present) into the process environment.
load_dotenv()

# (Optional) only needed if an OpenAI model is used.
# NOTE(review): the variable read here is "OPENAPI_KEY" — confirm it matches the .env entry.
# OPENAI_API_KEY = os.getenv("OPENAPI_KEY")

# --- Ollama ---
MODEL = "mistral:latest"  # name of the model used by Ollama
EMBEDDING_MODEL = "nomic-embed-text"  # name of the model passed to OllamaEmbeddings

# --- Milvus ---
COLLECTION_NAME = "scouting"  # name of the collection to be created
DIMENSION = 768  # dimension of the embeddings
URI = "http://localhost:19530"  # connection URI for the Milvus server

In [2]:
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

# LLM served by Ollama; it is the generation step of the RAG chain built further down.
model = Ollama(model=MODEL)
# Embedding model handed to the Milvus vector store as its embedding function.
embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

In [23]:
# Configure the prompt template that is used to ask the LLM

from langchain_core.prompts import PromptTemplate

# Prompt sent to the LLM. {context} receives the formatted scouting reports and
# {question} receives the user's query; both are filled in by the RAG chain.
# The wording was corrected for grammar only; placeholders, tags and the
# Human/Assistant framing are unchanged.
PROMPT_TEMPLATE = """
Human: You are an AI assistant in football (soccer) scouting and provide answers to questions by using fact-based and statistical information when possible.
Use the following pieces of information to provide a concise answer to the question enclosed in <question> tags.
If you don't know the answer from the context, just say that you don't know, don't try to make up an answer.

<context>
{context}
</context>

<question>
{question}
</question>

The response should be specific and use statistics or numbers when possible.
Structure the response so that you rank the players based on their reports and provide a short summary of the reports from the context.

A:"""

prompt = PromptTemplate(
    template=PROMPT_TEMPLATE, input_variables=["context", "question"]
)
# Render the template with placeholder values to inspect the final prompt text.
print(prompt.format(context="Here is some context", question="Here is a question"))

In [4]:
# Use Milvus as Vectorstore

from langchain_community.vectorstores import Milvus

# Connection settings for the Milvus server configured above.
connection_args = dict(uri=URI)

# LangChain wrapper around the Milvus collection. Queries are embedded with
# `embeddings`; "embeddings" and "id" are the vector and primary-key field names.
vectorstore = Milvus(
    collection_name=COLLECTION_NAME,
    connection_args=connection_args,
    embedding_function=embeddings,
    primary_field="id",
    vector_field="embeddings",
    auto_id=True,
)


In [5]:
# Expose the vector store through the retriever interface.
# "k": 2 limits every lookup to 2 documents.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 2},
)
# Define a function to format the retrieved documents
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by a blank line ("\\n\\n");
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [19]:
# Re-create the retriever over the vector store (same settings as the earlier cell).
# "k": 2 limits every lookup to 2 documents.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 2},
)
# Define a function to format the retrieved documents
def format_docs(docs):
    """Build the context string for the prompt, tagging each report with its player ID.

    This definition replaces the simpler ``format_docs`` from the earlier cell.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and a
            ``metadata`` mapping that contains ``'player_transfermarkt_id'``.

    Returns:
        One "Player ID: <id>, Report-Content: <text>" entry per document,
        joined by a blank line. The same string is also printed to stdout.

    Raises:
        KeyError: If a document's metadata has no ``'player_transfermarkt_id'``.
    """
    entries = []
    for document in docs:
        player_id = document.metadata['player_transfermarkt_id']
        entries.append(f"Player ID: {player_id}, Report-Content: " + document.page_content)
    formatted = "\n\n".join(entries)
    # Echo the context so it is visible in the notebook output when the chain runs.
    print(formatted)
    return formatted

In [24]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# RAG pipeline: the incoming question is sent to the retriever (whose documents
# are formatted into the context) and, unchanged, into the "question" slot;
# the filled prompt then goes to the model and the output is parsed to a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

# Uncomment to print an ASCII diagram of the chain:
# rag_chain.get_graph().print_ascii()

# Run the chain on a sample scouting question and keep the answer.
query = "I need a right-back who can play consistently throughout the game. He should also be good going forward"
res = rag_chain.invoke(query)
# Bare expression so the notebook displays the answer.
res

In [16]:
from langchain_core.documents.base import Document

def extract_metadata(doc: Document) -> dict:
    """Return the ``metadata`` attribute of the given document.

    Args:
        doc: The document whose metadata is wanted.

    Returns:
        The object stored in ``doc.metadata`` (returned as-is, not copied).
    """
    document_metadata = doc.metadata
    return document_metadata

In [17]:
# How to work with metadata from our query:
# call the retriever directly (no LLM involved) to get the documents matched for `query`.
retrived_documents = retriever.invoke(query)
# Bare expression so the notebook displays the retrieved documents.
retrived_documents

In [18]:
# Take the metadata of the first retrieved document.
metadata = extract_metadata(retrived_documents[0])
# Bare expression so the notebook displays it.
metadata

In [19]:
# Transfermarkt Link
# Builds a profile URL from the 'player_transfermarkt_id' entry of the metadata taken above.
# NOTE(review): "player-name" is a fixed placeholder in the path — presumably the
# profile is resolved by the ID alone; confirm.
print(f"Transfermarkt.com Link: https://www.transfermarkt.com/player-name/profil/spieler/{metadata['player_transfermarkt_id']}")

In [20]:
from langchain_core.documents.base import Document

def print_texts(doc: Document):
    """Print the ``page_content`` of the given document to stdout.

    Args:
        doc: The document whose text should be shown.
    """
    text = doc.page_content
    print(text)

In [21]:
# Show the original text of the first retrieved document.
print_texts(retrived_documents[0])

In [22]:
# Off-topic query (German: "I want to bake a cake. Which recipes can you suggest?")
# run through the same chain to see how it answers a question unrelated to scouting.
irrelevant_query = "Ich will einen Kuchen backen. Welche Rezepte kannst du mir vorschlagen?"
# Note: this overwrites `res` from the earlier chain invocation.
res = rag_chain.invoke(irrelevant_query)
res

In [23]:
# Demonstrate retrieval based on metadata:
# the "expr" search argument is a filter expression passed along with the search;
# presumably scout_id is a string field of the collection — confirm against the schema.
# No "k" is given here, so the number of results is whatever the store defaults to.
other_retriver = vectorstore.as_retriever(search_kwargs={"expr": 'scout_id == "3456"'})
expr_res = other_retriver.invoke(query)
# Bare expression so the notebook displays the filtered results.
expr_res