In [4]:
import os
from dotenv import load_dotenv

#load_dotenv()

#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # (Optional), if OpenAI Model is used

MODEL = "mistral:latest" # Name of the model used by Ollama

COLLECTION_NAME = 'doc_qa_db' # Name of the Collection to be created

DIMENSION = 768 # Dimension of the embeddings

URI = 'http://localhost:19530' # Connection parameters for the Milvus Server

In [3]:
# Import LLM and Embeddings

from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

model = Ollama(model=MODEL)
# print(model.invoke("Who is the fastest football player in the world?"))
embeddings = OllamaEmbeddings(model="mistral:latest")

 Determining the fastest footballer in the world can be challenging due to the lack of official speed tests during matches. However, one of the players often cited as one of the quickest is former English and Manchester City winger, Jacob Söderberg Bergström, known professionally as Jacob Bergstrom or simply Jacob. He set a world record for sprinting over 100 meters with a football in 2014, completing it in 10.39 seconds.

In terms of in-game speed, Chelsea's current player, Christian Pulisic, and Manchester City's Raheem Sterling are often highlighted due to their impressive pace on the pitch.

It is essential to note that these players are among the fastest, but the game requires a combination of speed, agility, acceleration, and change of direction abilities, which can sometimes be difficult to quantify accurately.


In [20]:
# (Optional) When you use Open AI Model, you have to parse the output

from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser
chain.invoke("Tell me a joke")

' Of course! Here\'s a classic one:\n\nWhy don\'t scientists trust atoms?\n\nBecause they make up everything! (Everything being the element symbol for Erbium in periodic table, which is fun to remember because it sounds like "every thing")\n\nHope that made you smile today! Let me know if you need anything else.'

In [6]:
# (Example) Use a PDF and split it to later save it into the Vector Store and do Question Answering with it

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Optimierung Cloud-zentrierter Anwendungslandschaften durch Application Portfolio Management.pdf")
pages = loader.load_and_split()
pages

[Document(page_content='Universität Leipzig  \nWirtschaftswissenschaftliche Fakultät  \nInstitut für Wirtschaftsinformatik  \nProf. Dr. Rainer Alt  \n \nThema  \nOptimierung Cloud -zentrierter Anwendungslandschaften durch \nApplication Portfolio Management – Herausforderungen und \nLösungen am Beispiel eines Enterprise Architecture Management \nTools   \nBachelorarbeit zur Erlangung des akademischen Grades  \nBachelor of Science – Wirtschaftsinformatik  \n \nvorgelegt von: Schmohl, Marvin   \nMatrikelnummer:  3719466  \nE-Mail-Adresse:  marvin.schmohl@gmx.de  \nTelefonnummer:  +49 176 41492574  \nAnschrift:  Steindamm 17  \n20099  Hamburg  \nLeipzig, den 30.09.2023', metadata={'source': 'Optimierung Cloud-zentrierter Anwendungslandschaften durch Application Portfolio Management.pdf', 'page': 0}),
 Document(page_content='Abstract  \nUm Anforderungen an eine moderne Anwendungslandschaft gerecht zu werden, kommt \nzunehmend Cloud Computing zum Einsatz. Trotz vieler Vorteile birgt die Tech

In [7]:
# Configure the prompt template that is used to ask the LLM

from langchain.prompts import PromptTemplate

template = """
Answer the question based on the context below. If you can't answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
print(prompt.format(context="Here is some context", question="Here is a question"))


Answer the question based on the context below. If you can't answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [8]:
# Build a LangChain chain containing the prompt and the model

chain = prompt | model 
chain.input_schema.schema()

In [10]:
# Testing the chain

chain.invoke(
    {
        "context": "My name is Marvin",
        "question": "What is my girlfriends name?"
    }
)

" I don't know, as there is no information provided about your girlfriend in the given context."

In [13]:
# Use Milvus as Vectorstore

from langchain_community.vectorstores import Milvus

connection_args = {'uri': URI }

vectorstore = Milvus(
    embedding_function=embeddings,
    connection_args=connection_args,
    collection_name=COLLECTION_NAME,
    drop_old=True
).from_documents(
    pages,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
    connection_args=connection_args
)

In [16]:
# (Optional) When not using Milvus, this is a simple alternative for a basic vector store

from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore = DocArrayInMemorySearch.from_documents(
    pages,
    embedding=embeddings
)

In [22]:
# Do a similarity search for a query

query = "Software Lifecycle"
docs = vectorstore.similarity_search(query)

print(len(docs))

[93m[search] retry:4, cost: 0.27s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:5, cost: 0.81s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:6, cost: 2.43s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:7, cost: 3.00s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:8, cost: 3.00s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to co

4


In [23]:
# Test by using the vectorstore as a retriever. The retriever gives back the relevant pages based on the query invoked

retriever = vectorstore.as_retriever()

retriever.invoke("LeanIX")

[93m[search] retry:4, cost: 0.27s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:5, cost: 0.81s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:6, cost: 2.43s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:7, cost: 3.00s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:19530: Failed to connect to remote host: Connection refused>[0m
[93m[search] retry:8, cost: 3.00s, reason: <_InactiveRpcError: StatusCode.UNAVAILABLE, failed to co

[Document(page_content='Abkürzungsverzeichnis  IV \n Abkürzungsverzeichnis  \nAPM  Application Portfolio Management  \nCCCC  Cloud Computing Considerations for Companies  \nEAM  Enterprise Architecture Management  \nEOL  End-of-Life \nESG  Environmental, Social and Governance  \nIaaS Infrastructure -as-a-Service \nIoT Internet of Things  \nNIST  National Institute of Standards and Technology  \nPaaS  Platform -as-a-Service  \nPAYG  Pay-as-you-go \nSaaS  Software -as-a-Service  \nSASE  Secure Access Service Edge  \nSLA  Service Level Agreement  \nSMP  SaaS  Management  Platform  \nTRM  Technology Risk Management', metadata={'source': 'Optimierung Cloud-zentrierter Anwendungslandschaften durch Application Portfolio Management.pdf', 'page': 5, 'pk': 450076633657247765}),
 Document(page_content='Literaturverzeichnis  VI \n Goasduff, L. (2022, November 10). Gartner Says Cloud Will Be the Centerpiece of New \nDigital Experiences . https://www.gartner.com/en/newsroom/press -releases/2021 -\n1

In [21]:
# Build a chain additionally context information from the Vector database

from operator import itemgetter
chain = (
    {
        "context": itemgetter("question") | retriever, 
        "question": itemgetter("question")
    }
    | prompt 
    | model
)

print(chain.invoke({"question": "Was ist LeanIX?"}))

 LeanIX ist kein direkt erwähnter Begriff in den angezeigten Texten. Es scheint sich um eine Software zur Application Portfolio Management (APM) zu handeln, die helfen kann, die Anwendungslandschaft einer Organisation von einer Portfolioperspektive aus anzusehen und zu analysieren. LeanIX ist eine Marke für das Unternehmen LeanIX AG, das Software-as-a-Service (SaaS) anbietet, um Anwendungsportfolios effizienter und transparenter zu verwalten.


In [28]:
# Alternative way to print the result so every character is printed single in a stream as in ChatGPT

for s in chain.stream({"question": "Was ist Cloud Computing?"}):
    print(s, end="", flush=True)

 Cloud Computing ist ein Dienst, bei dem Rechenleistung, Speicherkapazität und IT-Dienste über das Internet bereitgestellt werden. Es bietet Vorteile wie Kosteneffizienz, hohe Flexibilität und Skalierbarkeit sowie den Zugang zu neuen Technologien. Allerdings gibt es auch Risiken und Herausforderungen bei der Nutzung von Cloud Computing, z.B. bei Datenschutz, Netzausfall, strategischen Aspekten, Abhängigkeit vom Dienstleister, Anbieter-Lock-Ins und rechtlichen Regularien. Um die Vorteile nutzen zu können und Risiken zu minimieren sollte es eine Zusammenarbeit zwischen Unternehmensführung, Fachbereich und IT geben sowie die Entwicklung einer Cloud-Strategie.