### Install required packages

In [None]:
# %pip install -r ../requirement.txt

## Import Secrets

In [None]:
import os
from dotenv import load_dotenv

# Read the local .env file into the process environment. override=True asks
# python-dotenv to let values from .env win over variables that are already set.
load_dotenv(override=True)

# Every constant below is None when its variable is missing from the environment.

# --- Confluence access token ---
CONFLUENCE_TOKEN = os.environ.get("CONFLUENCE_TOKEN")

# --- Cosmos DB vCore: connection string and target namespace ---
COSMOSDB_VCORE_CONNECTION_STRING = os.environ.get("COSMOSDB_VCORE_CONNECTION_STRING")
COSMOSDB_NAMESPACE = os.environ.get("COSMOSDB_NAMESPACE")

# --- Azure OpenAI service: endpoint, key and API version ---
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")

# --- Chat model: deployment and model name ---
AZURE_OPENAI_LLM_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_LLM_DEPLOYMENT_NAME")
AZURE_OPENAI_LLM_MODEL_NAME = os.environ.get("AZURE_OPENAI_LLM_MODEL_NAME")

# --- Embedding model: deployment and model name ---
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
AZURE_OPENAI_EMBEDDING_MODEL_NAME = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL_NAME")

<img src="images/confluence-Loader-vectorstore.png" alt="confluence loader" height="100">


## Pull Confluence Documents

In [None]:
# ConfluenceLoader is imported from langchain_community (already a dependency of
# this notebook) because the `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import ConfluenceLoader

# Connect to the Confluence instance using the token read from the environment.
loader = ConfluenceLoader(
    url="https://strive.devops.t-systems.net/confluence/",
    token=CONFLUENCE_TOKEN,
)

# Load the pages of the "SDV" space, skipping attachments.
# NOTE(review): newer langchain_community releases expect these options on the
# ConfluenceLoader constructor rather than on load() — confirm against the
# installed version before moving them.
confluence_documents = loader.load(
    space_key="SDV",
    include_attachments=False,
    limit=1000,
    max_pages=1000,
)

# Fail with a clear message instead of an IndexError in the preview below.
if not confluence_documents:
    raise ValueError(
        "No documents were returned from Confluence; check the URL, token and space key."
    )

print(confluence_documents[0])
print(f'{len(confluence_documents)} documents read from Confluence.')

## Split Confluence Documents

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunks of up to 1024 units with up to 300 units of overlap
# between neighbours; length is measured with len(), i.e. in characters.
splitter_options = {
    "chunk_size": 1024,
    "chunk_overlap": 300,
    "length_function": len,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every loaded Confluence document into chunks.
splitted_documents = text_splitter.split_documents(confluence_documents)

# Report the number of chunks, then preview the first one.
print(f'{len(splitted_documents)} chunks generated.')
print(splitted_documents[0])

## Set Up OpenAI Embeddings

In [None]:
from langchain_openai import AzureOpenAIEmbeddings

# Settings for the Azure OpenAI embedding client; deployment, model and API
# version come from the environment-backed constants.
# NOTE(review): chunk_size here is an option of the embeddings client and is
# independent of the text splitter's chunk_size — presumably a per-request batch
# size; confirm that 1024 is intended.
embedding_settings = dict(
    azure_deployment=AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME,
    model=AZURE_OPENAI_EMBEDDING_MODEL_NAME,
    openai_api_version=OPENAI_API_VERSION,
    embedding_ctx_length=8191,
    chunk_size=1024,
)
openai_embeddings = AzureOpenAIEmbeddings(**embedding_settings)

# Show the resulting client configuration.
print(openai_embeddings)

<img src="images/vectorstore.png" alt="vector store" width="72" height="72">

## Store Documents in Cosmos DB vCore

In [None]:
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
    CosmosDBSimilarityType,
)
from pymongo import MongoClient

_indexName = 'km-index'

# COSMOSDB_NAMESPACE is expected in the form "<database>.<collection>".
# Split on the FIRST dot only, so a collection name that itself contains dots
# stays intact, and fail with a clear message when the variable is missing or
# malformed (instead of an AttributeError / unpacking error).
_dbName, _, _collectionName = (COSMOSDB_NAMESPACE or "").partition(".")
if not _dbName or not _collectionName:
    raise ValueError(
        "COSMOSDB_NAMESPACE must be set in the form '<database>.<collection>'."
    )

client: MongoClient = MongoClient(COSMOSDB_VCORE_CONNECTION_STRING)
collection = client[_dbName][_collectionName]

# Destructive: the existing collection is dropped, so every run of this cell
# rebuilds the vector store from scratch.
collection.database.drop_collection(_collectionName)

# Embed all chunks with the embeddings client and store them in the collection.
vectorstore = AzureCosmosDBVectorSearch.from_documents(
    splitted_documents,
    openai_embeddings,
    collection=collection,
    index_name=_indexName,
)

# Parameters handed to create_index below.
num_lists = 100
# NOTE(review): presumably has to equal the embedding model's vector size —
# confirm 1536 for the configured embedding deployment.
dimensions = 1536
similarity_algorithm = CosmosDBSimilarityType.COS

vectorstore.create_index(num_lists, dimensions, similarity_algorithm)


<img src="images/llm.png" alt="llm" width="100" height="100">

# Retrieval-Augmented Generation (RAG)

In [None]:
import time
# AzureChatOpenAI is imported from langchain_openai — the package this notebook
# already uses for AzureOpenAIEmbeddings — instead of the deprecated
# `langchain.chat_models` path.
from langchain_openai import AzureChatOpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# Setup of the LLM and the chain
# NOTE(review): temperature=0.8 is fairly high for source-grounded question
# answering — consider lowering it if answers drift from the retrieved context.
llm = AzureChatOpenAI(
    azure_deployment=AZURE_OPENAI_LLM_DEPLOYMENT_NAME,
    model=AZURE_OPENAI_LLM_MODEL_NAME,
    temperature=0.8
)

# "stuff" chain type: all retrieved documents are handed to the chain in one call.
chain = load_qa_with_sources_chain(
    llm,
    chain_type="stuff"
)

# Active query; the commented lines are alternative questions to try.
# query = "Does the context provide who the first man on the moon was?"
query = "Who is the team lead of knowledge management?"
# query = "Who is Sirin Tiryaki?"
# query = "What is Knowledge Management?"
# query = "What is Knowledge Management? Explain in German."
# query = "Who is Jörg Tischler?"
# query = "Who is the Business Unit Lead in Connected Mobility at T-Systems?"
# query = "How can I book an external training?"
# query = "Explain SDV to a 10-year-old."
# query = "what are hypercubes?"

# Retrieve the 5 most similar chunks from the vector store.
matching_docs = vectorstore.similarity_search(query, k=5)

# invoke() replaces the deprecated Chain.run(); the combined answer is returned
# under the chain's "output_text" key.
result = chain.invoke({"input_documents": matching_docs, "question": query})
answer = result["output_text"]

print(f"Found {len(matching_docs)} matching documents.")
print(f"Answer: {answer}")