In [None]:
# from langchain.llms import Ollama
import json, re, os
from langchain_chroma import Chroma
from docx import Document

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableParallel
from operator import itemgetter
from langchain_community.chat_models import ChatOllama
from langchain_openai import OpenAI, ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv
from langchain.schema.runnable import RunnableLambda
from langchain.callbacks.tracers import ConsoleCallbackHandler

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
import io

In [None]:
from IPython.display import Markdown, display

In [None]:
def display_markdown(md_text):
    """Render ``md_text`` as formatted Markdown in the cell output."""
    rendered = Markdown(md_text)
    display(rendered)

In [None]:
# Find a .env file and load its variables into the process environment.
# The return value is assigned to `_` so the cell shows no output.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm.
_ = load_dotenv(find_dotenv())  # read local .env file

In [None]:
# https://levelup.gitconnected.com/introduction-to-ollama-part-1-1156f9563b8d
# https://levelup.gitconnected.com/introduction-to-ollama-part-2-e8516105f600
# https://ollama.com/library
# https://stackoverflow.com/questions/77550506/what-is-the-right-way-to-do-system-prompting-with-ollama-in-langchain-using-pyth
# https://python.langchain.com/v0.2/docs/integrations/chat/openai/

In [None]:
# ollama list
# NAME            ID              SIZE    MODIFIED
# aya:latest      7ef8c4942023    4.8 GB  3 hours ago
# phi3:latest     64c1188f2485    2.4 GB  3 hours ago
# llama3:latest   365c0bd3c000    4.7 GB  3 hours ago
# mistral:latest  2ae6f6dd7a3d    4.1 GB  4 hours ago

In [None]:
# Root folder that is walked recursively for source documents.
# Set the RAG_DOCS_PATH environment variable to point the notebook at another
# location without editing it; the original path remains the default.
docs_path = os.environ.get(
    "RAG_DOCS_PATH",
    '/mnt/c/Users/alexb/OneDrive/Energy Regulation/process/',
)

In [None]:
def read_docx(file_path_):
    """Return the text of a .docx file with one paragraph per line.

    Opens ``file_path_`` with ``Document`` and joins the ``text`` of every
    entry in its ``paragraphs`` with newline characters.
    """
    paragraphs = Document(file_path_).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)

In [None]:
# Accumulator for loaded documents. A later cell extends this list in place,
# so re-run this cell first when reloading to avoid collecting duplicates.
docs = []

In [None]:
# Convert every .docx file under docs_path into a plain-text file stored next
# to it, so the text can be loaded for indexing.
for dirpath, dirnames, filenames in os.walk(docs_path):
    for fidx, file in enumerate(filenames):
        if not file.endswith(".docx"):
            continue
        file_path = os.path.join(dirpath, file)
        # Output name keeps the existing scheme "<name>.docx_<NN>.txt", where NN
        # is the file's position in its directory listing.
        txt_path = f'{file_path}_{fidx:02d}.txt'
        try:
            text_from_doc = read_docx(file_path)
            # Write explicitly as UTF-8: the .txt files are read back as UTF-8,
            # while the platform default encoding may differ and break
            # non-ASCII text.
            with open(txt_path, 'w', encoding='utf-8') as f:
                f.write(text_from_doc)
        except Exception as e:
            # Best effort: report which file failed and carry on with the rest.
            print(f"Skipping {file_path}: {e}")

In [None]:
# Folder scanned for .txt files to index; currently the same tree as docs_path.
# Previous location, kept for reference:
# source_path = '/home/alexb/projects/AI-developer/7_RAG-2/2_vector_db/1_chromadb/source'
source_path = docs_path

In [None]:
# Load every .txt file under source_path and append its chunks to `docs`.
for dirpath, dirnames, filenames in os.walk(source_path):
    for file in filenames:
        if not file.endswith(".txt"):
            continue
        # Join with dirpath (the directory currently being walked), not
        # source_path: otherwise files located in sub-folders resolve to a
        # path that does not exist and are never loaded.
        txt_path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(txt_path, encoding="utf-8")
            docs.extend(loader.load_and_split())
        except Exception as e:
            # Best effort: report the failure instead of silently dropping the file.
            print(f"Skipping {txt_path}: {e}")

# print(f"{len(docs)}")

In [None]:
# Split the loaded documents into chunks for embedding
# (chunk_size=1000, chunk_overlap=200).
# NOTE(review): `docs` already comes from load_and_split(), so the content is
# split twice — confirm that is intended.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

In [None]:
# Embedding function used both to build the vector store and to reopen it.
# Local alternative kept for reference:
# embeddings = OllamaEmbeddings(model="mistral")
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
# Build the persisted Chroma index only once. Running from_documents again on
# an existing persist directory adds the same chunks a second time (and pays
# for the embeddings again), so on a re-run the stored collection is reopened
# instead. Delete ./chroma_db to force a rebuild after the sources change.
if os.path.isdir("./chroma_db"):
    db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
else:
    db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db")

In [None]:
# Reopen the persisted collection from ./chroma_db for querying; the same
# embedding function is passed so queries are embedded like the stored chunks.
db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

In [None]:
# Retriever view of the vector store, created with as_retriever() defaults.
retriever = db.as_retriever()

In [None]:
# Chat model used to answer questions. The commented lines are local Ollama
# alternatives kept for reference, with the kind of value each one returns.
# llm = Ollama(model="mistral") # returns TEXT
# llm = Ollama(model="mistral:instruct") # returns TEXT
# llm = ChatOllama(model="mistral:v0.2") # returns MESSAGE object
# llm = ChatOllama(model="mistral:instruct") # returns MESSAGE object
# NOTE(review): temperature=0.9 is high for answers that should stay close to
# the retrieved documents — confirm this is intended.
llm = ChatOpenAI(model="gpt-4o", temperature=0.9)

In [None]:
# Read the OpenAI API key from a local file kept outside the notebook.
# strip() removes any surrounding whitespace (spaces, tabs, '\r', '\n'), not
# only newlines, so a stray trailing character cannot end up inside the key.
# NOTE(review): openai_api_key is not referenced by any cell visible here —
# confirm whether it still needs to be loaded.
with open('../../openai_api_key.txt') as f:
    openai_api_key = f.read().strip()

In [None]:
# openai_api_key

In [None]:
def _combine_documents(docs, document_separator="\n\n"):
    """Join the page content of ``docs`` into one context string.

    Each document is rendered through a ``{page_content}`` prompt template
    and the rendered strings are joined with ``document_separator``.
    """
    page_content_prompt = PromptTemplate.from_template(template="{page_content}")
    rendered = (format_document(doc, page_content_prompt) for doc in docs)
    return document_separator.join(rendered)

In [None]:
# Prompt inputs, each computed from the dict passed to invoke().
_context = {
    # Render the message list as a "Human: ... / AI: ..." transcript. Passing
    # the list through unchanged would put its Python repr into the prompt.
    "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    # Retrieve documents for the question and merge their text into one string.
    "context": itemgetter("question") | retriever | _combine_documents,
    "question": itemgetter("question"),
}

# FULL Chain ----------------------------

In [None]:
# Prompt text for the answer step. The placeholders {chat_history}, {context}
# and {question} are filled from the keys of the same names.
template = """Answer the question from the user, consider the Chat history and provided context. Use your knowledge as well.

Chat History:
===
{chat_history}
===

Context:
===
{context}
===

Question: {question}
"""
# Chat prompt built from the template string above.
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [None]:
# Full chain: compute the prompt inputs, fill the answer prompt, call the model.
conversational_qa_chain = _context | ANSWER_PROMPT | llm

In [None]:
# -----------------------------------------------------------------
# Invoke 1
# First question of the conversation, asked with an empty chat history.
# `question` and `chat_history` stay in the namespace for later cells.
question = "What are the primary stakeholders mentioned in the context provided?"
chat_history = []
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 1-------------------------")
# print(result)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 2
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "What about A, B, C, D, etc.? The stakeholders that approve and/or get consulted?"
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 2-------------------------")
print(result)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 3
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "Can you describe the process established in the context - for TCMs"
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 3-------------------------")
print(result)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 4
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "Can you create a step-by-step process, with timelines, stakeholders and their roles, and description of steps - for each major step in the process for TCMs"
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 4-------------------------")
print(result)

In [None]:
# Inspect the content of the most recently appended chat_history entry.
chat_history[-1].content

In [None]:
# Plain-text view of the latest answer.
print(result.content)

In [None]:
# Render the latest answer as formatted Markdown.
display_markdown(result.content)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 5
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "Thanks. What about the conflict or disagreement resolution cases - roles of A, B, C. Can you enhance the answer please."
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 5-------------------------")
print(result)

In [None]:
# Render the latest answer as formatted Markdown.
display_markdown(result.content)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 6
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "Thanks. I need to really build the timeline - with all minimum/maximum terms or periods or deadlines mentioned in the text, and the stakeholders those timeframes relate to - broght together onto the timeline"
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 6-------------------------")
print(result)

In [None]:
# Render the latest answer as formatted Markdown.
display_markdown(result.content)

In [None]:
# -----------------------------------------------------------------
# Append the latest answer, paired with the current value of `question`, to
# the running history before asking the follow-up.
chat_history.extend([HumanMessage(content=question), AIMessage(content=result.content)])
# -----------------------------------------------------------------
# Invoke 7
# Rebind `question` so that a later history update stores the question that
# was actually asked here, not an earlier one.
question = "Thanks. Can you create one timeline for the steps, as you have just done - but adding citing of the relevant paragraphs of the text, mentioning the timelines"
result = conversational_qa_chain.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    },
    # Trace every chain step to the console.
    config={"callbacks": [ConsoleCallbackHandler()]},
)
print("-------------------------Invoke 7-------------------------")
print(result)

In [None]:
# Render the latest answer as formatted Markdown.
display_markdown(result.content)