In [None]:
import os
from dotenv import load_dotenv
from pathlib import Path
import streamlit as st

# Load .env from the project root
# The .env file sits one directory above the working directory.
env_path = Path('..', '.env')
load_dotenv(dotenv_path=env_path)

# Tracing configuration: static values first, then the API key from Streamlit secrets.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})
os.environ['LANGCHAIN_API_KEY'] = st.secrets["EXTRA_Langchain_key"]
# os.environ['LANGCHAIN_PROJECT']="pr-advanced-theism-85"

# Chunk Retrieval using Metadata filter

In [None]:
from langchain_openai import ChatOpenAI
from utils.chunk_doc import get_retriever, get_vector_store



# llm = ChatOpenAI(
#     model="gpt-4o-mini",
#     api_key=st.secrets["OpenAI_key"]
# )

vector_store = get_vector_store()

# Query text plus a metadata filter on the "keywords" field.
query = "What is Insertion Sort?"
metadata_filter = {"keywords": "Insertion"}

# Max-marginal-relevance search restricted by the metadata filter.
found_docs = vector_store.max_marginal_relevance_search(
    query,
    filter=metadata_filter,
)
print(found_docs)

# Numbered listing (1-based) of each hit's text.
for position, doc in enumerate(found_docs, start=1):
    print(f"{position}.", doc.page_content, "\n")

# Keyword Generation using LLAMA 3.2

In [None]:
from langchain_ollama import OllamaLLM
from langchain_openai import ChatOpenAI
from utils.custom_embeddings import MyEmbeddings
from utils.chunk_doc import get_vector_store

embedding_func = MyEmbeddings()

# Initialize Ollama LLM
llm = OllamaLLM(
    # model="gemma2:2b",
    model = "llama3.2:latest",
    base_url="http://localhost:11434"  # Adjust this URL if needed
)

from langchain_core.prompts import ChatPromptTemplate

# Define a prompt template for Ollama to generate keywords
# Gemma2:2b Template
# keyword_prompt_template = ChatPromptTemplate.from_messages(
#     [
#         ("system", "You are an assistant that generates keywords for a chunk of text. The keywords must be single words or two-word phrases. Format the output as: ['keyword1', 'keyword2']"),
#         ("human", "Extract relevant keywords for the following chunk:\n\n{chunk_text}")
#     ]
# )

# Llama3.2:1b Template
keyword_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an assistant that generates keywords for a chunk of text. "
                   "Your response should only contain keywords in json format."),
        ("human", "Extract relevant keywords from the following chunk:\n\n{chunk_text}")
    ]
)

chain = keyword_prompt_template | llm

# Initialize Vector Store
vector_store = get_vector_store()

def _parse_keywords(raw):
    """Extract the keyword list from the model's reply.

    Accepts either a plain string or an object with a ``content`` attribute.
    The reply may carry extra text around the JSON, so the outermost
    ``{...}`` / ``[...]`` span is isolated before parsing. Returns ``[]``
    when no keyword list can be recovered.
    """
    import json

    text = str(getattr(raw, "content", raw)).strip()
    starts = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
    if starts:
        start = min(starts)
        end = max(text.rfind("}"), text.rfind("]"))
        if end > start:
            text = text[start:end + 1]
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return []
    if isinstance(parsed, dict):
        parsed = parsed.get("keywords", [])
    return parsed if isinstance(parsed, list) else []


def split_chunks():
    """Chunk every markdown file in ``../data/md/`` and index the chunks.

    Each chunk is sent through the module-level ``chain`` to obtain keywords,
    wrapped in a ``Document`` whose metadata holds the source path and the
    stringified keyword list, and finally added to the module-level
    ``vector_store`` under consecutive string IDs starting at "1".

    Returns:
        None. Any exception is caught and printed rather than raised.
    """
    try:
        from pathlib import Path
        from langchain_core.documents import Document
        from langchain_text_splitters import RecursiveCharacterTextSplitter as Rec

        # Path to markdown directory
        md_dir = Path("../data/md/")
        ids = []
        documents = []

        # The splitter is configuration-only, so build it once for all files.
        text_splitter = Rec(
            chunk_size=2000,
            chunk_overlap=500,
            length_function=len,
            add_start_index=True
        )

        # Sorted so that the counter-based IDs map to the same chunks on every run.
        for md_file in sorted(md_dir.glob("*.md")):
            # Explicit encoding: the platform default may not be UTF-8.
            md_content = md_file.read_text(encoding="utf-8")

            for chunk in text_splitter.split_text(md_content):
                # Generate keywords for the chunk using the LLM chain
                response = chain.invoke({"chunk_text": chunk})
                keywords_list = _parse_keywords(response)
                if not keywords_list:
                    # Keep the chunk searchable even when the model reply was unusable.
                    print(f"Warning: no keywords parsed for a chunk of {md_file}")

                documents.append(
                    Document(
                        page_content=chunk,
                        metadata={"source": str(md_file), "keywords": str(keywords_list)}
                    )
                )
                # IDs are 1-based positions in the overall chunk sequence.
                ids.append(str(len(documents)))

        if not documents:
            print(f"No markdown chunks found under {md_dir}")
            return

        vector_store.add_documents(documents=documents, ids=ids)
    except Exception as e:
        print(f"Error: {e}")
        
if __name__ == "__main__":
    # split_chunks()
    
    # print(response)

    response = chain.invoke({"chunk_text": """ ###### 2.1.1 Insertion

In general when people talk about insertion with respect to linked lists of any
form they implicitly refer to the adding of a node to the tail of the list. When
you use an API like that of DSA and you see a general purpose method that
adds a node to the list, you can assume that you are adding the node to the tail
of the list not the head.

Adding a node to a singly linked list has only two cases:

1. head = in which case the node we are adding is now both the head and
_∅_
_tail of the list; or_

2. we simply need to append our node onto the end of the list updating the
_tail reference appropriately._

1) algorithm Add(value)
2) **Pre: value is the value to add to the list**
3) **Post: value has been placed at the tail of the list**
4) _n_ node(value)
_←_
5) **if head =**
_∅_
6) _head_ _n_
_←_
7) _tail_ _n_
_←_
8) **else**
9) _tail.Next_ _n_
_←_
10) _tail_ _n_
_←_
11) **end if**
12) end Add

As an example of the previous algorithm consider adding the following sequence of integers to the list: 1, 45, 60, and 12, the resulting list is that of
Figure 2.2.

###### 2.1.2 Searching

Searching a linked list is straightforward: we simply traverse the list checking
the value we are looking for with the value of each node in the linked list. The
algorithm listed in this section is very similar to that used for traversal in 2.1.4.
_§_


-----

_CHAPTER 2. LINKED LISTS_ 11

1) algorithm Contains(head, value)
2) **Pre: head is the head node in the list**
3) _value is the value to search for_
4) **Post: the item is either in the linked list, true; otherwise false**
5) _n_ _head_
_←_
6) **while n** = **and n.Value** = value
_̸_ _∅_ _̸_
7) _n_ _n.Next_
_←_
8) **end while**
9) **if n =**
_∅_
10) **return false**
11) **end if**
12) **return true**
13) end Contains

###### 2.1.3 Deletion

Deleting a node from a linked list is straightforward but there are a few cases
we need to account for:

1. the list is empty; or

2. the node to remove is the only node in the linked list; or

3. we are removing the head node; or

4. we are removing the tail node; or

5. the node to remove is somewhere in between the head and tail; or

6. the item to remove doesn’t exist in the linked list

The algorithm whose cases we have described will remove a node from anywhere within a list irrespective of whether the node is the head etc. If you know
that items will only ever be removed from the head or tail of the list then you
can create much more concise algorithms. In the case of always removing from
the front of the linked list deletion becomes an O(1) operation."""})

    # print(dictionary_output)
    print(response)

# Multi Query

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from utils.custom_embeddings import MyEmbeddings
import os
import streamlit as st
from langchain_openai import ChatOpenAI
from langchain_ollama import OllamaLLM
from utils.chunk_doc import get_vector_store

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY']= st.secrets["EXTRA_Langchain_key"]
os.environ['LANGCHAIN_PROJECT']="chatbot-test"



embedding_func = MyEmbeddings()


# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by a single newline. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

gpt4 = ChatOpenAI(
    model="gpt-4o-mini",
    # api_key=os.environ.get("OPENAI_API_KEY"),
    api_key=st.secrets["OpenAI_key"],
    temperature=0
)

# llm = OllamaLLM(model="gemma2:2b", base_url="http://localhost:11434")

generate_queries = (
    prompt_perspectives 
    | gpt4
    | StrOutputParser() 
    | (lambda x: [line for line in x.split("\n") if line.strip() != ""])  # Ensure empty strings are removed
)

from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Args:
        documents: One list of retrieved documents per generated query.

    Returns:
        A flat list with each distinct document once, in first-seen order.
    """
    # Serialize each document so that equal documents compare equal as strings.
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys drops duplicates but keeps first-seen order; a set would
    # hand the documents back in arbitrary order.
    unique_docs = list(dict.fromkeys(serialized))
    return [loads(doc) for doc in unique_docs]

# Initialize ChromaDB client
vector_store = get_vector_store()

retriever = vector_store.as_retriever()

# Retrieve
question = "What is Insertion Sort?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
# print(docs)
len(docs)

from operator import itemgetter

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | gpt4
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})


# RAG-Fusion

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from utils.custom_embeddings import MyEmbeddings
import os
from langchain_openai import ChatOpenAI
from utils.chunk_doc import get_retriever, get_vector_store


embedding_func = MyEmbeddings()


# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=os.environ.get("OPENAI_API_KEY"),
    # api_key=st.secrets["OpenAI_key"],
    temperature=0
)

generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    # Split the reply into one query per line and drop blank lines, so the
    # retriever is never handed an empty query.
    | (lambda x: [line for line in x.split("\n") if line.strip() != ""])
)

from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one list ordered by fused score.

    Every occurrence of a document at 0-based position ``rank`` in any list
    contributes ``1 / (rank + k)`` to that document's score.

    Args:
        results: Ranked document lists, one per query.
        k: Constant added to the rank in the score denominator.

    Returns:
        A list of ``(document, fused_score)`` tuples, highest score first.
    """
    fused_scores = {}

    for ranking in results:
        for position, document in enumerate(ranking):
            # The serialized form is the dictionary key for the document.
            key = dumps(document)
            fused_scores[key] = fused_scores.get(key, 0) + 1 / (position + k)

    ordered = sorted(fused_scores.items(), key=lambda pair: pair[1], reverse=True)
    return [(loads(key), fused) for key, fused in ordered]

# Initialize Vector Store
# vector_store = get_vector_store()

retriever = get_retriever()

# Retrieve
question = "What is Insertion Sort?"
retrieval_chain_rag_fusion  = generate_queries | retriever.map() | reciprocal_rank_fusion
# Invoke the fusion chain built in this cell, not the `retrieval_chain`
# variable left in the kernel by the multi-query cell.
docs = retrieval_chain_rag_fusion.invoke({"question":question})
# print(docs)
len(docs)

from operator import itemgetter

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# The context comes from the RAG-Fusion retrieval chain defined above.
final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})


# Retrieval Check

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from utils.custom_embeddings import MyEmbeddings
import os
from langchain_openai import ChatOpenAI
from utils.chunk_doc import get_retriever, get_vector_store
from prompt_templates.retrieval_check import get_rc_chain
import streamlit as st
from langchain_core.messages import HumanMessage
from utils.image_processing import process_image, encode_image
from prompt_templates.image_template import get_image_chain

embedding_func = MyEmbeddings()

def read_image_bytes(image_path):
    """Return the full contents of the file at ``image_path`` as bytes."""
    with open(image_path, mode='rb') as handle:
        raw_bytes = handle.read()
    return raw_bytes


from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(
    model="gpt-4o-mini",
    # api_key=os.environ.get("OPENAI_API_KEY"),
    api_key=st.secrets["OpenAI_key"],
    temperature=0
)
import base64


img = read_image_bytes('46bfac9.png')


# Base64 text form of the image, for embedding in a data URL.
image_data = base64.b64encode(img).decode("utf-8")



image_chain = get_image_chain(llm)

# The file is a .png, so the data URL declares image/png (it previously said image/jpeg).
message_content = [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_data}"}
                    }
                ]

# Prompt with a text instruction plus the image; {image_data} is filled at invoke time.
prompt = ChatPromptTemplate(
    [
        ("system", ""),
        (
            "human",
            [
                {
                    "type": "text",
                    "text": "Describe the image provided",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/png;base64,{image_data}"},
                }
            ],
        ),
    ]
)
chain = prompt | llm

response = chain.invoke({"image_data": image_data})
print(response.content)



In [None]:
# Retrieve
question = "Okay thank you so much thats all?"
retrieval_chain  = get_rc_chain(llm)
response = retrieval_chain.invoke({"input":question})
print(response)
print(response.content)

# DB

In [None]:
from db.db_connection import ChatDatabase

db = ChatDatabase('chat.db')

print(db.load_chat_history(chat_id="6fcf537a-8e1e-496b-be68-84841722fa57_1",user_id="6fcf537a-8e1e-496b-be68-84841722fa57"))



## Edit User Details

In [None]:
db.save_user_data("6fcf537a-8e1e-496b-be68-84841722fa57","","elroy7602@gmail.com")

## List All Users and Their Levels

In [None]:

users = db.get_all_users()

for user in users:
    print(user['user_id']," ",db.get_user_level(user['user_id']))


## Delete Specific User

In [None]:
db.delete_user("5b2429f2-7dde-4469-8b88-75910f2e5a5b")

In [None]:
user_level = "intermediate"
levels = ("Beginner", "Intermediate", "Advanced")
print(levels.index(user_level.capitalize()))

In [None]:
def get_user_level_tool(user_id: str) -> str:
    """Look up a user's level through ``db`` and describe it in a sentence.

    Args:
        user_id: Identifier passed to ``db.get_user_level``.

    Returns:
        A sentence with the level, "User not found." when the lookup result
        is falsy, or an error sentence if the lookup raised.
    """
    try:
        level = db.get_user_level(user_id)
        if not level:
            return "User not found."
        return f"Current user level is {level}."
    except Exception as err:
        return f"Error retrieving user level: {str(err)}"
    

hello = get_user_level_tool("6fcf537a-8e1e-496b-be68-84841722fa57")

print(hello)

# Chat History using LangGraph
[LangGraph message history how-to](https://python.langchain.com/docs/how_to/message_history/)

In [12]:
from langchain.agents import initialize_agent
# Use the langchain_openai integration, as the other cells in this notebook do;
# the langchain.chat_models import path is deprecated.
from langchain_openai import ChatOpenAI
import streamlit as st

# Initialize the LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=st.secrets["OpenAI_key"],
    temperature=0
)

## Define a graph

In [2]:
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    """Send the conversation in ``state["messages"]`` to ``llm`` and return its reply."""
    history = state["messages"]
    reply = llm.invoke(history)
    # The reply is handed back under "messages" as this node's state update.
    return {"messages": reply}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)



In [17]:
config = {"configurable": {"thread_id": "abc123"}}

In [None]:
query = "Hi! I'm Bob."

input_messages = [HumanMessage(query)]
# for chunk in app.stream({"messages": input_messages}, config):
#     print(chunk)
    

output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].content
# output["messages"][-1].pretty_print()  # output contains all messages in state

In [None]:
query = "What's my name?"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()

In [None]:
messages = app.get_state(config).values["messages"]
messages

## Removal of messages

In [None]:
from langchain_core.messages import RemoveMessage

updated_messages = [RemoveMessage(m.id) for m in messages]
app.update_state(values = {"messages": updated_messages}, config = config)



## Different Threads

In [None]:
query = "What's my name?"
config = {"configurable": {"thread_id": "abc234"}}

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()

## Chain with Prompt Template

In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer in {language}."),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

runnable = prompt | llm

In [9]:
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict


class State(TypedDict):
    # Conversation messages; the add_messages annotation controls how updates
    # to this field are combined with the existing list.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {language} slot of the "Answer in {language}." system prompt.
    language: str


workflow = StateGraph(state_schema=State)


def call_model(state: State):
    """Pass the whole state through ``runnable`` and return the reply as a messages update."""
    reply = runnable.invoke(state)
    # The reply is wrapped in a one-element list under "messages".
    return {"messages": [reply]}


workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)



In [None]:
config = {"configurable": {"thread_id": "abc123"}}

# Supply both state fields: the conversation and the language for the system prompt.
input_dict = {
    "messages": [HumanMessage("Hi, I'm Bob.")],
    "language": "Spanish",
}

# Invoke the app compiled in this section, so `output` is produced here
# rather than being whatever an earlier cell left in the kernel.
output = app.invoke(input_dict, config)
print(output)
output["messages"][-1].pretty_print()

## Message History

In [None]:
config = {"configurable": {"thread_id": "abc123"}}
state = app.get_state(config).values

# print(f'Language: {state["language"]}')
for message in state["messages"]:
    message.pretty_print()

## Append new messages manually

In [13]:
from langchain_core.messages import HumanMessage

# Manually add a message to the stored state of the thread identified by `config`.
_ = app.update_state(config, {"messages": [HumanMessage("Test")]})

In [None]:
state = app.get_state(config).values

print(f'Language: {state["language"]}')
for message in state["messages"]:
    message.pretty_print()

# Agentic Retrieval

In [None]:
from langchain.tools.retriever import create_retriever_tool
from utils.chunk_doc import get_retriever
import os
import streamlit as st

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY']= st.secrets["New_Langsmith_key"]
os.environ['LANGCHAIN_PROJECT']="default"

retriever = get_retriever()

retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_documents",
    "Search and return relevant documents based on user's query.",
)

tools = [retriever_tool]

## State

In [2]:
from typing import Annotated, Sequence
from typing_extensions import TypedDict

from langchain_core.messages import BaseMessage

from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    # The add_messages function defines how an update should be processed
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Routing decision ("proceed" or "redirect") written by the topic-validation
    # node and read by its conditional edge via x["next"]. It must be declared
    # here for the value to be kept in the graph state.
    next: str

In [8]:
from typing import Annotated, Literal, Sequence
from typing_extensions import TypedDict

from langchain import hub
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from pydantic import BaseModel, Field


from langgraph.prebuilt import tools_condition

### Edges


def grade_documents(state) -> Literal["generate", "rewrite"]:
    """
    Determines whether the retrieved documents are relevant to the question.

    The first message in the state is taken as the question and the content of
    the last message as the retrieved documents. A structured-output LLM call
    produces a 'yes'/'no' score, which is compared case-insensitively.

    Args:
        state (messages): The current state

    Returns:
        str: "generate" if the documents were graded relevant, otherwise "rewrite"
    """

    print("---CHECK RELEVANCE---")

    # Data model
    class grade(BaseModel):
        """Binary score for relevance check."""

        binary_score: str = Field(description="Relevance score 'yes' or 'no'")

    # LLM
    model = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)

    # LLM with tool and validation
    llm_with_tool = model.with_structured_output(grade)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
        Here is the retrieved document: \n\n {context} \n\n
        Here is the user question: {question} \n
        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    # Chain
    chain = prompt | llm_with_tool

    messages = state["messages"]
    last_message = messages[-1]

    question = messages[0].content
    docs = last_message.content

    scored_result = chain.invoke({"question": question, "context": docs})

    score = scored_result.binary_score

    # Normalise before comparing so that "Yes" or " yes" is not treated as "no";
    # validate_dsa_question lower-cases its answer in the same way.
    if score.strip().lower() == "yes":
        print("---DECISION: DOCS RELEVANT---")
        return "generate"

    else:
        print("---DECISION: DOCS NOT RELEVANT---")
        print(score)
        return "rewrite"

def validate_dsa_question(state) -> dict:
    """
    Determines whether the question is DSA-related before proceeding with retrieval.

    This function is registered as a graph node, so it returns a state update
    (a dict), not a bare routing label. The routing label is carried in the
    "next" key of that update.

    Args:
        state (messages): The current state containing the user's question

    Returns:
        dict: {"messages": <new messages to add>, "next": "proceed" | "redirect"}.
              On "redirect" the new message is the assistant's redirect text.
    """
    # Local import: the redirect text is the assistant's reply, so it is stored
    # as an AIMessage rather than a HumanMessage.
    from langchain_core.messages import AIMessage

    print("---VALIDATING DSA TOPIC---")
    
    # Data model for structured output
    class ValidationResult(BaseModel):
        """Binary validation for DSA-related questions."""
        is_dsa: str = Field(description="Binary 'yes' or 'no' indicating if question is DSA-related")
        explanation: str = Field(description="Brief explanation of why the question is or isn't DSA-related")
        redirect_message: str = Field(description="Polite redirect message if question isn't DSA-related")

    # LLM setup
    model = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)
    llm_with_validation = model.with_structured_output(ValidationResult)
    
    # Validation prompt
    prompt = PromptTemplate(
        template="""You are validating whether questions are related to Data Structures and Algorithms (DSA).\n
If question is non-DSA, redirect politely: \n
"While [topic] is interesting, I specialise in data structures and algorithms. Could you ask me about DSA concepts instead?"\n
\n

Question to validate: {question}\n
Determine if this is a DSA-related question. If it's not, redirect user to ask about DSA concepts instead.
""",
        input_variables=["question"]
    )
    
    # Get the user's question from state
    messages = state["messages"]
    question = messages[0].content
    
    # Run validation
    chain = prompt | llm_with_validation
    result = chain.invoke({"question": question})
    
    if result.is_dsa.strip().lower() == "yes":
        print("---DECISION: VALID DSA QUESTION---")
        # Nothing new to add to the conversation; only record the route.
        return {"messages": [], "next": "proceed"}

    print("---DECISION: NON-DSA QUESTION---")
    # Return only the new message; the add_messages annotation on the state
    # combines it with the existing history.
    return {
        "messages": [AIMessage(content=result.redirect_message)],
        "next": "redirect",
    }
### Nodes


def agent(state):
    """
    Call the tool-enabled chat model on the conversation so far.

    The model is bound to the module-level ``tools`` list, so its reply may be
    either a direct answer or a request to call the retriever tool.

    Args:
        state (messages): The current state

    Returns:
        dict: A state update whose "messages" list holds the model's reply
    """
    print("---CALL AGENT---")
    history = state["messages"]

    # Fresh model instance with the tools attached.
    tool_model = ChatOpenAI(
        model_name="gpt-4o-mini", temperature=0, streaming=True
    ).bind_tools(tools)
    print('messages:\n',history)

    reply = tool_model.invoke(history)
    # Returned as a list so it is added to the existing messages.
    return {"messages": [reply]}


def rewrite(state):
    """
    Ask the model to reformulate the original question.

    The first message in the state is treated as the question; it is embedded
    in a single human prompt asking for an improved version.

    Args:
        state (messages): The current state

    Returns:
        dict: A state update whose "messages" list holds the model's reply
    """

    print("---TRANSFORM QUERY---")
    question = state["messages"][0].content

    rewrite_prompt = HumanMessage(
            content=f""" \n 
    Look at the input and try to reason about the underlying semantic intent / meaning. \n 
    Here is the initial question:
    \n ------- \n
    {question} 
    \n ------- \n
    Formulate an improved question: """,
        )

    # Model that performs the rewrite
    rewriter = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)
    reply = rewriter.invoke([rewrite_prompt])
    return {"messages": [reply]}


def generate(state):
    """
    Generate the final answer from the retrieved context.

    The first message in the state is treated as the question and the content
    of the last message as the retrieved context. The user level is currently
    fixed to "beginner".

    Args:
        state (messages): The current state

    Returns:
         dict: A state update whose "messages" list holds the model's answer
               as a chat-model message
    """
    print("---GENERATE---")
    messages = state["messages"]
    question = messages[0].content
    user_level = "beginner"
    last_message = messages[-1]

    docs = last_message.content

    # Prompt
    # prompt = hub.pull("rlm/rag-prompt")
    prompt = PromptTemplate(
        template="""
You are an assistant for question-answering tasks. Use the following pieces of retrieved context and answer the question based on user's level of competency.\n
\n
User Level: {user_level}\n
Question: {question}\n
Context: {context}\n
\n
TEACHING APPROACH:\n
[beginner]\n
- Use analogies (arrays as parking lots, stacks as plates)\n
- Focus on fundamentals\n
- Avoid complexity discussions\n
- Break down step-by-step\n
\n
[intermediate]\n
- Include implementation details\n
- Basic complexity analysis\n
- Compare approaches\n
- Code examples when relevant\n

[advanced]\n
- Deep optimization discussion\n
- Edge cases and tradeoffs\n
- Advanced implementation details\n
- System design considerations\n

RULES:\n
1. Strictly state if using general knowledge:\n
   "While the context doesn't cover this specifically, from my general knowledge..."\n

2. Stay within user's level:\n
   - Beginner: No complexity, focus on intuition\n
   - Intermediate: Basic complexity, simple implementations\n
   - Advanced: Deep technical details, optimizations\n

5. One concept at a time:\n
   "Let's focus on [concept] first. Would you like to explore [related concept] after?"\n
\n
""",
        input_variables=["context", "question","user_level"],
    )

    # LLM
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)

    # Chain: keep the model's message object instead of parsing it to a plain
    # string. A bare string placed under "messages" is coerced to a human
    # message, which would mislabel the assistant's answer in the history.
    rag_chain = prompt | llm

    # Run
    response = rag_chain.invoke({"context": docs, "question": question,"user_level": user_level})
    return {"messages": [response]}


# print("*" * 20 + "Prompt[rlm/rag-prompt]" + "*" * 20)
# prompt = hub.pull("rlm/rag-prompt").pretty_print()  # Show what the prompt looks like

In [9]:

from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import ToolNode

# Define a new graph
# Graph over AgentState for the agentic-retrieval flow.
workflow = StateGraph(AgentState)

# The retriever tool is wrapped in a prebuilt ToolNode.
retrieve = ToolNode([retriever_tool])

# Register every node, validation first.
for node_name, node_callable in (
    ("validate_topic", validate_dsa_question),
    ("agent", agent),
    ("retrieve", retrieve),
    ("rewrite", rewrite),
    ("generate", generate),
):
    workflow.add_node(node_name, node_callable)

# Entry point: every run starts with topic validation.
workflow.add_edge(START, "validate_topic")

# After validation: continue to the agent, or stop with the redirect message.
validation_routes = {
    "proceed": "agent",
    "redirect": END,
}
workflow.add_conditional_edges(
    "validate_topic",
    lambda x: x["next"],  # the route is read from the "next" key of the state
    validation_routes,
)

# After the agent: tools_condition decides between calling the retriever and ending.
agent_routes = {
    "tools": "retrieve",
    END: END,
}
workflow.add_conditional_edges("agent", tools_condition, agent_routes)

# After retrieval: grade_documents picks answer generation or a query rewrite.
retrieval_routes = {
    "generate": "generate",
    "rewrite": "rewrite",
}
workflow.add_conditional_edges("retrieve", grade_documents, retrieval_routes)

# Generation ends the run; a rewritten query goes back to the agent.
workflow.add_edge("generate", END)
workflow.add_edge("rewrite", "agent")

from test_templates.intial_template import memory

# Compile with the imported checkpointer.
graph = workflow.compile(checkpointer=memory)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so keep
    # going, but say why the picture is missing instead of failing silently.
    print(f"Graph rendering skipped: {exc}")

In [None]:
import pprint

# inputs = {
#     "messages": [
#         HumanMessage("What is quick sort")
#     ]
# }

inputs = {
    "messages": [
        HumanMessage("What is insertion sort?"),
    ]
}

# The graph is compiled with a checkpointer, so each run must name the thread
# whose state it reads and writes.
run_config = {"configurable": {"thread_id": "agentic-retrieval-demo"}}

output = graph.invoke(inputs, run_config)
print('output:',output["messages"][-1].content)
# for output in graph.stream(inputs):
#     for key, value in output.items():
#         pprint.pprint(f"Output from node '{key}':")
#         pprint.pprint("---")
#         pprint.pprint(value, indent=2, width=80, depth=None)
#     pprint.pprint("\n---\n")

In [None]:
print(output["messages"][-1].content)

# Image Chain

In [1]:
from langchain.agents import initialize_agent
# Use the langchain_openai integration, as the other cells in this notebook do;
# the langchain.chat_models import path is deprecated.
from langchain_openai import ChatOpenAI
import streamlit as st

# Initialize the LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=st.secrets["OpenAI_key"],
    temperature=0
)

  llm = ChatOpenAI(


In [12]:
from test_templates.image_template import image_app
def read_image_bytes(image_path):
    """Return the full contents of the file at ``image_path`` as bytes."""
    with open(image_path, mode='rb') as handle:
        raw_bytes = handle.read()
    return raw_bytes
img = read_image_bytes('test_image_1.jpeg')
import base64

image_data = base64.b64encode(img).decode("utf-8")



# image_chain = get_image_chain()

# Multimodal user message: a text instruction followed by the image as a base64 data URL.
message_content = [{"type": "text", "text": "Describe the image provided"}]
message_content.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
                })

# Graph input: one user message plus a top-level "user_level" entry.
final_message = {
    "messages":[
    {
        "role": "user",
        "content": message_content,
    }],
    "user_level": "beginner"}

# NOTE(review): the saved output of this cell is `KeyError: 'user_level'`.
# Presumably image_app looks up user_level somewhere this input does not
# reach - confirm against test_templates.image_template.
config = {"configurable": {"thread_id": "abc123"}}
image_app.invoke(input = final_message,config=config)

KeyError: 'user_level'