## Core Components
First we build our llm, our SQL database, our embedding model, and our document vector store.

In [1]:
from langchain_ollama import ChatOllama
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from langchain.prompts import PromptTemplate
from langgraph.graph import StateGraph
from typing import TypedDict, Annotated
from langgraph.graph import add_messages, START, END
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.config import get_stream_writer

### Build the Main Components
Create the LLM, the SQL database, the embeddings model, the RAG vector store, and the RAG chain.

In [2]:
# chat model
llm = ChatOllama(model="mistral:latest")

# SQL database ---------------------------------------
db = SQLDatabase.from_uri(
    f"postgresql+psycopg2://postgres:password@localhost:5432/ta_database",
)

db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
# --------------------------------------------------------

# embeddings for vector store retriever -----------------
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# the vector store, course content
vector_store = Chroma(
    collection_name="asm_z80",
    embedding_function=embeddings,
    host="localhost",
)

rag_prompt = PromptTemplate(template="""You are an assistant for gathering relevant course material.
                Summarize the content in a few sentences. If the content is empty, answer ONLY a single word, 'NONE'.
                Question: {question}
                Content: {context}
                Answer: """)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {
        "context": vector_store.as_retriever() | format_docs,
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | llm
    | StrOutputParser()
)
# ------------------------------------------------------



### Build the SQL Chain
Create the SQL chain for getting grade information.

In [3]:
# Define schema for validation
class SQLQuery(BaseModel):
    query: str = Field(description="A valid SQL query targeting the student/assignment/grade database")

    @validator("query")
    def must_start_with_select(cls, v):
        if not v.strip().lower().startswith("select"):
            raise ValueError("Only SELECT queries are allowed.")
        return v

parser = PydanticOutputParser(pydantic_object=SQLQuery)

prompt = PromptTemplate(
    template=(
        "You are a helpful SQL generator.\n\n"
        "User request: {question}\n\n"
        "Generate a valid SQL SELECT query for the students/assignments/grades schema.\n"
        "{format_instructions}"
    ),
    input_variables=["question"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

sql_chain = prompt | llm | parser

/tmp/ipykernel_159922/2506761555.py:5: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  @validator("query")


## Setup The State for our Agent
This will create the parts of the agent state we want to track and manipulate, including messages.

In [4]:
class State(TypedDict):
    # here we can also put state tracking on status, if we are quizzing, etc.
    messages: Annotated[list[BaseMessage], add_messages]
    init_question: str
    sql: str # the sql response
    answer: str #the rag response
    rag_attempts: int
    route_attempts: int
    route: str

In [5]:
# Node for converting the user query into a SQL command.
def init_node(state: State) -> dict:
    init_msg = "Please introduce yourself, your job, and the course description."
    for msg in reversed(state["messages"]):
        if msg.type == "human":
            init_msg = msg.content
            break
    return {"init_question": init_msg, "sql": None, "answer": None, "rag_attempts": 0, "route_attempts": 0, "route": None}

def sql_generator_node(state: State) -> dict:
    # latest user message
    user_msg = state["init_question"]

    # run through LLM + parser
    sql_query = sql_chain.invoke({"question": user_msg})

    return {"sql": sql_query.query}

# Node for executing the SQL command that was generated.
def sql_execution_node(state: State) -> dict:
    result = db_chain.run(state["sql"])
    return {"messages": [("sql", f"Query result: {result}")]}

def response_node(state: State):
    """Generate answer."""
    # Get generated SQL and RAG responses, and initial question
    sql_answer = state["sql"]
    rag_answer = state["answer"]
    convo_messages = []

    # get the messages.
    for message in reversed(state["messages"]):
        convo_messages.append(message)
    
    system_message_content = (
        "You are a teacher, answering students questions."
        "Use the following information to answer the questions. "
        "If you don't know the answer, say that you don't know. "
        "\n\n"
        f"{sql_answer}\n\n"
        f"{rag_answer}\n\n"
    )
    
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and 
            not (message.type == "rag" or message.type == "sql") and 
            not message.tool_calls)
    ]
    
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response], "rag_attempts": 0}

router_prompt = PromptTemplate.from_template(
    "Answer based on the following context:\n\n"
    "Messages: {messages}\n\n"
    "Answer with ONLY one word:"
    "'done' if all questions have been answered,"
    "'sql' if there are unanswered questions about scores or students, or assignments, or grades,"
    "or 'course' if there are unaswered questions about course material,"
)

router_chain = router_prompt | llm

def router(state: State) -> dict:
    if state["route_attempts"] > 5:
        return {"route": "done", "route_attempts": 0}
    
    user_msg = state["init_question"]
    recent_messages = []
    for message in reversed(state["messages"]):
        recent_messages.append(message)
    
    recent_messages = recent_messages[::-1]
    if len(recent_messages) > 10:
        recent_messages = recent_messages[::-10]
    
    context = "\n\n".join(message.content for message in recent_messages)
    route = router_chain.invoke({"messages": user_msg}).content.strip().lower()
    if "sql" in route:
        return {"route": "generate_sql", "route_attempts": state.get("route_attempts", 0) + 1}
    elif "course" in route:
        return {"route": "course", "route_attempts": state.get("route_attempts", 0) + 1}
    else:
        return {"route": "done", "route_attempts": 0}

def routing_function(state: State) -> dict:
    return state["route"].strip().lower()

def rag_retrieve_node(state: State) -> dict:
    user_msg = state["init_question"]
    #messages = state["messages"]
    #messages = "\n\n".join(message.content for message in messages)
    result = rag_chain.invoke(user_msg)  # returns {"result": ..., "source_documents": ...}
    
    if "none" in result.lower():
        return {"answer": None}
    
    return {"answer": result, "messages": [("ai", f"Course answer: {result}")]}

rephrase_prompt = PromptTemplate.from_template(
    "The user asked:\n\n{question}\n\n"
    "This did not retrieve relevant course material. "
    "Rephrase it in a clearer way that might match the knowledge base."
)
rephrase_chain = rephrase_prompt | llm

def rag_rephrase_node(state: State) -> dict:
    user_msg = state["init_question"]
    new_question = rephrase_chain.invoke({"question": user_msg}).content
    return {
        "messages": [("human", new_question)],
        "rag_attempts": state.get("rag_attempts", 0) + 1,
    }

def rag_fallback_node(state: State) -> dict:
    return {"messages": [("ai", "Sorry, I couldn't find relevant course material even after rephrasing.")]}

def rag_router(state: State) -> str:
    if state.get("answer"):
        return "found"  # stop
    elif state.get("rag_attempts", 0) >= 3:
        return "fallback"
    else:
        return "rephrase"



In [6]:
rag_builder = StateGraph(State)

rag_builder.add_node("retrieve", rag_retrieve_node)
rag_builder.add_node("rephrase", rag_rephrase_node)
rag_builder.add_node("fallback", rag_fallback_node)

rag_builder.set_entry_point("retrieve")

rag_builder.add_conditional_edges(
    "retrieve",
    rag_router,
    {
        "found": END,
        "rephrase": "rephrase",
        "fallback": "fallback",
    },
)

# Loop rephrased query back to retrieve
rag_builder.add_edge("rephrase", "retrieve")

rag_graph = rag_builder.compile()

def course_node(state: State) -> dict:
    return rag_graph.invoke(state)

In [7]:
builder = StateGraph(State)

# Add nodes
builder.add_node("init", init_node)
builder.add_node("router", router)
builder.add_node("generate_sql", sql_generator_node)
builder.add_node("execute_sql", sql_execution_node)
builder.add_node("course", course_node)
builder.add_node("response", response_node)

# Entry
builder.set_entry_point("init")

# Conditional edges from router
builder.add_conditional_edges(
    "router",
    routing_function,  # function returning the next node name
    {
        "generate_sql": "generate_sql",
        "course": "course",
        "done": "response"
    },
)

# Chain SQL path
builder.add_edge("init", "router")
builder.add_edge("generate_sql", "execute_sql")
builder.add_edge("execute_sql", "router")
builder.add_edge("course", "router")


memory = MemorySaver()

graph = builder.compile(checkpointer=memory)

In [8]:
graph.get_graph().print_ascii()

                              +-----------+                                 
                              | __start__ |                                 
                              +-----------+                                 
                                     *                                      
                                     *                                      
                                     *                                      
                                 +------+                                   
                                 | init |                                   
                                 +------+                                   
                                     *                                      
                                     *                                      
                                     *                                      
                                +--------+                                  

In [9]:
input_message = "What is the course lesson for day 7?"

config = {"configurable": {"thread_id": "greg"}}

for chunk in graph.stream({"messages": [{"role": "user", "content": input_message}]}, stream_mode="values", config=config):
    for state_key, state_value in chunk.items():
        if state_key == "messages":
            state_value[-1].pretty_print()


What is the course lesson for day 7?

What is the course lesson for day 7?

What is the course lesson for day 7?

Course answer:  The concept of sets and set theory is crucial in computer programming and mathematics. In this context, a set is a collection of distinct elements. The elements can be anything, such as numbers, characters, or even other sets. Here are some important characteristics of sets:

1. Each element in the set should be unique, meaning there cannot be more than one instance of the same element.
2. The order in which the elements appear does not matter, since sets are typically unordered.
3. An element can either belong to a set or not. There is no concept of "between" when it comes to set membership.
4. Sets are denoted using curly braces {}. For example, {0, x, ö, 9, 12, A} represents the set containing the integers 0, 9, and 12, as well as the characters x, ö, and A.

Character Sets in Assembly Programming:
In Assembly programming for TI-83/84 calculators, charac