In [1]:
from logging import config
# from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.vectorstores import Chroma
import psycopg2
import openai
from langchain_core.prompts import ChatPromptTemplate
from langchain.tools import Tool
from langchain_core.tools import BaseTool, tool
from core.llm import get_model
from core.settings import settings
from langchain.agents import initialize_agent, AgentType
from langgraph.managed import RemainingSteps
from agents.llama_guard import LlamaGuardOutput
from langgraph.graph import END, MessagesState, StateGraph
from typing import Any
from langchain_core.runnables import RunnableConfig, RunnableLambda, RunnableSerializable
from langchain_core.messages import BaseMessage, AIMessage, convert_to_messages
from langchain.agents import load_tools, create_react_agent, AgentExecutor
from langchain_core.output_parsers import StrOutputParser
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.sql_database import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
import re
from typing import Literal
from pydantic import BaseModel, Field
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_openai import ChatOpenAI
 
DB_CONNECTION_STRING = "postgresql://postgres:123456@host.docker.internal:5433/intellab-db"
MAX_STRING_LENGTH = 1000000
# DB_CONNECTION_STRING = "postgresql://postgres:123456@localhost:5433/intellab-db"

class AgentState(MessagesState, total=False):
    """`total=False` is PEP589 specs.

    documentation: https://typing.readthedocs.io/en/latest/spec/typeddict.html#totality
    """
    course_name: str
    course_id: str
    response: str
    is_contained: bool
    
    
def get_schema(_):
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING)  # Adjust as needed
    schema = db.get_table_info()
    return schema

# template = """
# You are tasked with summarizing lessons from a course based on a given course name. Use the provided table schema, question, SQL query, and SQL response to generate a natural language response.
# Do not need to tell the process, just return the narutal summarization response with at least 150 words and do not abbreviate
# Prompt Template:

# Schema:
# {schema}

# Task: Based on the table schema, question, SQL query, and SQL response:

# Generate the SQL query below by replacing the placeholder {course_id} with the actual course name provided by the user.
# SQL Query:
# SELECT lesson_name, content FROM lessons WHERE course_id = '{course_id}';

# Once the query is executed, use the query result ({response}) to summarize the lessons of {course_name} and generate a natural language response to the question.

# Output format: just return the summary content, not SQL generation

# {response}
# Question: Summarize all lessons for the course with name {course_name}.
# """
template = """
You are tasked with summarizing lessons from a course based on a given course name. Use the provided question, SQL query, and SQL response to generate a natural language response.
Do not need to tell the process, just return the narutal summarization response with AT LEAST 150 words
Prompt Template:

Task: Based on the table schema, question, SQL query, and SQL response:

Use the query result ({response}) to summarize the lessons of {course_name} and generate a natural language response to the question.

Output format: just return the summary content, not SQL generation

{response}
Question: Summarize all lessons for the course with name {course_name}.
"""
prompt_response = ChatPromptTemplate.from_template(template)

def run_query(course_id):
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING)  # Adjust as needed
    query = f"SELECT lesson_name, content FROM lessons WHERE course_id = '{course_id}' LIMIT 3"
    return db.run(query)

# llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.0)
# llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# --- Utilites ---
def extract_course_info(input_string):
    # Regular expression to match the course name, ID, and regenerate flag
    pattern = r"course name: (.*?), id: (.*?), regenerate: (true|false)"
    
    # Search the string for matches
    match = re.search(pattern, input_string, re.IGNORECASE)
    
    if match:
        # Extracted groups
        course_name = match.group(1)
        course_id = match.group(2)
        regenerate = match.group(3).lower() == 'true'  # Convert to boolean
        return {
            "course_name": course_name,
            "course_id": course_id,
            "regenerate": regenerate
        }
    else:
        raise ValueError("Input string does not match the expected format.")


# ---- extract message node ----
def extract_message(state: AgentState) -> Literal["check_contained_summary", "generate"]:
    print("-------- EXTRACT MESSAGE ---------")
    message_content = state["messages"][-1].content
    extract_values = extract_course_info(message_content)
    print(extract_values)
    return {
        "course_name": extract_values["course_name"],
        "course_id": extract_values["course_id"],
        "regenerate": extract_values["regenerate"]
    }

# check summary content tool:
# contain -> finalize response, otherwise generate

# ----  check contained summary CONDITIONAL node----
def check_contained_summary(state: AgentState) -> Literal["retrieve_existing", "generate"]:
    print("-------- CHECK CONTAINED SUMMARY ---------")
    course_name = state['course_name']
    regenerate = state['regenerate']
    query = f"""
        SELECT summary_content
        FROM course_summary
        WHERE course_name = '{course_name}';
    """
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING)  # Adjust as needed
    result = db.run(query)
    if result == '' or regenerate:
        print("-------- REGENERATE --------")
        return "generate"
    print("-------- RETRIEVE EXISTING --------")
    return "retrieve_existing"

# ---- retrieve existing summary content node ----
def retrieve_existing(state: AgentState):
    print("------- EXISTED --------")
    course_name = state['course_name']
    query = f"""
        SELECT summary_content
        FROM course_summary
        WHERE course_name = '{course_name}';
    """
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING, max_string_length=MAX_STRING_LENGTH)  # Adjust as needed
    result = db.run(query)
    cleaned_string = re.sub(r"^\[\('", "", result)
    cleaned_string = re.sub(r"\',\)\]$", "", cleaned_string)
    return {"response": cleaned_string}

# ---- generate node ----
def generate(state: AgentState, config: RunnableConfig):
    print("-------- GENERATE ---------")
    llm = get_model(config["configurable"].get("model", settings.DEFAULT_MODEL))
    full_chain = (
        RunnablePassthrough.assign(
            schema=get_schema,
            response=lambda vars: run_query(vars["course_id"]),
        )
        | prompt_response
        | llm
        | StrOutputParser()
    )
    response = full_chain.invoke({"course_name": state["course_name"], "course_id": state["course_id"]})
    with open("hehet.txt", "w") as f:
        f.write(response)
    return {"response": response}
 
# compare new with existing content
# more informative and valuable -> store to db, otherwise ignore new content and get the existing
class ComparisonContent(BaseModel):
    """Binary score to assess the informative between generated content and existing content."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )
    
CHECKING_SYSTEM = """
You are the grader system assessing whether the new summary {new_summary} of {course_name} is more informative and valuable than the existing summary content {existing_content}.
Give a binary score 'yes' or 'no', where 'yes' means that the answer is new summary content more informative and valuable than existing content.
"""

CHECKING_PROMPT = ChatPromptTemplate.from_template(CHECKING_SYSTEM)

# ---- retrieve existing summary CONDITIONAL node
def retrieve_existing_summary(state: AgentState, config: RunnableConfig) -> Literal["finalize_response", "store_summary"]:
    print("-------- Retrieve existing Summary ---------")
    llm = get_model(config["configurable"].get("model", settings.DEFAULT_MODEL))
    query = f"""
        SELECT summary_content
        FROM course_summary
        WHERE course_name = '{state['course_name']}';
    """
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING, max_string_length=MAX_STRING_LENGTH)  # Adjust as needed
    result = db.run(query)
    cleaned_string = re.sub(r"^\[\('", "", result)
    cleaned_string = re.sub(r"\',\)\]$", "", cleaned_string)
    
    model = CHECKING_PROMPT | llm.with_structured_output(ComparisonContent)
    comparison_grade: ComparisonContent = model.invoke({"existing_content": cleaned_string, "new_summary": state["response"], "course_name": state["course_name"]})
    if comparison_grade.binary_score == "no":
        print("-------- NO - RESPONSE --------")
        return "finalize_response"
    else:
        print("-------- YES - STORE --------")
        return "store_summary"

# store content to db
# ---- store summary node ----
def store_summary(state: AgentState):
    print("-------- STORE SUMMARY --------")
    new_content = state["response"]
    course_name = state["course_name"]
    course_id = state["course_id"]
    
    query = f"""
        SELECT summary_content
        FROM course_summary
        WHERE course_name = '{course_name}';
    """
    db = SQLDatabase.from_uri(DB_CONNECTION_STRING)  # Adjust as needed
    result = db.run(query)
    if result == '':
        # No summary record exists for the course, so insert new content
        insert_query = f"""
            INSERT INTO course_summary (course_id, course_name, summary_content)
            VALUES ('{course_id}', '{course_name}', '{new_content}');
        """
        db.run(insert_query)
        print(f"New summary added for course: {course_name}.")
    else:
        # Summary record exists but content is NULL, so update it
        update_query = f"""
            UPDATE course_summary
            SET summary_content = '{new_content}'
            WHERE course_id = '{course_id}';
        """
        db.run(update_query)
        print(f"Summary updated for course: {course_name}.")
    # else:
    #     print(f"Summary already exists for course: {course_name}. No action needed.")
    return {"response": new_content}

# ---- finalize response node ----
def finalize_response(state: AgentState):
    print("---FINALIZING THE RESPONSE---")
    print(state["response"])
    return {"messages": [AIMessage(content=state["response"])]}


agent = StateGraph(AgentState)

agent.add_node("extract_message", extract_message)
agent.add_node("generate", generate)
agent.add_node("retrieve_existing", retrieve_existing)
agent.add_node("store_summary", store_summary)
agent.add_node("finalize_response", finalize_response)

agent.set_entry_point("extract_message")
agent.add_edge("retrieve_existing", "finalize_response")
agent.add_edge("store_summary", "finalize_response")
agent.add_edge("finalize_response", END)

agent.add_conditional_edges(
    "extract_message",
    check_contained_summary
)

agent.add_conditional_edges(
    "generate",
    retrieve_existing_summary
)


summarize_assistant = agent.compile()


# inputs = {"messages": [("human", "The Logic Building Problems")]}
# for output in summarize_assistant.stream(inputs):
#     print(output)
#     print("\n---\n")

ModuleNotFoundError: No module named 'core'

In [15]:
from datetime import datetime
from fpdf import FPDF
import re
import markdown

output = """Lesson: Implement a stack using singly linked list\\n\\n• **Push Operation**: The push operation involves creating a new node, updating its data, linking it to the top of the linked list, and updating the top pointer. This operation has a time complexity of O(1) because it only requires traversing the current pointer.\\n• **Pop Operation**: The pop operation involves removing the first node from the linked list by updating the head pointer to the next node in the list. This operation also has a time complexity of O(1) for the same reason as the push operation.\\n• **Time and Auxiliary Space Complexity Analysis**: Both push and pop operations have a time complexity of O(1), making them efficient for stack operations. However, the auxiliary space complexity is O(N), where N is the size of the stack, because each node in the linked list requires additional memory.\\n\\nHere is the summary of the lesson:\\n\\nLesson: Applications, Advantages and Disadvantages of Stack\\n\\n• **Function calls**: Stacks are used to keep track of return addresses of function calls, allowing programs to return to the correct location after a function has finished executing. This is achieved through push and pop operations on the stack.\\n• **Efficiency**: Push and pop operations on a stack can be performed in constant time (O(1)), providing efficient access to data. This makes stacks suitable for applications where fast data retrieval is crucial.\\n• **Last-in, First-out (LIFO) principle**: Stacks follow the LIFO principle, ensuring that the last element added to the stack is the first one removed. This behavior is useful in many scenarios, such as function calls and expression evaluation.\\n\\nNote: I did not include the other concepts mentioned in the lesson content, as they are more related to specific applications of stacks rather than fundamental principles or techniques.\\n\\nLesson: What is Stack Data Structure? A Complete Tutorial\\n\\n• **LIFO Principle**: The stack follows the Last In First Out (LIFO) principle, where elements are added and removed in reverse order of their addition.\\n• **Types of Stacks**: There are two types of stacks: Fixed Size Stack and Dynamic Size Stack. Fixed size stacks have a fixed capacity, while dynamic size stacks can grow or shrink dynamically.\\n• **Basic Operations on Stack**: The basic operations on a stack include push, pop, top, isEmpty, and isFull. These operations allow for the manipulation of elements in the stack, including adding, removing, and checking the status of the stack.\\n\\nNote: Complexity analysis is not explicitly mentioned in the provided content, but based on general knowledge, the time complexity for these operations would be O(1) for push, pop, top, isEmpty, and isFull.\\n\\nLesson: Implement Stack using Array\\n\\n• **Initialization of Stack**: The stack is initialized by creating an array, treating its end as the top of the stack, and defining a capacity for the stack.\\n• **Push Operation**: The push operation adds an item to the stack. If the stack is full, it results in an overflow condition. The algorithm checks if the stack is full before pushing the element, and if so, it cannot be inserted into the stack.\\n• **Pop Operation**: The pop operation removes an item from the stack. If the stack is empty, it results in an underflow condition. The algorithm checks if the stack is empty before popping the element, and if so, it cannot remove any element from the stack.\\n\\nNote: The complexity analysis for each operation is as follows:\\n- Time Complexity: push (O(1)), pop (O(1)), peek (O(1)), isEmpty (O(1)), isFull (O(1))\\n- Auxiliary Space: O(n), where n is the number of items in the stack.\\n\\nBased on the provided lesson summaries, here\'s a comprehensive overview of the key concepts and recurring techniques:\\n\\n**Overview**\\n\\nThe course covers the fundamental principles and techniques of implementing a stack data structure using different approaches. The core concept of a stack is introduced, along with its applications, advantages, and disadvantages.\\n\\n**Key Concepts**\\n\\n1. **Last-In-First-Out (LIFO) Principle**: Stacks follow this principle, ensuring that the last element added to the stack is the first one removed.\\n2. **Basic Operations**: Push, pop, top, isEmpty, and isFull are the basic operations on a stack, allowing for manipulation of elements in the stack.\\n3. **Time and Auxiliary Space Complexity Analysis**: Both push and pop operations have a time complexity of O(1), making them efficient for stack operations. However, auxiliary space complexity is O(N) due to the additional memory required for each node.\\n\\n**Recurring Techniques**\\n\\n1. **Singly Linked List Implementation**: The first lesson introduces implementing a stack using a singly linked list, which provides an efficient way to perform push and pop operations.\\n2. **Array-Based Implementation**: The third lesson covers implementing a stack using an array, which is useful when the capacity of the stack needs to be fixed or dynamic.\\n\\n**Common Themes**\\n\\n1. **Efficiency**: Stacks are designed to provide efficient access to data through constant-time push and pop operations.\\n2. **Last-In-First-Out (LIFO) Principle**: The LIFO principle is a fundamental property of stacks, ensuring that elements are removed in the reverse order of their addition.\\n\\n**Applications**\\n\\n1. **Function Calls**: Stacks are used to keep track of return addresses of function calls, allowing programs to return to the correct location after a function has finished executing.\\n2. **Expression Evaluation**: Stacks can be used to evaluate expressions by following the LIFO principle.\\n\\nOverall, the course provides a comprehensive introduction to stacks and their applications, highlighting the importance of efficiency, LIFO principle, and basic operations in implementing stack data structures using different approaches."""
formatted_content = re.sub(r"\\n", "\n", output)
# Convert Markdown to HTML
html_text = markdown.markdown(formatted_content)

pdf_path ="./summary.pdf"
pdf = FPDF()
pdf.add_page()

# Load a Unicode font (make sure DejaVuSans.ttf is in the same directory)
pdf.add_font("DejaVu", "", "../documents/DejaVuSans.ttf", uni=True)
pdf.add_font("DejaVu", "B", "../documents/DejaVuSans-Bold.ttf", uni=True)  # Bold font

pdf.set_font("DejaVu", size=10)
pdf.cell(200, 10, txt="Summary", ln=True, align="C")
pdf.cell(200, 10, txt=f"Date: {datetime.now().strftime('%Y-%m-%d')}", ln=True, align="L")
pdf.multi_cell(0, 10, txt=formatted_content)

# pdf.write_html(html_text)
pdf.output("summary.pdf")
print("✅ PDF successfully generated!")
# # Save to a Markdown file
# md_file_path = "summary.md"
# with open(md_file_path, "w", encoding="utf-8") as md_file:
#     md_file.write(formatted_content)

# print(f"✅ Markdown file saved as {md_file_path}")

✅ PDF successfully generated!


  pdf.add_font("DejaVu", "", "../documents/DejaVuSans.ttf", uni=True)
  pdf.add_font("DejaVu", "B", "../documents/DejaVuSans-Bold.ttf", uni=True)  # Bold font
  pdf.cell(200, 10, txt="Summary", ln=True, align="C")
  pdf.cell(200, 10, txt="Summary", ln=True, align="C")
  pdf.cell(200, 10, txt=f"Date: {datetime.now().strftime('%Y-%m-%d')}", ln=True, align="L")
  pdf.cell(200, 10, txt=f"Date: {datetime.now().strftime('%Y-%m-%d')}", ln=True, align="L")
  pdf.multi_cell(0, 10, txt=formatted_content)


In [None]:
from fpdf import FPDF
import re

# Function to detect "Lesson:" and apply bold formatting
def process_markdown(md_text, pdf):
    lines = md_text.split("\n")  # Split content into lines
    for line in lines:
        # Check if the line starts with "Lesson:"
        lesson_match = re.match(r"^(Lesson: )(.*)", line)
        if lesson_match:
            # Extract lesson label and content
            lesson_label = lesson_match.group(1)  # "Lesson: "
            lesson_content = lesson_match.group(2)  # "What is Stack Data Structure? A Complete Tutorial"
            
            # Apply bold to "Lesson:" part
            pdf.set_font("DejaVu", "B", 12)  # Set bold font
            pdf.multi_cell(0, 10, lesson_label, align="L")
            
            # Normal font for content
            pdf.set_font("DejaVu", "", 12)
            pdf.multi_cell(0, 10, lesson_content, align="L")
        else:
            # Normal text processing
            pdf.multi_cell(0, 10, line, align="L")

# Read the Markdown file
with open("summary.md", "r", encoding="utf-8") as file:
    markdown_content = file.read()

# Create a PDF document
pdf = FPDF()
pdf.add_page()
pdf.add_font("DejaVu", "", "../documents/DejaVuSans.ttf", uni=True)  # Load font
pdf.add_font("DejaVu", "B", "../documents/DejaVuSans-Bold.ttf", uni=True)  # Load bold font
pdf.set_font("DejaVu", size=9)

# Add title
pdf.set_font("DejaVu", "B", 14)
pdf.cell(200, 10, txt="Summary", ln=True, align="C")
pdf.ln(10)  # Line break

# Process markdown content and apply bold to "Lesson:"
pdf.set_font("DejaVu", "", 12)
process_markdown(markdown_content, pdf)

# Save the PDF
pdf.output("summary.pdf")

print("✅ PDF successfully generated with 'Lesson:' in bold!")


  pdf.add_font("DejaVu", "", "../documents/DejaVuSans.ttf", uni=True)  # Load font
  pdf.add_font("DejaVu", "B", "../documents/DejaVuSans-Bold.ttf", uni=True)  # Load bold font
  pdf.cell(200, 10, txt="Summary", ln=True, align="C")
  pdf.cell(200, 10, txt="Summary", ln=True, align="C")


FPDFException: Not enough horizontal space to render a single character