In [4]:
from langgraph.graph import END, MessagesState, StateGraph
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage
import re


In [61]:
template = """You are an expert in reviewing code. Your mission is analyze complexity, how to optimize to improve performance and reformat following style guides from the given problem and solution. Give feedback to code naturally, meaningful and understandable.
Problem: {problem}
User's solution: {solution}
Answer includes these information:
- Time complexity from the given solution:
- Space complexity from the given solution:
- Give feedback
- Suggest better code"""

problem = """Given an array of integers, return indices of the two numbers such that they add up to a specific target.

You may assume that each input would have exactly one solution, and you may not use the same element twice.

Example:

Given nums = [2, 7, 11, 15], target = 9,

Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1]."""

solution = """let nums = [3, 3];
let target = 6;

const twoSum = (nums, target) => {
  for(let i = 0; i < nums.length; i++){
    for(let j = i + 1; j < nums.length; j++){
      if(nums[i] + nums[j] === target){
        return [i, j]
      }
    }
  }
  return null;
} 
twoSum(nums, target)"""
class AgentState(MessagesState):
    problem: str
    solution: str
prompt = ChatPromptTemplate.from_template(template)
def extract_message(state: AgentState):
  message = state["messages"][-1].content
  # print (f"========= {message} ============")
  pattern = r"Problem:\s*(.*?)\s*Solution:\s*(.*?)"

  match = re.search(pattern, message, re.DOTALL)
  # print (f"========= {match} ============")

  if match:
      problem = match.group(1)
      solution = match.group(2)
      print("Problem:", problem)
      print("Solution:", solution)
      return {"problem": problem, "solution": solution}
  else:
      return {"problen": "", "solution": ""}
    
def acall_model(state: AgentState):
    
    problem = state["problem"]
    solution = state["solution"]
    
    model = ChatOllama(model="codeqwen", base_url="http://localhost:11434")
    review_agent = prompt | model
    response = review_agent.invoke({"problem": problem, "solution": solution})
    return {"messages": [response]}

# Define the graph
agent = StateGraph(MessagesState)
agent.add_node("model", acall_model)
agent.add_node("extract", extract_message)
agent.add_edge("extract", "model")
agent.set_entry_point("extract")
agent.add_edge("model", END)
chatbot = agent.compile(
)

In [62]:
inputs = {"messages": [("human", f"Problem: {problem} Solution: {solution}")]}
response = chatbot.invoke(inputs)
response

Problem: Given an array of integers, return indices of the two numbers such that they add up to a specific target.

You may assume that each input would have exactly one solution, and you may not use the same element twice.

Example:

Given nums = [2, 7, 11, 15], target = 9,

Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].
Solution: 


{'messages': [HumanMessage(content='Problem: Given an array of integers, return indices of the two numbers such that they add up to a specific target.\n\nYou may assume that each input would have exactly one solution, and you may not use the same element twice.\n\nExample:\n\nGiven nums = [2, 7, 11, 15], target = 9,\n\nBecause nums[0] + nums[1] = 2 + 7 = 9, return [0, 1]. Solution: let nums = [3, 3];\nlet target = 6;\n\nconst twoSum = (nums, target) => {\n  for(let i = 0; i < nums.length; i++){\n    for(let j = i + 1; j < nums.length; j++){\n      if(nums[i] + nums[j] === target){\n        return [i, j]\n      }\n    }\n  }\n  return null;\n} \ntwoSum(nums, target)', additional_kwargs={}, response_metadata={}, id='6401ada5-14a6-4b34-aeee-86d022204503'),
  AIMessage(content="Thank you for sharing your solution. Here is a detailed review of your code, including its time and space complexities, as well as some suggestions for optimizing it according to best practices.\n\n**Time Complexity

In [64]:
with open("hehe.txt", "w") as f:
    f.write(response["messages"][-1].content)

In [17]:
def extract_message(m):
  message = m
  pattern = r"Problem:\s*(.*?)\s*Solution:\s*(.*?)"

  match = re.search(pattern, message, re.DOTALL)

  if match:
      problem = match.group(1)
      solution = match.group(2)
      print("Problem:", problem)
      print("Solution:", solution)
      return {"problem": problem, "solution": solution}

t = extract_message(f"Problem: {problem} Solution: {solution}")
t

Problem: Given an array of integers, return indices of the two numbers such that they add up to a specific target.

You may assume that each input would have exactly one solution, and you may not use the same element twice.

Example:

Given nums = [2, 7, 11, 15], target = 9,

Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].
Solution: 


{'problem': 'Given an array of integers, return indices of the two numbers such that they add up to a specific target.\n\nYou may assume that each input would have exactly one solution, and you may not use the same element twice.\n\nExample:\n\nGiven nums = [2, 7, 11, 15], target = 9,\n\nBecause nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].',
 'solution': ''}

In [84]:
from typing import List
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
import psycopg2
import json
from langchain_community.vectorstores import PGVector
from langchain_community.document_loaders import JSONLoader
from pprint import pprint
from langchain_core.documents import Document
from langchain_community.query_constructors.pgvector import PGVectorTranslator
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import get_query_constructor_prompt
from langchain.chains.query_constructor.base import StructuredQueryOutputParser
from langchain_core.output_parsers import StrOutputParser

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
def get_lessons_with_metadata(connection_string: str):
    """
    Connects to PostgreSQL, retrieves lesson and course data, 
    and returns a list of LangChain Documents with metadata.
    """
    query = """
    SELECT 
        l.lesson_id, l.content, l.description AS lesson_description, l.lesson_name,
        c.course_id, c.course_name, c.description AS course_description
    FROM lessons l
    JOIN courses c ON l.course_id = c.course_id;
    """
    
    try:
        # Connect to PostgreSQL database
        conn = psycopg2.connect(connection_string)
        cursor = conn.cursor()
        
        # Execute query
        cursor.execute(query)
        rows = cursor.fetchall()
        
        # Process results into LangChain Documents
        documents = []
        for row in rows:
            lesson_id, content, lesson_description, lesson_name, course_id, course_name, course_description = row
            
            metadata = {
                "lesson_id": lesson_id,
                "course_id": course_id,
                "course_name": course_name,
                "lesson_description": lesson_description,
                "course_description": course_description,
            }
            
            # doc = Document(page_content=content, metadata=metadata)
            documents.append({"page_content": content, "metadata": metadata})
        
        # Write documents to a file
        with open('/Users/mac/HCMUS/datn/agent-service-toolkit/src/researchs/lesson_documents.json', 'w') as f:
            json.dump(documents, f, indent=4)
        
        return documents
        
    except Exception as e:
        print(f"Error: {e}")
        return []
    
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

def chunk_data(data):
    ''' Function to split documents into chunks '''
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)
    return chunks

def create_embeddings():
    ''' Function to create vector embeddings '''
    ollama_embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")
    return ollama_embeddings

def stuff_vectordatabase(chunks, embeddings, collection_name, connection_str):
    ''' Function to load the chunks into the vector database '''
    docsearch = PGVector.from_documents(documents=chunks, embedding=embeddings, connection_string=connection_str, collection_name=collection_name)
    return docsearch

# Example usage
if __name__ == "__main__":
    connection_string = "postgresql://postgres:123456@localhost:5433/intellab-db"

    documents: List[Document] = []
    # lesson_documents = get_lessons_with_metadata(connection_string)
    # for doc in lesson_documents:
    #     document = Document(page_content=doc["page_content"], metadata=doc["metadata"])
    #     documents.append(document)
    # print(len(documents))
    # chunks = chunk_data(documents[:2])
    embeddings = create_embeddings()
    # vectorstore = stuff_vectordatabase(chunks=chunks,embeddings=embeddings,collection_name="lesson_content", connection_str=connection_string)
    vectorstore = PGVector(embedding_function=embeddings, collection_name="lesson_content", connection_string=connection_string)
    # print("### Done!")
    metadata_field_info = [
        AttributeInfo(
            name="lesson_id",
            description="The unique identifier for the lesson",
            type="string",
        ),
        AttributeInfo(
            name="course_id",
            description="The unique identifier for the course",
            type="string",
        ),
        AttributeInfo(
            name="course_name",
            description="The name of the course",
            type="string",
        ),
        AttributeInfo(
            name="lesson_description",
            description="A brief description of the lesson content",
            type="string",
        ),
        AttributeInfo(
            name="course_description",
            description="A detailed description of the course, including learning objectives and topics covered",
            type="string",
        ),
    ]
    
    # Examples for few-shot learning
    examples = [
        (
            "Summarize each lessons from course 'Matrix Data Structure Guide' with course id '4e26b4bd-d406-4641-9d68-3ba8e1c39c97'",
            {
                "query": "all lessons of course",
                "filter": 'and(in("course_name", ["Matrix Data Structure Guide"]),in("course_id", ["4e26b4bd-d406-4641-9d68-3ba8e1c39c97"]))',
            },
        ),
    ]
    

    document_content_description = "Lesson content about datastructure and algorithms techniques"
    
    # Create constructor prompt
    constructor_prompt = get_query_constructor_prompt(
        document_content_description,
        metadata_field_info,
        allowed_comparators=PGVectorTranslator.allowed_comparators,
        examples=examples,
    )
    
    llm = ChatOllama(model="llama3.2", temperature=0, base_url="http://localhost:11434")
    # Create query constructor
    output_parser = StructuredQueryOutputParser.from_components()  
   
    query_constructor = constructor_prompt | llm | output_parser

    # Initialize the Self-Query Retriever
    retriever = SelfQueryRetriever(
        query_constructor=query_constructor,
        vectorstore=vectorstore,
        structured_query_translator=PGVectorTranslator(),
        search_kwargs={'k': 5}
    )


    # retriever = SelfQueryRetriever.from_llm(
    #     llm, vectorstore, document_content_description, metadata_field_info, verbose=True
    # )
    # response = retriever.invoke("What are the name of all lessons in The Logic Building Problems courses?")
    # print(response)
    template = '''You are a summary expert in summarizing lessons from courses about programming. Reply in a professional and friendly tone with each lesson is a bullet point starts with its lessons name and lesson content.
    
    Use this context to answer the question:
    {context}
    Question: {question}'''
    
    prompt = ChatPromptTemplate.from_template(template)

    def format_docs(docs):
        return "\n\n".join(f"{doc.page_content}\n\nMetadata: {doc.metadata}" for doc in docs)

    # Create a chatbot Question & Answer chain from the retriever
    rag_chain_from_docs = (
        RunnablePassthrough.assign(
            context=(lambda x: format_docs(x["context"])))
        | prompt
        | llm
        | StrOutputParser()
    )

    rag_chain_with_source = RunnableParallel(
        {"context": retriever, "question": RunnablePassthrough()}
    ).assign(answer=rag_chain_from_docs)

  vectorstore = PGVector(embedding_function=embeddings, collection_name="lesson_content", connection_string=connection_string)


{'context': [], 'question': {"Summarize each lessons from course 'Matrix Data Structure Guide' with course id '4e26b4bd-d406-4641-9d68-3ba8e1c39c97'"}, 'answer': 'I\'d be happy to help summarize the lessons from the "Matrix Data Structure Guide" course. Here are the key takeaways:\n\n• **Lesson 1: Introduction to Matrices**\nThe course introduces the concept of matrices, a fundamental data structure in linear algebra and computer science. Key concepts covered include matrix representation, operations (addition, multiplication), and properties (symmetry, skew-symmetry).\n\n• **Lesson 2: Matrix Representation**\nThis lesson delves into the different ways to represent matrices, including:\n\t+ Row-major and column-major storage\n\t+ Matrix dimensions and indexing\n\t+ Matrix operations using NumPy arrays\n\n• **Lesson 3: Matrix Operations**\nThe course covers various matrix operations, such as:\n\t+ Addition and subtraction of matrices\n\t+ Multiplication of matrices (element-wise, matrix

In [89]:
 # Example user query
query = "Summarize each lessons from course 'Matrix Data Structure Guide' with course id '4e26b4bd-d406-4641-9d68-3ba8e1c39c97'"
# Perform the retrieval and generate the response
events = []
async for event in rag_chain_with_source.astream_events({query}, version="v1"):
    events.append(event)

# # Display the response
# print(response)

NotImplementedError in LogStreamCallbackHandler.on_llm_end callback: NotImplementedError('Trying to load an object that doesn\'t implement serialization: {\'lc\': 1, \'type\': \'not_implemented\', \'id\': [\'ollama\', \'_types\', \'Message\'], \'repr\': "Message(role=\'assistant\', content=\'\', images=None, tool_calls=None)"}')
NotImplementedError in LogStreamCallbackHandler.on_llm_end callback: NotImplementedError('Trying to load an object that doesn\'t implement serialization: {\'lc\': 1, \'type\': \'not_implemented\', \'id\': [\'ollama\', \'_types\', \'Message\'], \'repr\': "Message(role=\'assistant\', content=\'\', images=None, tool_calls=None)"}')


In [91]:
events

[{'event': 'on_chain_start',
  'run_id': '76734992-52d1-4ebc-b1e5-75b0ae04e269',
  'name': 'RunnableSequence',
  'tags': [],
  'metadata': {},
  'data': {'input': {"Summarize each lessons from course 'Matrix Data Structure Guide' with course id '4e26b4bd-d406-4641-9d68-3ba8e1c39c97'"}},
  'parent_ids': []},
 {'event': 'on_chain_start',
  'name': 'RunnableParallel<context,question>',
  'run_id': 'bdc2e337-59e2-434a-8c4c-450923e2ed2b',
  'tags': ['seq:step:1'],
  'metadata': {},
  'data': {},
  'parent_ids': []},
 {'event': 'on_retriever_start',
  'name': 'SelfQueryRetriever',
  'run_id': 'b2c3c0f5-26c4-4714-b6f6-8526eede6def',
  'tags': ['map:key:context'],
  'metadata': {'ls_retriever_name': 'selfquery'},
  'data': {'input': {'query': {"Summarize each lessons from course 'Matrix Data Structure Guide' with course id '4e26b4bd-d406-4641-9d68-3ba8e1c39c97'"}}},
  'parent_ids': []},
 {'event': 'on_chain_start',
  'name': 'RunnablePassthrough',
  'run_id': 'a7c74e0c-d83c-4972-b511-6f70e25c7

In [95]:
# Custom JSON Encoder to handle unsupported objects
class CustomJSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return list(obj)  # Convert sets to lists
        return str(obj)  # Convert unsupported objects to string
# Write to a JSON file using the custom encoder
with open("output.json", "w", encoding="utf-8") as f:
    json.dump(events, f, indent=4, cls=CustomJSONEncoder)