## Importing libraries

In [37]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import re
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from docx import Document
from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from langchain.chains.graph_qa.cypher_utils import CypherQueryCorrector, Schema
from langchain.prompts import PromptTemplate
from neo4j import GraphDatabase
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable, RunnableConfig
from datetime import datetime
from langchain_core.messages import HumanMessage

## Setting up the OpenAI environment variable


In [38]:
import getpass
import os


def _set_env(var: str):
    """Make sure the environment variable ``var`` holds a non-empty value.

    When the variable is missing or empty, the value is requested
    interactively through ``getpass`` (so it is not echoed) and stored in
    ``os.environ`` for the rest of the process.
    """
    if os.environ.get(var):
        # Already configured: nothing to ask for.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")


_set_env("OPENAI_API_KEY")

## Setting up vector config tool

In [39]:
# Read the Word document and flatten it to one string, one line per paragraph
config_document = Document("RTB Config document compilation.docx")
config_text = "".join(
    f"{paragraph.text}\n" for paragraph in config_document.paragraphs
)

# Cut the text into chunks (chunk_size=2000, chunk_overlap=300); the splitter
# works on size, not on document headings
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=300, chunk_size=2000)
config_doc_chunks = text_splitter.split_text(config_text)

# Embedding model used to index the chunks
OpenAIembedding = OpenAIEmbeddings(model="text-embedding-3-large")

# Index every chunk in a Chroma collection named "config_document"
vector_store = Chroma.from_texts(
    config_doc_chunks,
    OpenAIembedding,
    collection_name="config_document",
)


# Small adapter that turns Chroma similarity hits into plain dictionaries
class VectorStoreRetriever:
    """Similarity search over a Chroma vector store."""

    def __init__(self, vector_store: Chroma):
        self.vector_store = vector_store

    def query(self, query: str, k: int = 5) -> list[dict]:
        """Return the ``k`` best hits for ``query``.

        Each hit is a dict with the chunk text under ``"page_content"`` and
        the score reported by the store under ``"similarity_score"``.
        """
        hits = self.vector_store.similarity_search_with_score(query, k=k)
        formatted = []
        for hit in hits:
            # Each hit is indexed as (document, score).
            formatted.append(
                {
                    "page_content": hit[0].page_content,
                    "similarity_score": hit[1],
                }
            )
        return formatted

# Module-level retriever used by the lookup tool below
retriever = VectorStoreRetriever(vector_store)


@tool
def lookup_config(query: str) -> str:
    """Look up relevant parts of the config document based on the query."""
    # Only the two closest chunks are handed back, separated by a blank line.
    top_hits = retriever.query(query, k=2)
    passages = [hit["page_content"] for hit in top_hits]
    return "\n\n".join(passages)



## Setting up method parsing tool

In [None]:
from docx import Document
# Read the exported class source and flatten it to one string, one line per
# paragraph (joined in a single pass instead of repeated string concatenation).
document = Document("Calss_export_word.docx")
text = "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)

# Non-breaking spaces are replaced so that the \s-based patterns below see
# ordinary whitespace.
cleaned_text = text.replace('\xa0', ' ')

# METHOD <name>. <body> ENDMETHOD.  The look-behind keeps the pattern from
# starting inside a longer word that ends in "METHOD".
method_pattern = r"(?<!\w)METHOD\s+([\w_]+)\.\s*(.*?)\s*ENDMETHOD\."

# Map each lower-cased method name to its implementation body
methods_dict = {
    name.lower().strip(): content.strip()
    for name, content in re.findall(method_pattern, cleaned_text, re.DOTALL)
}

def extract_method_details(text):
    """Collect the parameter sections of every ``class-methods`` declaration.

    Each declaration is read from the end of its ``class-methods <name>``
    header up to the first period, and never past the start of the next
    ``class-methods`` header, so one method cannot pick up the clauses of a
    later one.

    Args:
        text: Source text containing ``class-methods`` declarations with
            lower-case section keywords.

    Returns:
        A dict mapping the lower-cased method name to a dict that may contain
        the keys ``"importing"``, ``"exporting"`` and ``"returning"``. Each
        value is the list of non-empty, stripped lines of that section. A
        method without such sections maps to an empty dict.
    """
    declaration_pattern = re.compile(r"class-methods\s+([^\s]+)\s+")
    # Every keyword that can open a section; the ones that are not reported
    # are still needed so that they terminate the preceding section.
    section_pattern = re.compile(
        r"\b(importing|exporting|changing|returning|raising|exceptions)\b"
    )
    reported_sections = ("importing", "exporting", "returning")

    declarations = list(declaration_pattern.finditer(text))
    method_details = {}
    for index, match in enumerate(declarations):
        method_name = match.group(1).lower()
        sections = {}
        method_details[method_name] = sections

        # The declaration ends at its first period, and in any case before
        # the next declaration header.
        if index + 1 < len(declarations):
            stop = declarations[index + 1].start()
        else:
            stop = len(text)
        declaration = text[match.end():stop].partition(".")[0]

        # Splitting on a capturing pattern yields
        # [prefix, keyword, body, keyword, body, ...].
        pieces = section_pattern.split(declaration)
        for keyword, body in zip(pieces[1::2], pieces[2::2]):
            if keyword in reported_sections:
                sections[keyword] = [
                    line.strip() for line in body.split("\n") if line.strip()
                ]

    return method_details

# Parse the method declarations once and show them for inspection
method_params = extract_method_details(cleaned_text)
print(method_params)


# Combine each method body with the parameter lists found for it; a method
# without a parsed declaration gets empty lists
consolidated_dict = {}

for method_name, method_content in methods_dict.items():
    declared = method_params.get(method_name, {})
    consolidated_dict[method_name] = {
        "method_code": method_content,
        "importing_parameters": declared.get('importing', []),
        "exporting_parameters": declared.get('exporting', []),
        "returning_parameters": declared.get('returning', []),
    }

print(consolidated_dict)

# The first "class <name>" occurrence names the class; fall back to a
# placeholder when the text has none
class_pattern = r"class\s+([^\s]+)\s+"
class_name_match = re.search(class_pattern, cleaned_text)
if class_name_match:
    class_name = class_name_match.group(1).lower()
else:
    class_name = "unknown_class"
class_details = {class_name: consolidated_dict}

# Tool to parse the class dictionary based on the method name.

@tool("parse_class_documentation", return_direct=True)
def parse_class_dict(method_name: str):
    """
    Parses the class documentation to extract the source code of a specific method.

    Arguments:
    - method_name: The specific method name for which the documentation is requested.

    Returns:
    - The method code, or "Method not found" when the class has no such method.
    """
    # The keys of class_details were lower-cased and stripped when they were
    # built, so the requested name is normalised the same way.
    normalized_name = method_name.lower().strip()
    method_entry = class_details.get(class_name, {}).get(normalized_name)
    if method_entry is None:
        return "Method not found"
    return method_entry['method_code']

## Setting up the graph query tool

In [None]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. URI, user name and database fall back to the values
# this notebook used before.
# SECURITY NOTE(review): a password used to be hard-coded on this line; treat
# that credential as leaked and rotate it.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://909a82f6.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
_set_env("NEO4J_PASSWORD")  # asks once, without echo, when not already set
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# LangChain graph wrapper; its structured schema is fed to the Cypher prompt
graph = Neo4jGraph(url=NEO4J_URI , username=NEO4J_USERNAME , password=NEO4J_PASSWORD, enhanced_schema=True)

# Initialize LLM
llm = ChatOpenAI(model = 'gpt-4o',temperature=0)

# Low-level Neo4j driver used to run the generated Cypher statements
driver = GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

def _extract_cypher(llm_output: str) -> str:
    """Return the Cypher statement contained in an LLM reply.

    A fenced code block (with or without the ``cypher`` tag) is unwrapped.
    A reply without a fence is used as-is, because the generation prompt asks
    the model to answer with nothing but the query.
    """
    fenced = re.search(
        r"```(?:cypher)?\s*(.*?)\s*```", llm_output, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        return fenced.group(1).strip()
    return llm_output.strip()


@tool("generate_and_run_cypher_query", return_direct=True)
def generate_and_run_cypher_query(question: str):
    """
    Call this tool first.

    Tool to generate and execute a Cypher query based on the provided schema and user query.
    
    Arguments:
    - question: The user's input question that will be converted into a Cypher query to run on the graph database.

    Returns:
    - The results of the Cypher query executed on the graph. The results of the cypher query contains dependencies that will be useful in documentation generation
    """

    # Prompt for query generation
    query_prompt = PromptTemplate(
        input_variables=["schema", "question"],
        template="""   
        Generate a Cypher query to answer this question: {question}
    You are an expert in Neo4j and generating Cypher queries to query a graph database. Follow these instructions closely (non negotiable):

    Use only the provided relationship types and properties in the schema.
    Do not use any other relationship types or properties that are not provided.

    *Ensure case-insensitive name matching by converting both the stored name property and the input string to lowercase using toLower().*

    *If the Cypher query requires the following condition:
        WHERE toLower(method.name) = toLower("bdr_reject")
    Instead, try using:
        WHERE toLower(method.name) CONTAINS toLower("bdr_reject")*

    The schema includes the following entities: [Class, Method, Function, Variable, Table].

    If the exact answer is not found, attempt to use the in_community relationship, which can sometimes provide useful insights.

    Do not include any explanations, apologies, or additional text other than the Cypher query itself.
    Only generate the Cypher query that answers the given question.

    Replace "entity" with __Entity__ in all queries to reflect the schema's naming convention.


    Schema:
    {schema}
   
    Examples: Here are a few examples of generated Cypher statements for particular questions:
    # What are all the methods that are related to billing
    MATCH path = (entity:Method )-[r]-(billingcommunity:__Community__)
    WHERE toLower(billingcommunity.title) CONTAINS toLower("billing")
    RETURN path

    #How many methods exist in the class?
    MATCH path = (entity:Method)-[:RELATED]-(class:Class)
    RETURN count(path)

    #Methods with most dependencies?
    MATCH (method:Method)-[r]->(__Entity__)
    WHERE entity:Class OR entity:Variable OR entity:Table or entity:Method or entity:Function
    WITH method, count(r) AS dependenciesCount
    RETURN method.name AS MethodName, dependenciesCount
    ORDER BY dependenciesCount DESC

    #What are the functions present in a method?
    MATCH (method:Method)-[:RELATED]-(function:Function)
    RETURN method.name AS MethodName, function.name AS FunctionName

    MATCH (method:Method)-[:RELATED]-(function:Function)
    WHERE toLower(method.name) CONTAINS toLower("copy_clgrp_cond")
    RETURN function.name AS FunctionName
    full context: [{{'FunctionName': '/CGDC/CLRQ_DATA_GET'}}]
    the full context reurned from the query contains the answer to the question
    answer = '/CGDC/CLRQ_DATA_GET'

    #what are the functions used in a Get_Buffer_conditions method?
    MATCH (method:Method)-[:RELATED]-(function:Function)
    WHERE toLower(method.name) CONTAINS toLower("get_buffer_conditions")
    RETURN function.name AS FunctionName

    whenever you use entity in the query replace it with __Entity__ in the query
    in the response to questions like 

    "what are the entities that are related to SELECT_AMOUNTS_CALC_GRP_ROW "
    MATCH (method:Method)-[:RELATED]-(entity:__Entity__)
    WHERE toLower(method.name) CONTAINS toLower("copy_clgrp_cond")
    RETURN entity.name
  

  
        
        Cypher query:
        """
    )

    # Prompt for response generation
    response_prompt = PromptTemplate(
        input_variables=["question", "query_result"],
        template="""
        Question: {question}
        
        Based on the following query result:
        {query_result}
        
        Provide a concise answer:
        """
    )

    def execute_query(query: str):
        """Run the Cypher statement found in ``query`` and return the rows as dicts."""
        # The statement is resolved before the session is opened; a reply
        # without a code fence is executed as-is instead of failing on an
        # unbound variable.
        clean_query = _extract_cypher(query)
        # SECURITY NOTE(review): the statement is written by the LLM and is
        # executed unchanged; consider a read-only database user.
        with driver.session() as session:
            result = session.run(clean_query)
            return [record.data() for record in result]

    def answer_question(question: str) -> str:
        """Rewrite the question, query the graph and phrase the answer."""
        question_conversion_prompt = f"""Extract the SAP method name from the user query: {question}. You can refer to the method names in {method_params.keys()}. 
        Return the response as find the entities in relation with <<extracted method name>>"""   
        # Only the text of the reply is carried forward; formatting the
        # message object itself would leak its repr into the next prompts.
        question = llm.invoke(question_conversion_prompt).content
        print("question_after conversion", question)

        # Generate Cypher query from the graph schema and the rewritten question
        schema = graph.structured_schema
        query = llm.invoke(query_prompt.format(schema=schema, question=question))

        # Execute query
        result = execute_query(query.content)

        # Generate response
        response = llm.invoke(
            response_prompt.format(question=question, query_result=str(result))
        )
        return response.content

    return answer_question(question)



## Testing the graph query tool

In [None]:
question = "What entities are related to COPY_CLGRP_COND method?"

# The decorated tool is a Runnable: it is invoked with a mapping of its
# arguments rather than called like a plain function.
result = generate_and_run_cypher_query.invoke({"question": question})
print(result)

## Setting up the function dictionary and tool

In [43]:
# Function parsing logic
def extract_function_details(text):
    """Map every ``FUNCTION <name>. ... ENDFUNCTION.`` block to its body.

    Args:
        text: Source text containing zero or more function blocks.

    Returns:
        A dict whose keys are the lower-cased function names (namespaced such
        as ``/ns/name`` or plain) and whose values are the stripped text
        between the ``FUNCTION <name>.`` header and ``ENDFUNCTION.``. When a
        name occurs more than once, the last block wins.
    """
    # Group 1 is the name, group 2 the body. The look-behind keeps the
    # pattern from starting inside a longer word that ends in "FUNCTION".
    function_pattern = re.compile(
        r"(?<!\w)FUNCTION\s+([\w/]+)\.\s*(.*?)\s*ENDFUNCTION\.",
        re.DOTALL,
    )
    return {
        match.group(1).lower(): match.group(2).strip()
        for match in function_pattern.finditer(text)
    }

# Wrapping into a LangChain tool
@tool("parse_function_documentation", return_direct=True)
def parse_function_documentation(function_name: str):
    """
    Parses ABAP function documentation from the provided text.
    Extracts function names and their respective content between FUNCTION and ENDFUNCTION.
    Helpful in providing details of functions that can be used for understanding a method or class.

    Arguments:
    - function_name: Name of the function, with or without its namespace prefix.

    Returns:
    - The source code of the function, or "Function not found".
    """
    # Keys of function_params are lower-cased; quotes are dropped because
    # ABAP call sites write the name as a quoted literal.
    requested = function_name.lower().strip().strip("'\"")
    if requested in function_params:
        return function_params[requested]

    # A name given without namespace is matched against the last path
    # segment of the known names.
    if "/" not in requested:
        for known_name, source in function_params.items():
            if known_name.rsplit("/", 1)[-1] == requested:
                return source

    return "Function not found"

# Load the function source text and index its FUNCTION blocks by name

with open('Function clrq_data_get..txt') as source_file:
    function_text = source_file.read()

function_params = extract_function_details(function_text)


## Utility tools

In [55]:
## Pretty print the messages and log errors

from langchain_core.messages import ToolMessage
from langchain_core.runnables import RunnableLambda

from langgraph.prebuilt import ToolNode


def handle_tool_error(state) -> dict:
    """Answer every pending tool call with an error message.

    Reads the exception stored under ``state["error"]`` and the tool calls of
    the last message in ``state["messages"]``, and returns a ``messages``
    update holding one ``ToolMessage`` per call.
    """
    error = state.get("error")
    pending_calls = state["messages"][-1].tool_calls
    replies = []
    for call in pending_calls:
        replies.append(
            ToolMessage(
                content=f"Error: {repr(error)}\n please fix your mistakes.",
                tool_call_id=call["id"],
            )
        )
    return {"messages": replies}


def create_tool_node_with_fallback(tools: list) -> dict:
    """Wrap ``tools`` in a ``ToolNode`` that falls back to ``handle_tool_error``.

    The raised exception is handed to the fallback under the ``"error"`` key.
    NOTE(review): the annotation says ``dict``, but the value returned is
    whatever ``with_fallbacks`` produces.
    """
    tool_node = ToolNode(tools)
    error_fallback = RunnableLambda(handle_tool_error)
    return tool_node.with_fallbacks([error_fallback], exception_key="error")


def _print_event(event: dict, _printed: set, max_length=15000000):
    """Print the newest message of a streamed event once.

    Args:
        event: Streamed state; ``"dialog_state"`` and ``"messages"`` are read
            when present.
        _printed: Ids of messages already shown; updated in place.
        max_length: Longest representation printed in full. Longer text is
            cut at this length and marked as truncated.
    """
    current_state = event.get("dialog_state")
    if current_state:
        print("Currently in: ", current_state[-1])

    message = event.get("messages")
    if not message:
        return
    if isinstance(message, list):
        # Only the most recent message of the list is shown.
        message = message[-1]
    if message.id in _printed:
        return

    msg_repr = message.pretty_repr(html=True)
    if len(msg_repr) > max_length:
        msg_repr = msg_repr[:max_length] + " ... (truncated)"
    print(msg_repr)
    _printed.add(message.id)

In [45]:
from typing import Annotated

from typing_extensions import TypedDict

from langgraph.graph.message import AnyMessage, add_messages


class State(TypedDict):
    # Shape of the state shared by the graph nodes: a single "messages" key.
    # The list of messages is annotated with `add_messages`; presumably
    # LangGraph uses it as the reducer that merges node output into the
    # existing history (confirm against the LangGraph documentation).
    messages: Annotated[list[AnyMessage], add_messages]

In [47]:
def set_method_params():
    """Re-parse ``cleaned_text`` and rebind the module-level ``method_params``."""
    global method_params
    refreshed = extract_method_details(cleaned_text)
    method_params = refreshed

## Defining the Assistant

In [57]:
# Define the Assistant class
class Assistant:
    """Graph node that invokes the assistant runnable until it gives a usable reply.

    Args:
        runnable: The runnable invoked on every call.
        graph: Accepted for backward compatibility; not used.
        method_params: Optional mapping of known method names. When omitted,
            the module-level ``method_params`` is looked up at call time.
    """

    def __init__(self, runnable: Runnable, graph=None, method_params=None):
        self.runnable = runnable
        self.method_params = method_params

    def _resolve_method_params(self, method_params):
        """Pick the method mapping: call argument, then instance, then module."""
        if method_params is not None:
            return method_params
        if self.method_params is not None:
            return self.method_params
        # Looked up at call time so that a later rebinding of the module-level
        # name (see set_method_params) is honoured.
        return globals().get("method_params") or {}

    @staticmethod
    def _latest_user_text(messages):
        """Return the text of the most recent user message, or "" when none."""
        for message in reversed(list(messages)):
            if getattr(message, "type", None) == "human":
                return message.content
            # Messages may also be given as (role, text) pairs.
            if (
                isinstance(message, (tuple, list))
                and len(message) == 2
                and message[0] in ("user", "human")
            ):
                return message[1]
        return ""

    def __call__(self, state: dict, config: RunnableConfig, method_params=None):
        """Invoke the runnable on ``state`` and return a ``messages`` update.

        An empty reply without tool calls is answered with a user message
        asking for a valid response, and the runnable is invoked again.
        """
        known_methods = self._resolve_method_params(method_params)
        while True:
            print(f"Configuration received: {config}")
            messages = state.get("messages", [])
            print(messages)

            if "generate_and_run_cypher_query" in self.runnable.get_name():
                print("Graph Query Tool detected.")
                # The question comes from the conversation itself rather than
                # from an unrelated module-level variable.
                # NOTE(review): this branch only runs when the runnable is the
                # graph tool itself; confirm its result then has the
                # `tool_calls` / `content` attributes read below.
                tool_input = {
                    "question": self._latest_user_text(messages),
                    "context": list(known_methods),
                }
                result = self.runnable.invoke(tool_input)
                print("Result from Graph Query Tool:", result)
            else:
                print("Invoking other tools or processing states.")
                result = self.runnable.invoke(state)

            # Retry when the reply has neither tool calls nor any text
            if not result.tool_calls and (
                not result.content
                or isinstance(result.content, list)
                and not result.content[0].get("text")
            ):
                messages = state["messages"] + [("user", "Please provide a valid response.")]
                state = {**state, "messages": messages}
            else:
                break
        return {"messages": result}

# Chat model that drives the assistant.
# NOTE(review): this rebinds the module-level `llm`, which the graph query
# tool also reads when it is called; confirm that temperature 0.2 is intended
# for Cypher generation as well.
llm = ChatOpenAI(model="gpt-4o", temperature=0.2)

# System prompt followed by the running conversation. The user's request
# reaches the model through the "messages" placeholder, so the prompt has no
# separate user-query slot. Every fragment ends with a space so that the
# implicitly concatenated literals do not fuse sentences.
primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant focused on understanding SAP ABAP code, "
            "documenting methods, functions, and resolving queries using the provided tools. "
            "For every query, use the tools effectively. When searching in the graph or config, "
            "ensure you understand the context of the class/method/functions. "
            "Try and identify the method name from the user query. "
            "If the user is asking to print a documentation, use the generate and run query tool first, to check if functions exist or not. "
            "Use the output from the parse class documentation to explain what the method or class is doing. "
            """When you identify a method is being used, use the function documentation tool to understand the function and create a function documentation. Use the function documentation 
            to enhance the understanding of the method. and then generate a highly technical documentation by incorporating method and function understanding."""
        ),
        ("placeholder", "{messages}"),
    ]
)

# List of tools that will be bound to the assistant (adjusted for your use case)
part_1_tools = [
    generate_and_run_cypher_query,  # Tool for graph querying
    lookup_config,  # Tool for retrieving information from the configuration document
    parse_class_dict,  # Tool for extracting class and method information from the doc
    parse_function_documentation,  # Tool to parse and understand functions
]

# Prompt piped into the model with the tools bound to it
part_1_assistant_runnable = primary_assistant_prompt | llm.bind_tools(part_1_tools)

# Example of the Assistant node setup using LangGraph (simplified)
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import tools_condition
from langgraph.checkpoint.memory import MemorySaver

builder = StateGraph(State)

# Define nodes: assistant and tools
builder.add_node("assistant", Assistant(part_1_assistant_runnable))
builder.add_node("tools", create_tool_node_with_fallback(part_1_tools))

# Define edges for control flow
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")
# NOTE(review): presumably redundant, since tools_condition is expected to
# route to END when the reply has no tool calls - confirm before removing.
builder.add_edge("assistant", END)

# Create the memory to save the graph state
memory = MemorySaver()
part_1_graph = builder.compile(checkpointer=memory)


In [None]:
import shutil
import uuid

# Single-question conversation used to exercise the compiled graph
tutorial_questions = ["Write a documentation for the method BDR_REJECT"]

# A fresh UUID gives this run its own checkpoint thread
thread_id = str(uuid.uuid4())

# Run configuration; the checkpointer keys the saved state on thread_id
config = {"configurable": {"thread_id": thread_id}}

# Ids of the messages already shown by _print_event
_printed = set()

# Send each question through the graph and print every streamed state
for question in tutorial_questions:
    user_turn = {"messages": ("user", question)}
    events = part_1_graph.stream(user_turn, config, stream_mode="values")

    for event in events:
        _print_event(event, _printed)


In [None]:
import shutil
import uuid

# Single-question conversation asking for documentation of the whole class
tutorial_questions = ["Write a documentation for class /CGDC/CL_CLRQ_PROCESS"]

# A fresh UUID gives this run its own checkpoint thread
thread_id = str(uuid.uuid4())

# Run configuration; the checkpointer keys the saved state on thread_id
config = {"configurable": {"thread_id": thread_id}}

# Ids of the messages already shown by _print_event
_printed = set()

# Send each question through the graph and print every streamed state
for question in tutorial_questions:
    user_turn = {"messages": ("user", question)}
    events = part_1_graph.stream(user_turn, config, stream_mode="values")

    for event in events:
        _print_event(event, _printed)