In [None]:
# First we initialize the model we want to use.
from langchain_openai import ChatOpenAI

# Read the OpenAI API key from the environment rather than hardcoding it in the
# notebook, so the secret never lands in version control or shared outputs.
# Falls back to an empty string (the previous placeholder value) when unset.
import os

apikey = os.environ.get("OPENAI_API_KEY", "")

from typing import Literal


In [2]:
# Path of the Python source file whose variable lineage will be traced.
# NOTE(review): this is a machine-specific absolute path; adjust it before
# running the notebook on another machine.
file_path = r'C:\Users\shrvishwakarma\OneDrive - Deloitte (O365D)\Desktop\Data Lineage\TestCode1\test1.py'

# Read the whole file as UTF-8 text. The context manager closes the handle as
# soon as the read finishes instead of leaving it open until garbage collection.
with open(file_path, 'r', encoding='utf-8') as source_file:
    file_content = source_file.read()
# file_content

In [3]:
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System prompt for the generator chain: it states the node-level rules, the
# required output columns, and a small worked example of the expected table.
# The conversation so far is injected through the "messages" placeholder.
# NOTE: the system text is treated as a template, so it must not contain
# literal curly braces.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
                You are an expert in analyzing codebases to trace variable lineage across multiple files with cross dependencies. Your task is to analyze the provided code and trace every variable's transformation, grouping them into nodes according to the following rules:

                1. **Node 0**: Represents the first manually initialized variables.
                2. **Node 1**: Includes variables that are directly created from Node 0 variables.
                3. **Node 2**: Includes variables that are derived using Node 1 variables, or a combination of Node 0 and Node 1.
                4. **Node 3**: Includes variables that are derived using Node 2 variables, or a combination of Node 1 and Node 2.
                5. **Node 4**: Includes variables that are derived using Node 3 variables, or a combination of Node 1 and Node 3, or a combination of node 2 and node 3, or a combination of all the previous nodes, but including node 3.
                6. **Node 5 and beyond**: Continue to trace further transformations based on the inputs from previous nodes.

                For each transformation, generate a table row with the following columns:
                - **From_Node_ID:** The node ID of the source variable.
                - **From_Node_ID_Name:** The name of the source variable.
                - **To_Node_ID:** The node ID for the new variable.
                - **To_Node_ID_Name:** The name of the new variable.
                - **Relationship_Type:** A brief description of the transformation.

                For example, if the code is `a = b + c` followed by `d = a + e`, consider the following table:

                | From_Node_ID | From_Node_ID_Name | To_Node_ID | To_Node_ID_Name | Relationship_Type |
                |--------------|-------------------|------------|-----------------|-------------------|
                | 0            | b                 | 1          | a               | calculate a       |
                | 0            | c                 | 1          | a               | calculate a       |
                | 1            | a                 | 2          | d               | calculate d       |
                | 0            | e                 | 2          | d               | calculate d       |

                Note that b and c are not listed as sources of d, because d only uses them indirectly through a.

                Your output should be exactly in the table format above with the appropriate columns and entries based on the analysis of the given code.

                Remember to:
                - Carefully analyze variable definitions and usages across files.
                - Check if the variable name is directly being used in the formula, only then add it to a dependent node. 
                - Name the nodes based on the transformation level.
                - Include a description of the relationship (e.g., "calculate d1", "calc nd2", etc.).
                - Output the result in the specified table format.

                Now, analyze the provided code to extract the variable lineage and generate the output table.

            """,
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)
# Chat model used by both the generator chain here and the reflection chain
# built later in the notebook.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.7,
    api_key=apikey,
)

# Generator chain: the lineage prompt is piped straight into the model.
generate = prompt | llm

# Display the assembled prompt template as the cell output.
prompt

ChatPromptTemplate(input_variables=['messages'], input_types={'messages': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[langchain_core.mes

In [4]:
# (E.g., if a = b + c and d = a + e, then don't list b and c as dependent variables for d.)

In [5]:
# Wrap the source file in a user message, stream the generator's answer to the
# cell output, and keep the full text in `output` for the reflection step.
request = HumanMessage(content=file_content)
output = ""
for piece in generate.stream({"messages": [request]}):
    text = piece.content
    print(text, end="")
    output += text

Below is the table tracing the lineage of each variable in the provided code following the transformation rules:

| From_Node_ID | From_Node_ID_Name | To_Node_ID | To_Node_ID_Name | Relationship_Type        |
|--------------|-------------------|------------|-----------------|--------------------------|
| 0            | S                 | 1          | forward         | calculate forward        |
| 0            | K                 | 1          | forward         | calculate forward        |
| 0            | r                 | 1          | forward         | calculate forward        |
| 0            | q                 | 1          | forward         | calculate forward        |
| 0            | T                 | 1          | forward         | calculate forward        |
| 0            | r                 | 1          | discount        | calculate discount       |
| 0            | T                 | 1          | discount        | calculate discount       |
| 0            | S             

In [6]:
# System prompt for the reflection chain: it restates the node-level rules and
# asks for a structured critique of the generated lineage table. The
# conversation so far is injected through the "messages" placeholder.
reflection_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a self-reflection agent whose task is to critique the generated table that traces variable lineage across a given set of Python code files. Your evaluation must adhere strictly to the following guidelines and rules:

            1. **Rules Verification:**
            - Confirm that the table correctly categorizes variables into nodes based on their transformation level:
                1. **Node 0**: Represents the first manually initialized variables.
                2. **Node 1**: Includes variables that are directly created from Node 0 variables.
                3. **Node 2**: Includes variables that are derived using Node 1 variables, or a combination of Node 0 and Node 1.
                4. **Node 3**: Includes variables that are derived using Node 2 variables, or a combination of Node 1 and Node 2.
                5. **Node 4**: Includes variables that are derived using Node 3 variables, or a combination of Node 1 and Node 3, or a combination of node 2 and node 3, or a combination of all the previous nodes, but including node 3.
                6. **Node 5 and beyond**: Continue to trace further transformations based on the inputs from previous nodes.

            - Ensure that the relationship descriptions (e.g., "calculate d1", "calc nd2", "call or put") are clearly and accurately specified.

            2. **Table Format and Consistency:**
            - Check that the output is in the required table format with the exact column headers:
                - From_Node_ID
                - From_Node_ID_Name
                - To_Node_ID
                - To_Node_ID_Name
                - Relationship_Type
            - Verify that each row follows the expected structure and includes proper mappings of the variable transformations.

            3. **Code and Cross-Dependency Evaluation:**
            - Analyze how the provided Python codes define and transform variables across multiple files.
            - Verify that the table correctly represents variable transformations and cross-dependencies as per the code.
            - Ensure that every transformation from one node to the next is logically deduced from the code, and check if any transformation or dependency is missing or incorrectly assigned.

            4. **Self-Critique Guidelines:**
            - Provide a detailed critique of the generated table. Highlight both strengths and weaknesses on the front of identification of node relationship (if the node identification is correct or not) only and nothing else.
            - Identify discrepancies between the transformation rules and the generated table.
            - Suggest specific improvements or corrections needed to ensure that the table accurately reflects the variable lineage.
            - Justify your critique with references to the rules and the actual content and flow of the provided Python code.

            5. **Output:**
            - Your output should be a clear, structured critique, beginning with an overall evaluation, followed by detailed feedback on each section of the table.
            - Conclude with a summary of recommended modifications to align the table with the given rules and code behavior.

            Now, begin your self-reflection and critique of the generated table based on these instructions.

                        """
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)
# Reflection chain: the critique prompt piped into the same chat model.
reflect = reflection_prompt | llm

In [7]:
# Ask the reflection chain to critique the first draft: it receives the
# original request followed by the generated table (sent as a human message).
# The critique is echoed as it streams and accumulated in `reflection`.
reflection = ""
critique_inputs = {"messages": [request, HumanMessage(content=output)]}
for piece in reflect.stream(critique_inputs):
    text = piece.content
    print(text, end="")
    reflection += text

### Overall Evaluation:
The generated table attempts to trace the lineage of variables within the provided Python code, applying the transformation rules specified. However, there are several discrepancies in how the nodes are categorized and how transformations are mapped. While the table captures the basic idea of variable transformations, it does not fully align with the provided rules or the actual code structure.

### Detailed Feedback:

1. **Node Categorization:**
   - **Node 0:** The table correctly identifies `S`, `K`, `T`, `r`, `sigma`, and `q` as Node 0 variables, which are the manually initialized parameters of the `black_scholes_merton` function.
   
2. **Node 1:**
   - **Incorrect Assignments:** 
     - `forward` and `discount` are calculated from Node 0 variables, which is correctly reflected as Node 1, but these calculations are only relevant when `sigma == 0`, which the table does not account for.
     - `d1` and `d2` should be in Node 1 and Node 2, respectively, in the

In [8]:
# Second generator pass: replay the request, the first draft (as the model's
# own turn) and the critique (as the user's turn), then stream the revision.
revision_inputs = {
    "messages": [
        request,
        AIMessage(content=output),
        HumanMessage(content=reflection),
    ]
}
for piece in generate.stream(revision_inputs):
    print(piece.content, end="")

Below is the revised table that accurately reflects the variable lineage based on the transformation rules and analysis of the provided code:

| From_Node_ID | From_Node_ID_Name | To_Node_ID | To_Node_ID_Name | Relationship_Type        |
|--------------|-------------------|------------|-----------------|--------------------------|
| 0            | S                 | 1          | d1              | calculate d1             |
| 0            | K                 | 1          | d1              | calculate d1             |
| 0            | r                 | 1          | d1              | calculate d1             |
| 0            | q                 | 1          | d1              | calculate d1             |
| 0            | sigma             | 1          | d1              | calculate d1             |
| 0            | T                 | 1          | d1              | calculate d1             |
| 1            | d1                | 2          | d2              | calculate d2             |
| 

In [9]:
from typing import Annotated, List, Sequence
from langgraph.graph import END, StateGraph, START
from langgraph.graph.message import add_messages
from langgraph.checkpoint.memory import MemorySaver
from typing_extensions import TypedDict


class State(TypedDict):
    """State dictionary passed between the graph's generate and reflect nodes."""

    # Conversation history. `add_messages` (imported from
    # langgraph.graph.message) is attached as annotation metadata; presumably
    # langgraph uses it to merge each node's returned "messages" into this
    # list rather than overwrite it -- confirm against the langgraph docs.
    messages: Annotated[list, add_messages]


async def generation_node(state: State) -> State:
    """Run the generator chain on the conversation held in ``state``.

    Returns a state update whose "messages" list contains the single
    response produced by ``generate.ainvoke``.
    """
    history = state["messages"]
    response = await generate.ainvoke(history)
    return {"messages": [response]}


async def reflection_node(state: State) -> State:
    """Critique the latest draft with the reflection chain.

    The first message (the original user request) is passed through
    unchanged. Every later message has its role swapped -- "ai" becomes a
    HumanMessage and "human" becomes an AIMessage -- before the list is sent
    to ``reflect``. The critique text is returned wrapped in a HumanMessage
    so the generator treats it as user feedback.
    """
    history = state["messages"]
    swapped_role = {"ai": HumanMessage, "human": AIMessage}

    # Keep the original request as-is, then append the role-swapped remainder.
    translated = [history[0]]
    for msg in history[1:]:
        translated.append(swapped_role[msg.type](content=msg.content))

    critique = await reflect.ainvoke(translated)
    return {"messages": [HumanMessage(content=critique.content)]}


# Build the graph over the State schema and register the two async node
# functions under the names "generate" and "reflect".
builder = StateGraph(State)
builder.add_node("generate", generation_node)
builder.add_node("reflect", reflection_node)
# Execution starts at the "generate" node.
builder.add_edge(START, "generate")


def should_continue(state: State):
    """Choose the step that follows the "generate" node.

    Returns END once the conversation holds more than six messages, and the
    node name "reflect" otherwise. With one initial request and one message
    added per node run, that appears to end the run after three reflection
    rounds.
    """
    message_cap = 6
    return END if len(state["messages"]) > message_cap else "reflect"


# After "generate", should_continue decides the next step (END or "reflect").
builder.add_conditional_edges("generate", should_continue)
# Every reflection is followed by another generation pass.
builder.add_edge("reflect", "generate")
# Compile the graph with a MemorySaver instance as its checkpointer.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [10]:
# Run configuration handed to the graph calls below; "thread_id" presumably
# selects which checkpointed conversation is used -- confirm in langgraph docs.
config = {
    "configurable": {
        "thread_id": "1",
    },
}

In [11]:
async for event in graph.astream(
    {
        "messages": [
            HumanMessage(
                content=file_content
            )
        ],
    },
    config,
):
    print(event)
    print("---")

{'generate': {'messages': [AIMessage(content='| From_Node_ID | From_Node_ID_Name | To_Node_ID | To_Node_ID_Name | Relationship_Type      |\n|--------------|-------------------|------------|-----------------|------------------------|\n| 0            | S                 | 1          | forward         | calculate forward      |\n| 0            | r                 | 1          | forward         | calculate forward      |\n| 0            | q                 | 1          | forward         | calculate forward      |\n| 0            | T                 | 1          | forward         | calculate forward      |\n| 0            | K                 | 1          | discount        | calculate discount     |\n| 0            | r                 | 1          | discount        | calculate discount     |\n| 0            | T                 | 1          | discount        | calculate discount     |\n| 0            | S                 | 1          | d1              | calculate d1           |\n| 0           

In [12]:
# Fetch the graph state stored for the run identified by `config`.
state = graph.get_state(config)

In [13]:
# Pretty-print the whole conversation stored in the graph state by wrapping
# the messages in a prompt template.
final_messages = state.values["messages"]
ChatPromptTemplate.from_messages(final_messages).pretty_print()


import numpy as np
from scipy.stats import norm

def black_scholes_merton(S, K, T, r, sigma, q, option_type='call'):
    """
    Calculate the Black-Scholes-Merton option price and Greeks for European calls and puts.

    Parameters:
    S (float): Current stock price
    K (float): Strike price
    T (float): Time to maturity in years
    r (float): Risk-free interest rate
    sigma (float): Volatility of the stock
    q (float): Dividend yield
    option_type (str): 'call' or 'put'

    Returns:
    dict: Contains 'price', 'delta', 'gamma', 'theta', 'vega', 'rho'
    """

    # Validate inputs
    for arg in [S, K, T, r, sigma, q]:
        if not isinstance(arg, (float, int)):
            raise TypeError(f"All inputs must be numeric. Received {type(arg)}.")
    if S <= 0 or K <= 0 or T < 0 or r < 0 or sigma < 0 or q < 0:
        raise ValueError("S, K, T, r, sigma, q must be positive (T can be zero).")
    if option_type not in ['call', 'put']:
        raise ValueError("option_type 