In [1]:
from crewai import Agent, Task, Crew
from crewai import LLM
import os
import json
import re

In [2]:
instrospection_template = json.load(open("input/1_software_introspection.json"))
causal_template = json.load(open("input/2_causal_interpretability.json"))

In [3]:

llm = LLM(

    #model = "ollama/codellama:13b", #13b
    model = "ollama/gpt-oss:latest", #20b
    #model = "ollama/devstral:latest", #24b
    #model = "ollama/mistral-small:latest", #22b
    base_url = "http://localhost:11434")

## Scenario A: Code generation text2code task

In [4]:
messages_a = instrospection_template['scenario_a']['step1']['messages']

In [5]:
messages_a

[{'role': 'system',
  'content': 'You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python method that receives two vectors as parameters and return the cross product. Generate only the code of the function without any examples or tests.'},
 {'role': 'user',
  'content': '[INTROSPECTION TASK] What are the main factors or variables that you believe influence the decision of generating the {FUNCTION} function? Estimate an attribution weight to each variable. Label the variables in alphabetical order. The output must be contained in a json format in which the first level comprises the label; the second level comprises the name of the variable, the description of the variable, and the attribution weight.'}]

In [6]:
def extract_function_name(response):
    match = re.search(r"def\s+(\w+)\s*\(", response)
    if match:
        return match.group(1)
    return None

In [7]:
def process_messages(message,function, factors_json, node, unit_test):
    #new_message= re.sub(r"\[.*?\]","", message['content'])
    new_message= message['content']
    if function:
        new_message = re.sub(r"\{FUNCTION\}",function, new_message)
    if factors_json:
        new_message = re.sub(r"\{FACTORS\}",factors_json, new_message)
    if re.search(r"\{NODE\}", new_message):
        new_message = re.sub(r"\{NODE\}",node, new_message)
    if unit_test:
        new_message = re.sub(r"\{UNIT_TEST\}",unit_test, new_message)
    return new_message

In [9]:
def find_name(obj):
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k == "name":
                yield v
            else:
                yield from find_name(v)
    elif isinstance(obj, list):
        for item in obj:
            yield from find_name(item)

In [26]:
def process_intraspection_factors_step_1(introspection_chat_history):
    factors = []
    script, function_name, factors, factors_message = None, None, None, None
    if len(introspection_chat_history) == 4:
        chat = introspection_chat_history[3]['content']
        function_name = extract_function_name(chat)
        match = re.search(r"```(?:python)?(.*?)```", chat, re.DOTALL)
        script = match.group(1).strip() if match else None
   
    if len(introspection_chat_history) == 6:
        factors_message = introspection_chat_history[5]['content']
        cleaned = re.sub(r"^```json\s*|\s*```$", "", factors_message.strip(), flags=re.MULTILINE)

        data = json.loads(cleaned)
        factors=list(find_name(data))
    return script, function_name, factors, factors_message
   

In [27]:
chat_history = []
last_response = ''
messagges = messages_a.copy()
for message in messagges:
    function_script, function_name, nodes, factors_json = process_intraspection_factors_step_1(chat_history)
    message['content'] = process_messages(message, function_script, factors_json, None, None)
    print(message['content'])
    chat_history.append(message)
    last_response = llm.call(messages=chat_history)
    #print({'role':'assistant','content':last_response})
    print(last_response)
    chat_history.append({"role": "assistant", "content": last_response})

You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.
I’m ready to help! Please share the programming problem or software specification you’d like me to work on, and I’ll generate the appropriate source code for you.
[SOFTWARE TASK] Generate a python method that receives two vectors as parameters and return the cross product. Generate only the code of the function without any examples or tests.
```python
def cross_product(a, b):
    """
    Compute the cross product of two 3‑dimensional vectors.

    Parameters
    ----------
    a, b : sequence of numbers
        Input vectors of length 3.

    Returns
    -------
    tuple
        The cross product vector (x, y, z).

    Raises
    ------
    ValueError
        If either input is not of length 3.
    """
    if len(a) != 3 or len(b) != 3:
        raise ValueError("Vectors must be of length 3")

    return (
        a[1] * b[2] - a[2] * b[1],
        a[2]

In [29]:
from IPython import get_ipython

# Get the current execution count
exec_count = get_ipython().execution_count
print("This cell run number is:", exec_count)

This cell run number is: 29


In [30]:
output = {'scenario_a': {'step1': {'messages': chat_history}}}
file = "output/introspection/"+ llm.model.replace("/","_")+"_step1_"+ str(exec_count) +".json"
with open(file, "w") as f:
    json.dump(output, f, indent=4)  

'role': 'system', 'content': 'You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.'}
{'role': 'assistant', 'content': "\nAs an AI language model, I can assist you in generating source code based on the input provided. However, please note that the quality and effectiveness of the generated code depend on the clarity and completeness of the specifications and requirements provided. It's also important to ensure that the generated code meets any relevant programming standards or guidelines set by your organization or industry."}
{'role': 'user', 'content': ' Generate a python method that receives two vectors as parameters and return the cross product.'}
{'role': 'assistant', 'content': '```python\ndef cross_product(v1, v2):\n    # Calculate the cross product of the two vectors\n    cross = np.cross(v1, v2)\n    \n    # Return the result\n    return cross\n```\nThis method takes in two numpy arrays `v1` and `v2`, which represent the two vectors to be multiplied. The `np.cross()` function is used to calculate the cross product of the two vectors. The resulting vector is then returned as a numpy array.\n\nPlease note that this code assumes that you have already imported the necessary libraries, such as NumPy and Scipy, in your Python script.'}
{'role': 'user', 'content': ' What are the main factors or variables that you believe influence the decision of generating the cross_product function? Estimate an attribution weight to each variable. Label the variables in alphabetical order. The output must be contained in a json format in which the first level comprises the label; the second level comprises the name of the variable, the description of the variable, and the attribution weight.'}
{'role': 'assistant', 'content': ' 
```json[\n  {\n    "label": "attribution weights",\n    "variables": [\n      {\n        "name": "code quality",\n        "description": "The quality of the generated code will have a significant impact on its effectiveness and maintainability.",\n        "weight": "0.7"\n      },\n      {\n        "name": "completeness of specifications",\n        "description": "The completeness of the specifications provided will determine the accuracy of the generated code.",\n        "weight": "0.5"\n      },\n      {\n        "name": "programming standards and guidelines",\n        "description": "The generated code must meet any relevant programming standards or guidelines set by your organization or industry.",\n        "weight": "0.3"\n      }\n    ]\n  }\n]\n```'}

In [31]:
chat_history


[{'role': 'system',
  'content': 'You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.'},
 {'role': 'assistant',
  'content': 'I’m ready to help! Please share the programming problem or software specification you’d like me to work on, and I’ll generate the appropriate source code for you.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python method that receives two vectors as parameters and return the cross product. Generate only the code of the function without any examples or tests.'},
 {'role': 'assistant',
  'content': '```python\ndef cross_product(a, b):\n    """\n    Compute the cross product of two 3‑dimensional vectors.\n\n    Parameters\n    ----------\n    a, b : sequence of numbers\n        Input vectors of length 3.\n\n    Returns\n    -------\n    tuple\n        The cross product vector (x, y, z).\n\n    Raises\n    ------\n    ValueError\n        If either input is not of len

In [32]:
messages_a

[{'role': 'system',
  'content': 'You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python method that receives two vectors as parameters and return the cross product. Generate only the code of the function without any examples or tests.'},
 {'role': 'user',
  'content': '[INTROSPECTION TASK] What are the main factors or variables that you believe influence the decision of generating the def cross_product(a, b):\n    """\n    Compute the cross product of two 3‑dimensional vectors.\n\n    Parameters\n    ----------\n    a, b : sequence of numbers\n        Input vectors of length 3.\n\n    Returns\n    -------\n    tuple\n        The cross product vector (x, y, z).\n\n    Raises\n    ------\n    ValueError\n        If either input is not of length 3.\n    """\n    if len(a) != 3 or len(b) != 3:\n        raise ValueError("Vectors must be of len

## Step2: Code generation text2code task with interpretability 

In [33]:
messages_2 = instrospection_template['scenario_a']['step2']['messages']

In [34]:
messages_2

[{'role': 'system',
  'content': 'You are an experienced software tester. Your job is to generate unit test cases given a focal method.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python unit test case for the following focal method {FUNCTION}'},
 {'role': 'user',
  'content': '[INTROSPECTION TASK] What are the main factors or variables that you believe influence the decision of generating the {UNIT_TEST} test? Estimate an attribution weight to each variable. Label the variables in alphabetical order. The output must be contained in a json format in which the first level comprises the label; the second level comprises the name of the variable, the description of the variable, and the attribution weight.'}]

In [35]:
print(function_script)

def cross_product(a, b):
    """
    Compute the cross product of two 3‑dimensional vectors.

    Parameters
    ----------
    a, b : sequence of numbers
        Input vectors of length 3.

    Returns
    -------
    tuple
        The cross product vector (x, y, z).

    Raises
    ------
    ValueError
        If either input is not of length 3.
    """
    if len(a) != 3 or len(b) != 3:
        raise ValueError("Vectors must be of length 3")

    return (
        a[1] * b[2] - a[2] * b[1],
        a[2] * b[0] - a[0] * b[2],
        a[0] * b[1] - a[1] * b[0]
    )


In [37]:
def process_intraspection_factors_step_2(introspection_chat_history):
    if len(introspection_chat_history) < 3:
        return None, None
    chat = introspection_chat_history[3]['content']
    function_name = extract_function_name(chat)
    match = re.search(r"```(?:python)?(.*?)```", chat, re.DOTALL)
    function = match.group(1).strip() if match else None
    return function_name, function


#function_name, script = process_intraspection_factors_step_2(chat_history)

In [40]:
chat_history_2 = []
last_response = ''
messages = messages_2.copy()
for message in messages:
    test_name, unit_test = process_intraspection_factors_step_2(chat_history_2)
    message['content'] = process_messages(message, function_script, None, None, unit_test)
    print(message['content'])
    chat_history_2.append(message)
    last_response = llm.call(messages=chat_history_2)
    #print({'role':'assistant','content':last_response})
    print(last_response)
    chat_history_2.append({"role": "assistant", "content": last_response})

You are an experienced software tester. Your job is to generate unit test cases given a focal method.
Sure! Please share the method (or the class containing it) you’d like me to create unit tests for, along with any relevant context (e.g., its purpose, expected inputs/outputs, or any dependencies). Once I have that, I’ll generate a comprehensive set of unit test cases for you.
[SOFTWARE TASK] Generate a python unit test case for the following focal method def cross_product(a, b):
    """
    Compute the cross product of two 3‑dimensional vectors.

    Parameters
    ----------
    a, b : sequence of numbers
        Input vectors of length 3.

    Returns
    -------
    tuple
        The cross product vector (x, y, z).

    Raises
    ------
    ValueError
        If either input is not of length 3.
    """
    if len(a) != 3 or len(b) != 3:
        raise ValueError("Vectors must be of length 3")

    return (
        a[1] * b[2] - a[2] * b[1],
        a[2] * b[0] - a[0] * b[2],
      

In [41]:
chat_history

[{'role': 'system',
  'content': 'You are an experienced software engineer. Your job is to generate source code given a programming problem or a software specification.'},
 {'role': 'assistant',
  'content': 'I’m ready to help! Please share the programming problem or software specification you’d like me to work on, and I’ll generate the appropriate source code for you.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python method that receives two vectors as parameters and return the cross product. Generate only the code of the function without any examples or tests.'},
 {'role': 'assistant',
  'content': '```python\ndef cross_product(a, b):\n    """\n    Compute the cross product of two 3‑dimensional vectors.\n\n    Parameters\n    ----------\n    a, b : sequence of numbers\n        Input vectors of length 3.\n\n    Returns\n    -------\n    tuple\n        The cross product vector (x, y, z).\n\n    Raises\n    ------\n    ValueError\n        If either input is not of len

In [42]:
chat_history_2

[{'role': 'system',
  'content': 'You are an experienced software tester. Your job is to generate unit test cases given a focal method.'},
 {'role': 'assistant',
  'content': 'Sure! Please share the method (or the class containing it) you’d like me to create unit tests for, along with any relevant context (e.g., its purpose, expected inputs/outputs, or any dependencies). Once I have that, I’ll generate a comprehensive set of unit test cases for you.'},
 {'role': 'user',
  'content': '[SOFTWARE TASK] Generate a python unit test case for the following focal method def cross_product(a, b):\n    """\n    Compute the cross product of two 3‑dimensional vectors.\n\n    Parameters\n    ----------\n    a, b : sequence of numbers\n        Input vectors of length 3.\n\n    Returns\n    -------\n    tuple\n        The cross product vector (x, y, z).\n\n    Raises\n    ------\n    ValueError\n        If either input is not of length 3.\n    """\n    if len(a) != 3 or len(b) != 3:\n        raise Val

In [44]:
output ={
    'scenario_a': {
        'step1': {'messages': chat_history},
        'step2': {'messages': chat_history_2}
    }
}
file = "output/introspection/"+ llm.model.replace("/","_")+"_step1_step2_"+ str(exec_count) +".json"
with open(file, "w") as f:
    json.dump(output, f, indent=4)  