## Imports & helper functions

In [None]:
%pip install --upgrade py2neo
%pip install --upgrade openai

In [None]:
from py2neo import Graph


In [None]:
from itertools import groupby

In [None]:
import json


def read_json_file(file_path):
    """Load the JSON document stored at ``file_path`` and return the decoded object."""
    with open(file_path, 'r') as handle:
        return json.load(handle)


def parse_json(json_string):
    """Decode the JSON text in ``json_string`` and return the resulting object."""
    return json.loads(json_string)


def dict_to_pretty_json(dictionary):
    """Serialize ``dictionary`` to a JSON string indented by two spaces."""
    return json.dumps(dictionary, indent=2)


def write_dict_to_json(dictionary, file_path):
    """Write ``dictionary`` to ``file_path`` as JSON indented by two spaces.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(file_path, 'w') as target:
        json.dump(dictionary, target, indent=2)


In [None]:
import configparser

def read_ini_file(file_path):
    """Read an INI file into a plain ``{section: {option: value}}`` dictionary.

    Parsing is delegated to ``configparser.ConfigParser``; each section's
    ``items()`` are copied into an ordinary dict.
    """
    parser = configparser.ConfigParser()
    parser.read(file_path)
    ini_dict = {}
    for section in parser.sections():
        ini_dict[section] = dict(parser.items(section))
    return ini_dict


In [None]:
from openai import OpenAI

In [None]:
import re


# Matches either a Java string/char literal (captured in group 1 so it can be
# kept) or a comment (not captured, so it is dropped). Literals come first in
# the alternation so that `//` or `/*` inside quotes is never read as a comment.
_JAVA_LITERAL_OR_COMMENT = re.compile(
    r'''("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')'''  # group 1: "..." or '...'
    r'''|//[^\n]*'''                               # single-line comment
    r'''|/\*.*?\*/''',                             # multi-line comment
    re.DOTALL,
)


def remove_java_comments(java_source):
    """Return ``java_source`` with ``//`` and ``/* ... */`` comments removed.

    String and character literals are left untouched, so text such as
    ``"http://example.com"`` is not truncated. A single-line comment is
    removed up to, but not including, its terminating newline. An
    unterminated ``/*`` is left in place.
    """
    return _JAVA_LITERAL_OR_COMMENT.sub(
        lambda match: match.group(1) or "", java_source)


In [None]:
def sentence(s):
    '''
    Normalize a string into a "sentence": surrounding whitespace is stripped,
    the first character is upper-cased (the rest is left as is), and a period
    is appended unless the text already ends with one.

    Raises IndexError when `s` is empty or contains only whitespace.
    '''
    text = s.strip()
    capitalized = text[0].upper() + text[1:]
    return capitalized if text[-1] == '.' else capitalized + '.'


sentence(' hello world  ')

## Parameters

In [None]:
# Dry-run switch. If True: do not call the API, just print the prompts;
# the cells that write result files are also skipped in that mode.
only_print_prompt = False

In [None]:
# Load the settings from `secrets.ini`. This notebook reads the [project],
# [neo4j] and [openai] sections; the project name is used as a prefix for the
# result file names.
secrets = read_ini_file('secrets.ini')
project_name = secrets['project']['name']
project_name

## Connect to neo4j

Access the knowledge graph that was extracted with javapers and then loaded into a Neo4j graph database.

In [None]:
# Connect to Neo4j with the URL and credentials from the [neo4j] settings.
graph = Graph(secrets['neo4j']['url'], auth=(secrets['neo4j']['username'], secrets['neo4j']['password']))

## Connect to openai

In [None]:
# Map the optional [openai] settings onto the OpenAI client's keyword
# arguments; settings that are absent are simply not passed.
args = {
    client_param: secrets['openai'][setting]
    for setting, client_param in (('apikey', 'api_key'), ('apibase', 'base_url'))
    if setting in secrets['openai']
}

# The model name is optional as well and falls back to a default.
model = secrets['openai']['model'] if 'model' in secrets['openai'] else "gpt-3.5-turbo"

(args, model)

In [None]:
# Create the OpenAI client from the collected arguments; the trailing
# expression displays the base URL the client will talk to.
client = OpenAI(**args)
client.base_url

In [None]:
# Smoke test of the LLM server: send one trivial chat message using the
# configured model ...
completion = client.chat.completions.create(
  model=model,
  messages=[{"role":"user","content":"What is your name?"}]
)
# ... and print the text of the first returned choice.
print(completion.choices[0].message.content)

## Packages to be inspected

In [None]:
# Qualified names of all Container nodes of kind "package" that directly
# contain at least one Structure node (collected into a set, so no duplicates).
packages = { record['p']['qualifiedName'] for record in graph.run('match (p:Container)-[:contains]->(:Structure) where p.kind="package" return p') }

In [None]:
# Display the number of packages found, followed by their names.
len(packages), packages

## Ask LLM to summarize methods

In [None]:
# Result structure built by this cell:
#   goals[package] = {'qualifiedName': ..., 'classes': {class: {'kind': ...,
#       'qualifiedName': ..., 'methods': {method: {'simpleName': ...,
#       'description': ...}}}}}
goals = dict()
# Earlier prompt variant, kept for reference:
# method_prompt_template = '''This is method `{op_name}` of {struct_kind} `{struct_name}`:

# ```java
# {op_src}
# ```

# Write a 1-sentence documentation comment for this method in imperative mood.'''
method_prompt_template = '''This is method `{op_name}` of {struct_kind} `{struct_name}`:

```java
{op_src}
```

What is the purpose of this method?'''
# In one sentence, the purpose of this method is to'''

# The package name is passed as a query parameter instead of being pasted into
# the Cypher text. Rows are ordered by class because `groupby` below only
# merges *adjacent* rows with the same key; without the ordering, a class whose
# rows are not contiguous would be split into several groups and the last
# group would overwrite the earlier ones.
method_query = (
  'MATCH (p:Container)-[:contains]->(c:Structure)-[:hasScript]->(m:Operation) '
  'WHERE p.qualifiedName = $pkg_name AND m.kind = "method" '
  'RETURN p.qualifiedName, c.qualifiedName, c.kind, m.simpleName, m.sourceText '
  'ORDER BY c.qualifiedName, c.kind'
)

for pkg_name in sorted(packages):
  print(pkg_name)
  cursor = graph.run(method_query, pkg_name=pkg_name)
  grouped_node_data = groupby(cursor, lambda x: (x['c.kind'], x['c.qualifiedName']))
  # Every package gets an entry, even when the query returns no methods
  # (its 'classes' dict then stays empty).
  goals[pkg_name] = dict()
  goals[pkg_name]['qualifiedName'] = pkg_name
  goals[pkg_name]['classes'] = dict()
  for (kind, class_name), methods_data in grouped_node_data:
    class_desc = dict()
    class_desc['kind'] = kind  # the kind as stored in the graph
    class_desc['qualifiedName'] = class_name
    class_desc['methods'] = dict()
    # From here on `kind` is only the wording used inside the prompt.
    if kind == 'enumeration':
      kind = 'enum'
    elif kind == 'abstract':
      kind = 'abstract class'
    for row in methods_data:
      method_name = row['m.simpleName']
      prompt = method_prompt_template.format(
          op_name=method_name,
          struct_kind=kind,
          struct_name=class_name,
          op_src=remove_java_comments(row["m.sourceText"]))
      if only_print_prompt:
        print(prompt)
        print()
        continue
      response = None
      try:
        # The assistant message pre-fills the start of the answer, and the
        # stop sequence ends generation after the first sentence.
        response = client.chat.completions.create(
          model=model,
          messages=[{
            "role": "user",
            "content": prompt
          }, {
            "role": "assistant",
            "content": "In one sentence, the purpose of this method is to"
          }],
          max_tokens=1024, stop=". ",
          temperature=0)
        description = sentence(response.choices[0].message.content)
      except Exception as exc:
        # Best effort: record a placeholder and continue with the next method.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt usable.
        description = "(no description)"
        print(f"{type(exc).__name__}: {exc}")
        print(response)
      # NOTE: methods are keyed by simple name, so for overloaded methods the
      # last overload processed replaces the earlier ones.
      class_desc['methods'][method_name] = {'simpleName': method_name,
                                            'description': description}
    goals[pkg_name]['classes'][class_name] = class_desc
    print("  " + str(goals[pkg_name]['classes'][class_name]))
  print(str(goals[pkg_name]))
  print()
print(dict_to_pretty_json(goals))


In [None]:
from statistics import median, quantiles

# Size statistics of the collected data: classes per package and methods per class.
num_classes_per_pkg = [len(pkg_desc['classes']) for pkg_desc in goals.values()]
num_methods_per_class = [
    len(class_desc['methods'])
    for pkg_desc in goals.values()
    for class_desc in pkg_desc['classes'].values()
]

# Displayed tuple: number of packages, then (sum, min, max, median, quartiles)
# of classes per package, then the same five figures for methods per class.
(
    len(goals),
    sum(num_classes_per_pkg),
    min(num_classes_per_pkg),
    max(num_classes_per_pkg),
    median(num_classes_per_pkg),
    quantiles(num_classes_per_pkg, n=4),
    sum(num_methods_per_class),
    min(num_methods_per_class),
    max(num_methods_per_class),
    median(num_methods_per_class),
    quantiles(num_methods_per_class, n=4),
)


In [None]:
# Persist the method-level results (stage 1); skipped in prompt-only mode.
if not only_print_prompt:
  write_dict_to_json(goals, f"{project_name}-goals_1-{model}.json")

In [None]:
# Reload the stage-1 results from disk (replaces the in-memory `goals`) and
# display the package names they contain.
goals = read_json_file(f"{project_name}-goals_1-{model}.json")
goals.keys()

## Ask LLM to summarize classes (based on methods)

In [None]:
# Collect, per owning node, the (field name, field type) pairs found in the graph.
cursor = graph.run('MATCH p=(c)-[:hasVariable]->(v)-[:type]->(t) RETURN c.qualifiedName,v.simpleName,t.qualifiedName')

# class qualified name -> list of (field name, type qualified name)
class_fields = dict()

for record in cursor:
  class_name = record['c.qualifiedName']
  field_name = record['v.simpleName']
  field_type = record['t.qualifiedName']
  class_fields.setdefault(class_name, []).append((field_name, field_type))

class_fields

In [None]:
class_prompt_template = '''A Java {struct_type} `{struct_name}` contains the following field(s) and method(s):

Fields:

{fields}

Methods:

{methods}

What is the purpose of this {struct_type}?'''
# Earlier prompt endings that were tried, kept for reference:
# In one sentence, the purpose of this class is to'''
# Write a one-sentence documentation comment for the Java {struct_type} `{struct_name}` without mentioning the context of the project/app/library.
# Start the sentence with "The Java {struct_type} `{struct_name}` ..."'''
# Write a 1-sentence documentation comment for the {struct_type} in imperative mood. Group the methods into key responsibilities. Give short names to the responsibilities. Provide the result as json string like:

# {{"description":"...","responsibilities":[{{"name": "...", "description": "...", "methodsInvolved": [method1Signature, method2Signature, ...]}}, ...]}}

# Only output the json string without any additional text.'''

# For every class, build a prompt from its fields and the method descriptions
# obtained earlier, ask the LLM for a one-sentence purpose and store it under
# goals[package]['classes'][class]['description'].
for pkg_name in sorted(goals.keys()):
  classes = goals[pkg_name]['classes']
  print('#', pkg_name)
  print()
  for class_name in classes:
    kind = classes[class_name]['kind']
    methods = classes[class_name]['methods']
    # Each placeholder gets its own fallback. Previously the "(no methods)"
    # fallback was attached to the whole format() call, so a class without
    # methods had its entire prompt replaced by "(no methods)".
    if class_name in class_fields:
      fields_text = "\n".join(f"- `{t} {f}`" for f, t in class_fields[class_name])
    else:
      fields_text = "(no fields)"
    if methods:
      methods_text = "\n".join(
        f"- `{method_desc['simpleName']}`: {method_desc['description']}"
        for method_desc in methods.values())
    else:
      methods_text = "(no methods)"
    prompt = class_prompt_template.format(
      struct_type=kind,
      struct_name=class_name,
      fields=fields_text,
      methods=methods_text)
    if only_print_prompt:
      print(prompt)
      print()
      continue
    response = None
    try:
      # The assistant message pre-fills the start of the answer, and the stop
      # sequence ends generation after the first sentence.
      response = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt
        }, {
            "role": "assistant",
            "content": f"In one sentence, the purpose of this {kind} is to"
        }],
        max_tokens=1024,
        stop=". ",
        temperature=0)
      class_goal = sentence(response.choices[0].message.content)
    except Exception as exc:
      # Best effort: record a placeholder and continue with the next class.
      # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt usable.
      class_goal = "(no description)"
      print(f"{type(exc).__name__}: {exc}")
      print(response)
    goals[pkg_name]['classes'][class_name]['description'] = class_goal
    print(class_name +": "+ str(class_goal))
    print()
  print()
print(dict_to_pretty_json(goals))


In [None]:
# Persist the results including class descriptions (stage 2); skipped in
# prompt-only mode.
if not only_print_prompt:
  write_dict_to_json(goals, f"{project_name}-goals_2-{model}.json")


In [None]:
# goals = read_json_file(f"{project_name}-goals_2.json")
# goals.keys()

## Ask LLM to summarize packages (based on classes)

In [None]:
package_prompt_template = '''Given a Java package `{pkg_name}` containing the following classes:

{classes}

What is the purpose of this package?'''
# Earlier prompt endings that were tried, kept for reference:
# In one sentence, the purpose of this package is to'''
# Write a 1-sentence documentation comment for the package in imperative mood (without comment markers).'''
nl = '\n'

# For every package that has classes, build a prompt from the class
# descriptions, ask the LLM for a one-sentence purpose and store it under
# goals[package]['description']. Packages without classes are skipped and get
# no 'description' key.
for pkg_name in sorted(goals.keys()):
  classes = goals[pkg_name]['classes']
  if classes:
    prompt = package_prompt_template.format(
      pkg_name=pkg_name,
      # Newlines are stripped so each class stays on one bullet line. A class
      # that has no description yet (e.g. the class cell was run in
      # prompt-only mode) is listed with the placeholder instead of raising
      # KeyError.
      classes="\n".join(
        f"- {class_desc['kind']} `{class_desc['qualifiedName']}`: "
        f"{class_desc.get('description', '(no description)').replace(nl, '')}"
        for class_desc in classes.values())
    )
    if only_print_prompt:
      print(prompt)
    else:
      response = None
      try:
        # The assistant message pre-fills the start of the answer, and the
        # stop sequence ends generation after the first sentence.
        response = client.chat.completions.create(
          model=model,
          messages=[{
              "role": "user",
              "content": prompt
          }, {
              "role": "assistant",
              "content": "In one sentence, the purpose of this package is to"
          }],
          max_tokens=1024,
          stop=". ",
          temperature=0)
        goals[pkg_name]['description'] = sentence(response.choices[0].message.content)
      except Exception as exc:
        # Best effort: record a placeholder and continue with the next package.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt usable.
        goals[pkg_name]['description'] = '(no description)'
        print(f"{type(exc).__name__}: {exc}")
        print(response)
      print(goals[pkg_name]['description'])
print()
print(dict_to_pretty_json(goals))


In [None]:
# Persist the results including package descriptions (stage 3); skipped in
# prompt-only mode.
if not only_print_prompt:
  write_dict_to_json(goals, f"{project_name}-goals_3-{model}.json")


In [None]:
# goals = read_json_file(f"{project_name}-goals_3-MistralOrca-7b-v2.json")
# goals.keys()

## Which methods/classes/packages could not be summarized by the LLM?

In [None]:
# (package, class, method) triples whose description is the placeholder that
# is stored when no summary could be obtained.
method_no_desc = [
    (pkg_name, class_name, method_name)
    for pkg_name, pkg_desc in goals.items()
    for class_name, class_desc in pkg_desc['classes'].items()
    for method_name, method_desc in class_desc['methods'].items()
    if method_desc['description'] == "(no description)"
]

print(dict_to_pretty_json(method_no_desc))


In [None]:
# (package, class) pairs without a usable description: either the placeholder
# was stored, or the class has no 'description' key at all (e.g. the class
# summarization cell was run in prompt-only mode). A missing key is reported
# here instead of raising KeyError.
class_no_desc = [
    (pkg_name, class_name)
    for pkg_name, pkg_desc in goals.items()
    for class_name, class_desc in pkg_desc['classes'].items()
    if class_desc.get('description', "(no description)") == "(no description)"
]

print(dict_to_pretty_json(class_no_desc))


In [None]:
# Packages without a usable description: either the placeholder was stored, or
# the package has no 'description' key at all (packages without classes are
# never sent to the LLM, and nothing is stored in prompt-only mode). A missing
# key is reported here instead of raising KeyError.
package_no_desc = [
    pkg_name
    for pkg_name, pkg_desc in goals.items()
    if pkg_desc.get('description', "(no description)") == "(no description)"
]

print(dict_to_pretty_json(package_no_desc))


# UNUSED

## Ask LLM to decompose package into subpackages

This part may require manual tuning of prompts and "conditioning" the LLM to get the desired results.

In [None]:
def parse_text(text):
    """Parse entries of the form `` `key`: [`v1`, `v2`] `` into a dict.

    Every match contributes one key mapped to the list of its comma-separated
    values, with backticks and surrounding whitespace removed. If a key
    occurs more than once, the last occurrence wins. Text that does not match
    the entry format is ignored.
    """
    entry_pattern = r"`(.*?)`\s*:\s*\[(`.*?`)\]"
    return {
        key: [item.replace('`', '').strip() for item in values.split(',')]
        for key, values in re.findall(entry_pattern, text)
    }


# Small demonstration of the expected input format.
text = """
- `key1`: [`Value1`, `Value2`]
- `key2`: [`Value3`]
"""

parsed_dict = parse_text(text)
print(parsed_dict)


In [None]:
# Display the current contents of `goals` for inspection.
goals

In [None]:
# Experimental: ask the LLM to regroup the classes of a large package (more
# than 7 classes) into subpackages and store the parsed answer under
# goals[package]['subpackages'].
for pkg_name, pkg_desc in goals.items():
  if len(pkg_desc['classes']) > 7:
    pkg_desc['subpackages'] = []
    # Few-shot example that was tried as a prompt prefix (currently disabled):
    # prompt = """Java package `CH.ifa.draw.applet` contains the following classes:

    # * class `CH.ifa.draw.applet.DrawApplet`: Provide a simple interface for creating and manipulating drawings using various tools.
    # * class `CH.ifa.draw.applet.SleeperThread`: Display a loading icon and keep the applet busy while it is loading.

    # By considering commonalities among the description of classes from package `CH.ifa.draw.applet`, this package can be reorganized into the following subpackages:

    # - `CH.ifa.draw.applet.core`: [`DrawApplet`]
    # - `CH.ifa.draw.applet.loading`: [`SleeperThread`]


    # """
    prompt = ""
    prompt += f'Java package `{pkg_name}` contains the following classes:\n\n'
    for class_name, class_desc in pkg_desc['classes'].items():
      kind = class_desc['kind']
      prompt += f'* {kind} `{class_name}`: {class_desc["description"]}\n'
    prompt += '\n'
    # The prompt deliberately ends with the opening "- `" of the first list
    # entry; the same prefix is put back in front of the answer before parsing.
    prompt += f'By considering the functionalities of classes from package `{pkg_name}`, this package can be reorganized into the following subpackages:\n\n- `'
    # Alternative JSON-based prompt that was tried (disabled):
    # prompt += (f'The purpose of package `{pkg_name}` is to {pkg_desc["description"]}\n'
    #            'Determine its subgoals and give the result as json object like:\n\n'
    #            '{"subgoals": [{"name": "...", "description": "...", "classesInvolved": [class1QualifiedName, ...]}, ...]}\n\n')
    # prompt += 'A subgoal encompasses several classes with common or similar goals—there may be only one subgoal if the package is cohesive enough. Try your best to have as few subgoals as possible. Only respond with json text with no other text at all.'
    if only_print_prompt:
      print(prompt)
      print()
    else:
      response = None
      try:
        print("======")
        print(prompt)
        print()
        response = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": prompt
            }],
            # prompt=prompt, max_tokens=8000,
            temperature=0)
        print(response)
        # response = parse_json(response['choices'][0]['message']['content'])
        # response_text = response['choices'][0].text
        response_text = response.choices[0].message.content
        parsed_response = parse_text(f"- `{response_text}")
        print(response_text)
        goals[pkg_name]['subpackages'] = parsed_response  # ['subgoals']
        # Disabled second step (written against the legacy completion API):
        # desc = "description"
        # prompt2 = f"{prompt[:-1]}{nl.join([f'- `{k}`: {parsed_response[k][desc]}' for k in parsed_response])}"
        # prompt2 += "\n\nThese are the classes that should be contained in each subpackage:\n\n"
        # for subpkg in parsed_response:
        #   # prompt2 += f"* {subpkg}:\n\n    -"
        #   prompt2 += f"* {subpkg}: `"
        #   print("======")
        #   print(prompt2)
        #   response = openai.Completion.create(
        #       model=model,
        #       prompt=prompt2, max_tokens=1024, stop="\n",
        #       temperature=0)
        #   response_text = response['choices'][0].text
        #   print(response_text)
        #   prompt2 += f"{response_text}\n\n"
      except Exception as exc:
        # Best effort: report the failure and leave 'subpackages' as the empty
        # list set above. `Exception` (not a bare except) keeps Ctrl-C usable,
        # and the error is printed because `response` is still None when the
        # API call itself failed.
        print(f"{type(exc).__name__}: {exc}")
        print(response)
    # NOTE(review): stops after the first package with more than 7 classes —
    # presumably to keep the experiment small; confirm before relying on it.
    break

print()
# print(dict_to_pretty_json(goals))


In [None]:
# Persist the results including the subpackage experiment; skipped in
# prompt-only mode. Unlike the earlier stages, this file name does not include
# the model name.
if not only_print_prompt:
  write_dict_to_json(goals, f"{project_name}-goals_4b.json")


# UNUSED
## Experiment for classification -- not used in the paper

In [None]:
# Experimental: ask the LLM to assign every class of every package to one of
# four architectural layers and accumulate the class names per layer.
layers = {
  "presentation": [],
  "application services": [],
  "domain services": [],
  "technical services": []
}
for pkg_name, pkg_desc in goals.items():
  pkg_desc['subgoals'] = []
  prompt = f'Java package `{pkg_name}` contains the following class(es) with responsibilities:\n\n'
  for class_name, class_desc in pkg_desc['classes'].items():
    # Use this class's own kind. The loop previously read the global `kind`
    # left over from an earlier cell, so every class was labelled with the
    # kind of whichever class that cell processed last.
    prompt += f'- {class_desc["kind"]} `{class_name}`: {class_desc["description"]}\n'
    # for resp in class_desc["responsibilities"]:
    #   prompt += f'    * {resp["name"]}\n'
  prompt += '\n'
  prompt += f'The purpose of package `{pkg_name}` is to {pkg_desc["description"]}\nClassify each class into one of architectural layers "Presentation", "Application Services", "Domain Services", and "Technical Services". Give the result as json string:\n\n'
  prompt += '''{
  "presentation": ["class1QualifiedName",...],
  "application services": [...], ...
}'''
  prompt += '\n\nOnly respond with json text with no other text at all.'
  if only_print_prompt:
    print(prompt)
    print()
  else:
    response = None
    try:
      response = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt
        }],
        temperature=0)
      classification = parse_json(response.choices[0].message.content)
      print(classification)
      for layer, class_names in classification.items():
        # The prompt spells the layer names capitalized while the JSON example
        # uses lower case, so normalize the key. A layer name outside the four
        # expected ones is kept under its own key instead of raising KeyError
        # and dropping the rest of the answer.
        layer_classes = layers.setdefault(layer.strip().lower(), [])
        for clazz in class_names:
          layer_classes.append(clazz)
    except Exception as exc:
      # Best effort: report the failure (API error or an answer that is not
      # the requested JSON) and continue with the next package. `Exception`
      # (not a bare except) keeps Ctrl-C / kernel interrupt usable.
      print(f"{type(exc).__name__}: {exc}")
      print(response)
    print(layers)

print()
print(dict_to_pretty_json(layers))


# UNUSED

In [None]:
# Persist the layer classification; skipped in prompt-only mode.
# NOTE(review): the model name in the file name is hard-coded rather than
# taken from `model` — confirm this is intended.
if not only_print_prompt:
  write_dict_to_json(
      layers, f"{project_name}-layers-MistralOrca-7b.json")


# UNUSED

In [None]:
role_stereotypes = {
	"Controller": '''The controller stereotype in object-oriented design encompasses objects or classes that play a pivotal role in decision-making and orchestration of actions within a system. To determine whether a method in a class aligns with the controller stereotype, one should consider the following characteristics:

1. **Decision-Making Responsibility:**
   - A method within the controller should primarily focus on making decisions rather than performing subsequent actions directly.

2. **Collaboration with Others:**
   - Controllers collaborate with other objects or classes to gather information necessary for decision-making and to delegate specific actions.

3. **Initiation and Sequencing:**
   - Methods within controllers often initiate and control a cycle of action. They may involve repetitive cycles with conditional branching logic or respond to specific events or circumstances.

4. **Delegation of Actions:**
   - The ultimate responsibility for accomplishing actions is often delegated to other objects or classes with more specific responsibilities, which are managed by the controller.

5. **System-Wide Activity Control:**
   - Controllers can be responsible for initiating and controlling ongoing system-wide activities or iterating over minor application tasks.

6. **Event-Driven Behavior:**
   - Methods within controllers might be triggered by events or circumstances, allowing them to respond to changes in the system.

7. **Independence from User Directives:**
   - Controllers are not limited to responding solely to user directives; they can be involved in managing various parts of an application, initiating, sequencing, and possibly completing cycles of activity.

By evaluating a method against these characteristics, one can determine whether it aligns with the controller stereotype. If the method primarily involves decision-making, collaboration with other objects, and the orchestration of actions within a system, it is likely to exhibit the traits associated with the controller stereotype in object-oriented design.''',

	"Coordinator": '''The coordinator stereotype in object-oriented design encapsulates classes or objects that specialize in managing and facilitating communication between other objects within a system. Here are key characteristics that can be used to evaluate whether a method in a class belongs to the coordinator stereotype:

1. **Event Reaction and Delegation:**
   - Coordinators react to events triggered within the system.
   - Methods within a coordinator class should primarily focus on delegating tasks to other objects or classes based on the events they respond to.

2. **Information Passing:**
   - Coordinators exist to pass along information between objects rather than performing the actual tasks themselves.
   - Methods in a coordinator class should be primarily responsible for forwarding information and requests to other objects.

3. **Connection Management:**
   - Coordinators specialize in holding connections between objects.
   - Methods should be geared towards establishing and managing connections or relationships between various objects in the system.

4. **Communication Facilitation:**
   - A coordinator's main purpose is to facilitate communication and collaboration between objects.
   - Methods should be designed to enhance communication channels and streamline the interaction between different components in the system.

5. **Traffic Cop and Managerial Role:**
   - Coordinators act as traffic cops and managers within a system, ensuring smooth flow and coordination of tasks.
   - Methods should exhibit managerial qualities, orchestrating the execution of tasks by other objects.

6. **Request-Service Pairing:**
   - Coordinators often pair client requests with the desired services or objects capable of performing the requested service.
   - Methods should involve the intelligent matching of client requests with appropriate services or objects.

7. **Name Conventions:**
   - A coordinator's name might follow a convention such as ending with "-Manager."
   - Methods within the class should align with this managerial role, focusing on coordination and delegation rather than direct task execution.

8. **Dynamic Context Establishment:**
   - In certain design patterns, coordinators may respond to requests by briefly establishing an appropriate context before delegating to other objects.
   - Methods may involve context setup or configuration before handing over the task to relevant objects.

9. **Elimination of Hard-Wired References:**
   - Coordinators prove their worth by eliminating the need for direct, hard-wired references between objects.
   - Methods should contribute to the flexibility and maintainability of the system by avoiding tightly coupled dependencies.

By evaluating a method against these characteristics, one can determine whether it aligns with the coordinator stereotype and plays a role in managing, delegating, and facilitating communication within the overall system architecture.''',

	"Structurer": '''The structurer stereotype in object-oriented design pertains to a class or object that is responsible for organizing and maintaining relationships between other objects within an application. When evaluating whether a method in a class belongs to the structurer stereotype, consider the following criteria:

1. **Relationship Maintenance:**
   - The primary responsibility of a structurer is to maintain relationships between application objects. Evaluate whether the method is involved in establishing, modifying, or managing relationships between instances of other classes.

2. **Information Organization:**
   - A structurer organizes and structures information by grouping objects in different ways. Examine whether the method contributes to the organization or structuring of information within the system.

3. **Object Pooling and Management:**
   - Check if the method involves pooling, collecting, or managing objects. Structurers are often responsible for handling the lifecycle and storage of objects they organize.

4. **Collaboration and Connectivity:**
   - Assess whether the method collaborates with other objects, especially those responsible for connecting to external databases or devices. Structurers may obtain objects through collaborations with external entities.

5. **Visibility of Objects:**
   - A structurer needs visibility into other objects for a reason. Verify whether the method requires knowledge about and interaction with other objects within the application.

6. **Complex Relationship Management:**
   - If the method is responsible for maintaining complex relationships, it aligns with the structurer stereotype. This includes managing connections and constraints among related objects.

7. **Additional Behavior:**
   - While the primary focus is on relationship maintenance, consider whether the method exhibits any additional behavior such as tracking references, managing space, or providing other relevant information about the structurer's contents.

8. **Permanence and Connectivity Rules:**
   - Recognize that the rules surrounding permanence and connectivity between structurers and their parts may not be straightforward. Debate on composition versus aggregation may not always be conclusive; however, the method should contribute to maintaining the intended relationships.

9. **Business Object Relationships:**
   - In many applications, business objects have intricate structural relationships. If the method deals with the structuring of complex relationships, it aligns with the structurer stereotype.

10. **Content-Centric Perspective:**
    - A structurer is primarily concerned with its contents. Evaluate whether the method's main focus is on the organization and management of the objects it contains.

When classifying a method as belonging to the structurer stereotype, prioritize its role in maintaining relationships between objects, and consider additional behaviors as secondary considerations. The method should contribute to the overarching goal of structuring and organizing information within the application.''',

	"Information Holder": '''The information holder stereotype in object-oriented design encapsulates classes or objects that are primarily responsible for acquiring, storing, and providing information. When evaluating whether a method in a class belongs to the information holder stereotype, consider the following characteristics:

1. **Knowledge Management**: Information holder classes focus on knowing and managing specific facts or data. If a method's primary responsibility is to acquire, store, or manipulate information, it aligns with the information holder stereotype.

2. **Limited Collaboration**: Information holders typically have limited collaboration with other objects, especially for obtaining the information they are responsible for. If a method interacts extensively with other objects to perform actions beyond information management, it might not strictly adhere to the information holder stereotype.

3. **Consistency Maintenance**: Methods within an information holder class should contribute to maintaining the consistency of the stored information. If a method modifies or updates information within the object and ensures its correctness, it aligns with the information holder stereotype.

4. **Independence in Information Retrieval**: Information holder classes often retrieve information internally or from hidden helpers. If a method actively retrieves or processes information from external sources, it may not strictly adhere to the information holder stereotype.

5. **Persistence Management**: In some cases, information holders are responsible for ensuring the persistence of their data. If a method involves tasks related to data persistence, such as saving or loading information, it aligns with the information holder stereotype.

6. **Computation and Interpretation**: Information holders may go beyond simple storage and retrieval, sometimes involving computation or interpretation of the stored facts. If a method within the class performs calculations or interprets data, it remains consistent with the information holder stereotype.

7. **Responsibility for Yielding Information**: Methods in information holder classes should be geared towards yielding information when queried. If a method primarily serves the purpose of providing information in response to requests, it aligns with the information holder stereotype.

8. **Value Holding for Various Applications**: Information holder classes might be designed to hold values relevant to diverse application objects. If a method's purpose is to manage values that can be requested by various parts of the application, it adheres to the information holder stereotype.

By evaluating methods against these characteristics, you can determine the extent to which a class conforms to the information holder stereotype in object-oriented design.''',

	"Service Provider": '''The service provider stereotype in object-oriented design characterizes a class or object that serves as a specialized entity dedicated to performing specific computing services. To evaluate whether a method in a class belongs to the service provider stereotype, consider the following criteria:

1. **Work and Computing Services:**
   - A method within the service provider should primarily focus on performing specific work or offering computing services.

2. **Specialized Responsibilities:**
   - The method should handle responsibilities that require specialized skills or computations. It contributes to the overall functionality of the service provider by executing tasks that demand a distinct set of capabilities.

3. **Single Operation or Activity:**
   - Methods within the service provider should typically be designed to perform a single operation or activity on demand. Each method should have a well-defined purpose and contribute to the overall functionality of the service provider.

4. **Simple Protocol for Defined Operation:**
   - A well-designed method within the service provider should provide a straightforward and clear protocol for a specific operation. It should be easy to set up and use, ensuring simplicity in its implementation.

5. **Factored Design and Specialized Behaviors:**
   - Methods belonging to the service provider stereotype are often part of a highly factored design, where various classes contribute highly specialized behaviors. The method's purpose should align with the specialized nature of the service provider it belongs to.

6. **Configurability and Optional Features:**
   - The method may facilitate optional or configurable software features. It supports the adaptability of the system by allowing users to add or remove entire classes of objects, thus configuring the product's features without directly modifying class behaviors.

By considering these criteria, you can assess whether a method in a class conforms to the service provider stereotype. If the method primarily focuses on specialized computing services, has a well-defined purpose, and aligns with the characteristics associated with service providers, it can be considered part of this stereotype.''',

	"Interfacer": '''The interfacer stereotype in object-oriented design encompasses classes or objects that play a crucial role in facilitating communication and interaction between disparate components within a system. To evaluate whether a method in a class belongs to the interfacer stereotype, one should consider the following characteristics:

1. **Information Transformation:**
   - Methods within an interfacer class should primarily focus on transforming information or requests between different parts of the system. They act as bridges that enable seamless communication and collaboration.

2. **Boundary Responsibilities:**
   - Interfacers are typically found at the boundaries of an object-oriented application. Methods within this stereotype are responsible for managing interactions with users, other programs, or external services, depending on the specific type of interfacer.

3. **Collaboration Patterns:**
   - Methods within an interfacer class may exhibit specific collaboration patterns. For example, user interfacers may collaborate with non-UI parts of the application to signal events or changes, while internal interfacers delegate external requests to objects within their neighborhood.

4. **Encapsulation of Details:**
   - Interfacers should encapsulate many details related to communication protocols, formatting, or conversion of information. Methods within an interfacer class are designed to hide complexity and provide a higher-level, abstract protocol to the interacting components.

5. **Support for Two-Way Communication:**
   - Depending on the specific design requirements, interfacer methods might support ongoing two-way communication with external entities. This could involve handling user requests, transmitting updates, or translating external events into messages for application objects.

6. **Focus on Protocol Definition:**
   - When evaluating a method for the interfacer stereotype, consider whether it contributes to defining the protocol to the outside world. Interfacers often play a role in shaping how the application communicates with users, other programs, or external services.

7. **Responsibility for Bridging:**
   - Interfacers act as bridges between the non-object world and the object world of messages and objects. Methods within an interfacer class should reflect this responsibility by managing the flow of information between different realms of the system.

8. **Stylized Dialogue (for User Interfacers):**
   - In the case of user interfacers, methods may support a highly stylized dialogue between the user and the system. This could involve handling user input, displaying information, and managing the user experience.

By assessing these characteristics, one can determine whether a method aligns with the interfacer stereotype and is effectively contributing to the role of facilitating communication and collaboration in the system.'''
}

# Sanity check: show how many role stereotypes are defined, then dump each
# one as a "# <name>" heading followed by its description text.
print(len(role_stereotypes))

for rs in role_stereotypes:
	print(f'# {rs}\n\n{role_stereotypes[rs]}\n')

## Experiment on method responsibility stereotypes to support class role stereotypes

In [None]:
# Prompt template for multi-label method-stereotype classification.
# It is filled in with str.format() using the placeholders op_name,
# struct_kind, struct_name, comment, op_src and json_template, and asks the
# model to answer with a JSON array of the stereotype names that apply.
rs_prompt_template = '''This is method `{op_name}` of {struct_kind} `{struct_name}`:

```java
/**
 * {comment}
 */
{op_src}
```

Does this method appear to be from each of the method stereotypes below, i.e., is this the main responsibility of the method?

- Decider: invoke method(s) of other objects and perform further actions based on the return value.
- Sequencer: perform repetitive cycles.
- Event-handler: perform action based on specific events or circumstances; method name may start with "on-".
- Event-dispatcher: call the method of event listeners/handlers.
- Forwarder: pass along information from its parameter(s) to another object, as method argument(s).
- Connector: link client-type objects to server-type objects.
- Manager: orchestrate the execution of services provided by other objects.
- Collector: add or remove an object (its parameter) to/from a collection.
- Container: provide information of the state of a collection, e.g., whether it is empty or full, how many objects are in the collection.
- Mapper: map an object to another.
- Finder: perform a search algorithm on a collection of objects.
- Getter: return the value of a field.
- Setter: assign a value from parameter to a field.
- Converter-getter: return the converted or modified value of a field.
- Prober: invoke a field's method and return the result.
- Converter-setter: convert or modify a parameter value before assigning it to a field.
- Multi-setter: assign values from parameter(s) to several fields.
- Validator: check if a parameter value complies to a particular constraint and return a boolean.
- Enforcer: force a parameter value to comply to a particular constraint, e.g., by returning a default value.
- Calculator: perform a complex calculation and return the result.
- Computator: perform a complex computation and return the result.
- Transformer: convert or modify a parameter value and return the result.
- Displayer: set the attributes of UI objects.
- Predicate: return a computed boolean value.
- Complex-constructor: construct an object with a complex configuration and return the constructed object.
- Converter: construct a different representation of the current object (`this`) and return the result.
- Loader: load data from external source, e.g., a file, database, or network connection.
- Storer: store data to external location, e.g., a file, database, or network connection.

Answer as JSON array that contains the stereotypes for which the answer is yes, like so: {json_template}'''
# In one sentence, the purpose of this method is to'''
# For every method of every class in every package, ask the model which method
# stereotypes apply (see rs_prompt_template) and store the answer under
# goals[pkg]['classes'][cls]['methods'][method]['stereotypes'].
# When only_print_prompt is set, the prompts are printed and nothing is stored.
for pkg_name in sorted(packages):
  print('#', pkg_name)
  print()
  cursor = graph.run('MATCH (p:Container)-[:contains]->(c:Structure)-[:hasScript]->(m:Operation)'
                        f'WHERE p.qualifiedName="{pkg_name}" AND m.kind="method"'
                        'RETURN p.qualifiedName, c.qualifiedName, c.kind, m.simpleName, m.sourceText')

  # itertools.groupby only merges *adjacent* rows and the query has no
  # ORDER BY, so rows are collected per (kind, class) explicitly instead.
  # A dict keeps the classes in first-seen order.
  methods_by_class = {}
  for row in cursor:
    methods_by_class.setdefault((row['c.kind'], row['c.qualifiedName']), []).append(row)

  for (kind, class_name), methods_data in methods_by_class.items():
    print('##', class_name)
    print()
    # Use the Java keyword for the structure kind in the prompt text.
    if kind == 'enumeration':
      kind = 'enum'
    elif kind == 'abstract':
      kind = 'abstract class'
    class_methods = goals[pkg_name]['classes'][class_name]['methods']
    for row in methods_data:
      method_name = row['m.simpleName']
      method_entry = class_methods[method_name]
      print('###', method_name)
      print()
      print(method_entry['description'])
      print()
      prompt = rs_prompt_template.format(
        op_name=method_name,
        struct_kind=kind,
        struct_name=class_name,
        op_src=remove_java_comments(row["m.sourceText"]),
        comment=method_entry['description'],
        json_template='["Decider", "Sequencer"]')
      if only_print_prompt:
        print(prompt)
        print()
        continue

      # Best effort: a failed request must not abort the whole run, but it is
      # reported and recorded as "no stereotypes" instead of crashing later
      # on a missing response object.
      try:
        response = client.chat.completions.create(
          model=model,
          messages=[{
            "role": "user",
            "content": prompt
          }],
          max_tokens=2048,
          temperature=0)
        answer = response.choices[0].message.content
      except Exception as err:
        print(f'!! request failed for {class_name}.{method_name}: {err!r}')
        answer = None

      if answer is None:
        stereotypes = []
      else:
        try:
          stereotypes = json.loads(answer)
        except json.JSONDecodeError:
          # The model did not answer with valid JSON; keep the raw text.
          stereotypes = [answer]
      print(dict_to_pretty_json(stereotypes))
      print()
      method_entry['stereotypes'] = stereotypes
  print(str(goals[pkg_name]))
  print()
print(dict_to_pretty_json(goals))


In [None]:
from collections import Counter

# Aggregate the per-method stereotypes into a per-class profile: for each
# class, the fraction of its methods that were tagged with each stereotype,
# stored under goals[pkg]['classes'][cls]['stereotypes'].
for pkg_name in sorted(goals):
  print(pkg_name)
  for cls_name, cls_entry in goals[pkg_name]['classes'].items():
    print(cls_name)
    methods = cls_entry['methods']
    stereotypes = Counter()
    for method_entry in methods.values():
      # A method that was never classified (e.g. the classification cell ran
      # with only_print_prompt = True) has no 'stereotypes' key; count it as
      # having none instead of raising KeyError.
      stereotypes.update(method_entry.get('stereotypes', []))
    print(stereotypes)
    num_methods = len(methods)
    # An empty Counter yields an empty dict, so a class without methods never
    # reaches the division.
    cls_entry['stereotypes'] = {s: count / num_methods for s, count in stereotypes.items()}

print(dict_to_pretty_json(goals))

In [None]:
# Persist the annotated goals to a per-project, per-model JSON file, but only
# when the API was actually called (nothing new to save in print-only mode).
if not only_print_prompt:
  write_dict_to_json(goals, "{}-goals-rs-{}.json".format(project_name, model))



# UNUSED

In [None]:
# Prompt template for the one-stereotype-at-a-time variant. It rebinds
# rs_prompt_template, replacing the multi-label template defined earlier.
# It is filled in with str.format() using op_name, struct_kind, struct_name,
# comment and op_src; the loop below sends it after a role_stereotypes
# description, which is what "abovementioned stereotype" refers to.
rs_prompt_template = '''This is method `{op_name}` of {struct_kind} `{struct_name}`:

```java
/**
 * {comment}
 */
{op_src}
```

Does this method appear to be from the abovementioned stereotype? Answer with only YES or NO in the first line, and then the reasoning in the next line.'''
# In one sentence, the purpose of this method is to'''
# For every method, ask the model once per role stereotype whether the method
# fits that stereotype (YES/NO plus reasoning) and print the answer.
# Nothing is written back to goals here.
for pkg_name in sorted(packages):
  print('#', pkg_name)
  cursor = graph.run('MATCH (p:Container)-[:contains]->(c:Structure)-[:hasScript]->(m:Operation)'
                        f'WHERE p.qualifiedName="{pkg_name}" AND m.kind="method"'
                        'RETURN p.qualifiedName, c.qualifiedName, c.kind, m.simpleName, m.sourceText')

  # itertools.groupby only merges *adjacent* rows and the query has no
  # ORDER BY, so rows are collected per (kind, class) explicitly instead.
  # A dict keeps the classes in first-seen order.
  methods_by_class = {}
  for row in cursor:
    methods_by_class.setdefault((row['c.kind'], row['c.qualifiedName']), []).append(row)

  for (kind, class_name), methods_data in methods_by_class.items():
    print('##', class_name)
    # Use the Java keyword for the structure kind in the prompt text.
    if kind == 'enumeration':
      kind = 'enum'
    elif kind == 'abstract':
      kind = 'abstract class'
    for row in methods_data:
      method_name = row['m.simpleName']
      print('###', method_name)
      # The method part of the prompt is the same for every stereotype, so it
      # is built once per method rather than once per stereotype.
      prompt = rs_prompt_template.format(
        op_name=method_name,
        struct_kind=kind,
        struct_name=class_name,
        op_src=remove_java_comments(row["m.sourceText"]),
        comment=goals[pkg_name]['classes'][class_name]['methods'][method_name]['description'])
      for rs in role_stereotypes:
        print('*', rs)
        full_prompt = role_stereotypes[rs] + '\n\n' + prompt
        if only_print_prompt:
          print(full_prompt)
          print()
          continue
        # Best effort: report a failed request and move on to the next
        # stereotype instead of crashing on a missing response object.
        try:
          response = client.chat.completions.create(
            model=model,
            messages=[{
              "role": "user",
              "content": full_prompt
            }],
            max_tokens=2048,
            temperature=0)
          answer = response.choices[0].message.content
        except Exception as err:
          print(f'!! request failed for {class_name}.{method_name} ({rs}): {err!r}')
          continue
        print(answer)
  print(str(goals[pkg_name]))
  print()
print(dict_to_pretty_json(goals))
