In [50]:
pip install groq scikit-learn openai

Note: you may need to restart the kernel to use updated packages.


# Load input file

In [51]:
import os

project_title_map = {
        "archives_space": "Archives Space Project",
        "neurohub": "NeuroHub Project",
        "open_spending": "Open Spending Project",
        "planning_poker": "Planning Poker Project",
        "recycling": "Recycling Project",
        "color_ide": "ColorIDE Project"
    }
projects = ["archives_space", "neurohub", "open_spending", "planning_poker", "recycling", "color_ide"]

#selected_project = os.environ.get("project")
#file_copy_index = os.environ.get("input_file_copy_index")
selected_project = projects[5]
file_name = "user_stories_{}.json".format(selected_project)
project_path = "./input_files/{}".format(file_name)
with open(project_path, 'r') as file:
        project_content = file.read()
        project_title = project_title_map[selected_project]

# Configure model parameters

In [52]:
import groq
from openai import OpenAI

client = OpenAI(api_key="", base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
model_version = "gemini-2.0-flash"


#client = groq.Client(api_key="")
#model_version = "llama-3.3-70b-versatile"

#client = OpenAI(api_key="")
#model_version = "gpt-4o"

#model_version = "llama3.1:8b-instruct-fp16"

num_ctx = 15000 # context length is higher because of the refinement process
temperature = 0.000000001  # should be kept 0 for deterministic results, default value 0.8
#temperature = 0
top_p = 0.0000001
model_name = model_version

one_shot = False

add_pattern_descriptions = True

# Get analysis messages for each pattern

In [53]:

analysis_list = []
import time

patterns = ["Layered Architecture",
    "Event-Driven Architecture",
    "Microkernel Architecture",
    "Microservices Architecture",
    "Space-Based Architecture",
    "Pipeline Architecture",
    "Client-Server Architecture"]

analyzer_system_message = {'role': 'system', 'content': f"""
You are an expert software architect. Your task is to provide a comprehensive analysis of the suitability of a given architectural pattern for a software project described by user stories.
Consider various aspects, such as scalability, complexity, performance, maintainability, accessibility and any other factors you deem relevant to making a thorough assessment.
Provide a detailed reasoning for each aspect, referencing the relevant user stories.
Think step by step and explain your though process clearly.
After your analysis give a score for the suitability on this scale:

1: "Completely unsuitable"
2: "Partially suitable"
3: "Sufficient for requirements"
4: "Well-suited"
5: "Perfectly aligned"

Here is the format for an example run:

PROJECT TITLE: <title>

CATEGORIZED USER STORIES:

<user stories>

ARCHITECTURE PATTERN: <pattern>

ANALYSIS:

<analysis>

SCORE: <int>
"""}


start_time = time.time()
for pattern in patterns:
  analyzer_context = []
  analyzer_context.append(analyzer_system_message)
  analyzer_context.append({'role': 'user', 'content': f"""
  I will give you a list of categorized user stories and a description created for a software project titled {project_title}. 
  Analyze the following user stories and assess the suitability of the {pattern} architecture for this project.
  PROJECT TITLE: {project_title}

  CATEGORIZED USER STORIES:

  {project_content}

  ARCHITECTURE PATTERN: {pattern}

  ANALYSIS:
  
  """})
  response = client.chat.completions.create(model=model_name, messages=analyzer_context, 
  temperature=temperature, max_completion_tokens=num_ctx)

  message = response.choices[0].message
  print(message.content.strip())
  analysis_list.append(message.content.strip())
end_time = time.time()
analysis_duration = end_time - start_time

PROJECT TITLE: ColorIDE Project

CATEGORIZED USER STORIES:

{
"description": "This concerns ColorIDE, an offline Integrated Development Environment for professional developers built with a core that provides essential editing and debugging capabilities.",
"Usability & Accessibility": [
"As a Developer, I want a clean, intuitive interface with easily navigable menus, so that I can access core features without distraction.",
"As a Developer, I want an extensive set of keyboard shortcuts and quick-access commands, so that I can perform common tasks without relying on the mouse.",
"As a Developer, I want customizable workspace layouts that can be saved and reloaded, so that I can tailor the environment to different projects and personal preferences.",
"As a Developer, I want clear syntax highlighting and inline error markers, so that I can quickly identify and correct coding mistakes.",
"As a Developer, I want integrated code completion and real-time linting, so that I can write code effic

In [54]:
analysis_messages_merged =" - DETAILED ANALYSIS MESSAGES FOR EACH PATTERN -"


for pattern, analysis in zip(patterns, analysis_list):
    analysis_messages_merged = analysis_messages_merged + "\n\n Analysis for {}:\n".format(pattern) + analysis

decider_context = []
decider_system_message = {'role': 'system', 'content': f"""
You are a software architect. You will get a list of categorized user stories with a description and a list of analyses that are related to the suitability of each architecture pattern.
Your job is to re-evaluate the user stories and the given analyses together and come up with a final scoring for the architecture patterns. 
- Be as objective as possible during your reasoning. The analysis needs to be deterministic and reproducible, regardless of the order of elements presented or any random factors.
-Explain your thought process clearly.

Here are the score options and their corresponding meaning, your scores must be an integer from 1 to 5 matching these values:
1: "Completely unsuitable"
2: "Partially suitable"
3: "Sufficient for requirements"
4: "Well-suited"
5: "Perfectly aligned"

The final scores must be given in json format after the detailed reasoning for each architecture pattern:
{{
  "Layered Architecture": <int>,
  "Event-Driven Architecture": <int>,
  "Microkernel Architecture": <int>,
  "Microservices Architecture":<int>,
  "Space-Based Architecture": <int>,
  "Pipeline Architecture": <int>,
  "Client-Server Architecture": <int>
}}


Here would be the format for an example run:

PROJECT TITLE: <title>

CATEGORIZED USER STORIES:

<user stories>

- DETAILED ANALYSIS MESSAGES FOR EACH PATTERN -

<list of analyses>

REASONING:

<reasoning>

FINAL SCORE FORMAT:

{{
  "Layered Architecture": <int>,
  "Event-Driven Architecture": <int>,
  "Microkernel Architecture": <int>,
  "Microservices Architecture":<int>,
  "Space-Based Architecture": <int>,
  "Pipeline Architecture": <int>,
  "Client-Server Architecture": <int>
}}
"""}

decider_context.append(decider_system_message)

decider_user_message = {'role': 'user', 'content': f"""
I will give you a list of categorized user stories and a description created for a software project titled {project_title}.
And then I will give you a list of analyses that are related to the mentioned software architecture patterns.
Please re-evaluate them thoroughly and give me a final scoring in json format.

PROJECT TITLE: {project_title}

CATEGORIZED USER STORIES:

{project_content}

{analysis_messages_merged}

REASONING:

"""}
decider_context.append(decider_user_message)
response = client.chat.completions.create(model=model_name, messages=decider_context, 
  temperature=temperature, max_completion_tokens=num_ctx)  

message = response.choices[0].message
print(message.content.strip())
decider_context.append(message)
end_time = time.time()
decision_duration = end_time - start_time

The ColorIDE project is an offline Integrated Development Environment (IDE) that emphasizes modularity, extensibility, and performance efficiency. The user stories highlight the need for a minimal core with dynamic plugin management, a responsive user interface, and robust security measures. Given these requirements, we need to evaluate each architecture pattern's suitability for the project.

1. **Layered Architecture**: 
   - While it offers clear separation of concerns, which aids maintainability, it may struggle with the dynamic plugin management and performance requirements of ColorIDE. The need for extensive customization and dynamic features suggests that a more flexible architecture might be more appropriate. Thus, it is "Sufficient for requirements" but not optimal.

2. **Event-Driven Architecture**: 
   - This architecture supports modularity and extensibility, allowing components to operate independently and communicate through events. It aligns well with the need for asynch

In [55]:
import json

def parse_json_in_string(s):
    """
    Parses and returns the first JSON object found in the input string.

    Args:
        s (str): The input string that contains at least one JSON object.

    Returns:
        object: The Python representation of the parsed JSON object.

    Raises:
        ValueError: If no JSON object is found or if decoding fails.
    """
    # Find the first occurrence of '{'
    start = s.find('{')
    if start == -1:
         return None

    decoder = json.JSONDecoder()
    try:
        obj, _ = decoder.raw_decode(s, idx=start)
        return obj
    except json.JSONDecodeError as e:
        return None

# Calculate Kappa score

In [56]:
from sklearn.metrics import cohen_kappa_score
architecture_patterns = ["Layered Architecture", "Event-Driven Architecture",
 "Microkernel Architecture", "Microservices Architecture", "Space-Based Architecture", "Pipeline Architecture", "Client-Server Architecture"]

decider_context = [message if isinstance(message, dict) else message.dict() for message in decider_context]

with open('expert_answers.json') as f:
    expert_answers = json.load(f)
llm_answers = parse_json_in_string(decider_context[-1]["content"])
answers = [llm_answers[pattern] for pattern in architecture_patterns]
wck_scores = cohen_kappa_score(expert_answers[selected_project], answers, labels=[1, 2, 3, 4, 5], weights='quadratic')



/var/folders/r9/r79c48353r5cvd6d3tz920_h0000gn/T/ipykernel_60217/526098115.py:5: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  decider_context = [message if isinstance(message, dict) else message.dict() for message in decider_context]


# Print log to output file

In [57]:
from datetime import datetime
import json
from pathlib import Path
Path("./logs/separate-analyzer-zero-shot").mkdir(parents=True, exist_ok=True)

final_data = {
        "modelName": model_name,
        "temperature": temperature,
        "context_limit": num_ctx,
        "projectTitle": project_title,
        "file_name": file_name,
        "selfRefinement": True,
        "oneShot": one_shot,
        "wckScore": wck_scores,
        "patternDescriptionsAdded": add_pattern_descriptions,
        "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
        "json_output_parsed": parse_json_in_string(decider_context[-1]["content"]),
        "messages": [message["content"] for message in decider_context],
        "numberOfIterations": 0,
        "maxIterationsAllowed": 0,
        "overallDuration": decision_duration,
        "analysisDuration": analysis_duration
    }
    
# 3) Generate a filename based on model name and current timestamp
if one_shot:
    filename = f"./logs/separate-analyzer-one-shot/log_{model_version}_{final_data['timestamp']}.json"
else:
    filename = f"./logs/separate-analyzer-zero-shot/log_{model_version}_{final_data['timestamp']}.json"
# 4) Write the conversation to a JSON file
with open(filename, "w", encoding="utf-8") as f:
    json.dump(final_data, f, indent=2, ensure_ascii=False)

print(f"Assesment complete. The whole conversation is saved to {filename}")

Assesment complete. The whole conversation is saved to ./logs/separate-analyzer-zero-shot/log_gpt-4o_20250206_010522.json
