In [None]:
pip install ollama


# Load input file

In [None]:
# Human-readable title for every project that has an input file.
project_title_map = {
    "archives_space": "Archives Space Project",
    "bad_camp": "BADCamp Project",
    "neurohub": "NeuroHub Project",
    "open_spending": "Open Spending Project",
    "planning_poker": "Planning Poker Project",
    "recycling": "Recycling Project",
}
# Project keys, derived from the map so the two can never drift apart.
# Dict insertion order is preserved, so the positional indices are unchanged.
projects = list(project_title_map)

selected_project = projects[5]  # select which project you want to test with
project_title = project_title_map[selected_project]

# Read the selected project's user stories as raw text; the text is pasted into the prompt as-is.
project_path = "./input_files/user_stories_{}.json".format(selected_project)
# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
with open(project_path, 'r', encoding="utf-8") as file:
    project_content = file.read()

# Load model and configure model parameters

In [None]:
import ollama
from ollama import Client

# --- Model selection -------------------------------------------------------
# Tag of the Ollama model used for every chat call.
# Alternative tag kept for reference: "llama3.1:70b"
model_version = "llama3.1:8b-instruct-fp16"

# Name actually passed to the client; currently identical to the tag.
# Kept-for-reference alternative that registers a custom model first:
#   model_name = "emir_" + model_version
#   ollama.create(model=model_name, modelfile=modelfile)
model_name = model_version

# --- Generation parameters -------------------------------------------------
# Context length is higher because of the refinement process.
# Temperature should be kept 0 for deterministic results (default value 0.8).
num_ctx, temperature = 20480, 0
options = dict(num_ctx=num_ctx, temperature=temperature)

# --- Prompting mode --------------------------------------------------------
# When True, an example run is added to the system prompt.
one_shot = False

# Load the one-shot example if `one_shot` is set to True

In [None]:
import json
# Build the optional one-shot section that is inserted into the generator's system prompt.
# With one_shot disabled the section is an empty string and no file is read.
if one_shot:
    # Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
    with open('example_reasoning_1.txt', 'r', encoding="utf-8") as file:
        example_run_text = file.read()
    # The file handle is no longer needed once the text is in memory, so the prompt is built outside the `with`.
    example_run_prompt = f"""
        I will give you an example run for another software project just to show you the reasoning process and the output format. 
        VERY IMPORTANT NOTE: "NEVER COPY THE REASONING GIVEN IN THE EXAMPLE RUN! This Example is for Reference Only, Come Up With Your Own Reasoning for The User Input!"
        - EXAMPLE RUN START - 

        {example_run_text}
        
        - EXAMPLE RUN END -

        """
else:
    example_run_prompt = ""
# Bare expression: shows the resulting prompt section as the cell output.
example_run_prompt


# Get first assessment from model

In [None]:
# Client used for every chat call in this notebook; created with an empty headers mapping.
client = Client(headers={})

# System turn: defines the architect role, the seven patterns to score, the 1-5 scale,
# the required output format, and (optionally) the one-shot example section.
generator_system_message = dict(role='system', content=f"""
You are a software architect. Your job is to get a list of categorized user stories with a description, analyze them in detail and
assign a score for each architecture pattern depending on their relevance to the project and 
if it would prove useful in the implementation of the described project.

When the user gives you a list of user stories with a description, analyze the given user stories, give the reasoning for each architecture pattern, and then print out the assigned scores.
Be as objective as possible during your scoring. The decisions need to be deterministic and reproducible, regardless of the order of elements presented or any random factors.

Here are the architecture patterns you will score:
*Layered Architecture 
*Event-Driven Architecture (Pub-Sub Architecture) 
*Microkernel Architecture 
*Microservices Architecture 
*Space-Based Architecture
*Pipeline Architecture (Pipe-Filter Architecture) 
*Client-Server Architecture

Here are the score options and their corresponding meaning:
1: "Completely unsuitable"
2: "Partially suitable"
3: "Moderately applicable"
4: "Well suited"
5: "Perfectly aligned"

IMPORTANT: The final scores must be given in this format after the detailed reasoning for each architecture pattern:

**Layered Architecture**: -integer score value-
**Event-Driven Architecture (Pub-Sub Architecture)**: -integer score value-
**Microkernel Architecture**: -integer score value-
**Microservices Architecture**: -integer score value-
**Space-Based Architecture**: -integer score value-
**Pipeline Architecture (Pipe-Filter Architecture)**: -integer score value-
**Client-Server Architecture**: -integer score value-

{example_run_prompt}

""")

# User turn: repeats the output format and supplies the project title plus the raw user-story text.
first_user_message = dict(role='user', content=f"""
I will give you a list of categorized user stories and a description created for a software project titled {project_title}. Please analyze them in detail and give me the scoring for each architecture pattern.
The final scores must be given in this format after the detailed reasoning for each architecture pattern:

**Layered Architecture**: -integer score value-
**Event-Driven Architecture (Pub-Sub Architecture)**: -integer score value-
**Microkernel Architecture**: -integer score value-
**Microservices Architecture**: -integer score value-
**Space-Based Architecture**: -integer score value-
**Pipeline Architecture (Pipe-Filter Architecture)**: -integer score value-
**Client-Server Architecture**: -integer score value-

Project Title: {project_title}

Categorized User Stories:

{project_content}

""")

# Conversation history: system instructions first, then the user's request.
messages = [generator_system_message, first_user_message]

# Ask the model for its first assessment and show it.
response = client.chat(model=model_name, messages=messages, options=options)
message = response['message']
print(message['content'])

# Keep the model's reply in the history so later cells can build on it.
messages.append(message)


# Self-refinement Process

In [None]:

# Self-refinement: an Evaluator pass critiques the current answer and a Refiner pass rewrites it.
# The two roles keep separate conversation histories, both seeded with the first-assessment exchange.
# The loop stops when the evaluator ends its feedback with NO_REFINEMENT or after
# MAX_REFINEMENT_ITERATIONS rounds, whichever comes first.

# Let's capture the model's initial answer in a variable:
current_answer = message['content']  # e.g. the text with reasoning + final scores

# --- Evaluator setup ---
evaluator_context = messages.copy() # this context will be used in evaluator messages
eval_system_message = {
        "role": "system",
        "content": """Your task is changed. You are now the Evaluator. 
Your job: By taking the previously given user stories into account, evaluate the given reasoning and scores for each architecture pattern.

Here are the score options and their corresponding meaning:
1: "Completely unsuitable"
2: "Partially suitable"
3: "Moderately applicable"
4: "Well suited"
5: "Perfectly aligned"

- Do not assume any new information about the project. The description and the user stories for the project remain the same.
- Only suggest a refinement for an assigned score when you are sure its score is wrong and it must change. IMPORTANT: If the current score is already reasonable DO NOT make a refinement suggestion.
- Also, go through your previous evaluations to ensure you don’t repeat the same refinement suggestion for an architecture pattern more than once for the same reason.

After your refinement suggestions, on a NEW line, end your response with exactly one of the two markers:
- REFINE (if refinements are needed)
- NO_REFINEMENT (if no refinements are needed)

Do not include any additional text after that marker.
Do not wrap it in quotes.

Important:
- Score can only take the integer values that are given as the score options, no float values.
- If you provide refinement for any of the pattern scores, do not use NO_REFINEMENT.
Because this will stop the whole process and the current scores will be left unrefined.
- Use NO_REFINEMENT only if you have zero refinements to suggest.
"""
}
evaluator_context.append(eval_system_message)

# --- Refiner setup ---
refiner_context = messages.copy() # this context will be used in refiner messages

refine_system_message = {
        "role": "system",
        "content": """Your task is changed. You are now the Refiner. 
You will be given the latest reasoning/scores for the project and the evaluator's feedback for the latest reasoning/scores.
Make changes on the current scores according to the given refinement.
IMPORTANT:  ONLY make a change for a score when the refinement suggestion for that score is reasonable and the assigned score must change. Otherwise keep it unchanged.

When giving your output refined scores, keep the same format in the updated response (reasoning and then the updated scores).
Be as objective as possible during your refinement. The decisions need to be deterministic and reproducible, regardless of the order of elements presented or any random factors.

Here are the score options and their corresponding meaning:
1: "Completely unsuitable"
2: "Partially suitable"
3: "Moderately applicable"
4: "Well suited"
5: "Perfectly aligned"

- Do not assume any new information about the project. The description and the user stories for the project remain the same.
"""
    }
refiner_context.append(refine_system_message)

# We'll define how many refinement loops to allow:
MAX_REFINEMENT_ITERATIONS = 3

# Number of refinement rounds actually performed.
# If not set again in the loop, it means the model went through max iterations.
refinement_iterations = MAX_REFINEMENT_ITERATIONS
for i in range(MAX_REFINEMENT_ITERATIONS):
    # 1) Evaluate the current answer
    evaluate_prompt = {
        "role": "user",
        "content": f"""
            Evaluate the current reasoning and scores by taking the original categorized user stories into account and make refinement suggestions for the current answer:

            Current Answer:
            {current_answer}

            Refinement Suggestions:
            """
    }
    evaluator_context.append(evaluate_prompt)
    eval_response = client.chat(model=model_name, messages=evaluator_context, options=options)
    eval_message = eval_response['message']
    eval_feedback = eval_message['content'].strip()
    # Keep the evaluator's own reply in its history so it can see its earlier evaluations.
    evaluator_context.append(eval_message)

    print(f"\n=== Evaluator Feedback (Iteration {i+1}) ===\n{eval_feedback}\n")

    # 2) If the evaluator indicates no changes are needed, break out.
    # The evaluator is instructed to finish with REFINE or NO_REFINEMENT on a line of its own,
    # so only the last non-empty line is inspected. Searching the whole feedback would also
    # match a response that merely mentions NO_REFINEMENT while still asking for changes,
    # and those suggestions would then be dropped.
    feedback_lines = [line.strip() for line in eval_feedback.splitlines() if line.strip()]
    final_marker_line = feedback_lines[-1] if feedback_lines else ""
    if "NO_REFINEMENT" in final_marker_line:
        print("Evaluator says no changes are needed. Stopping refinement.")
        refinement_iterations = i  # i rounds were completed before this one
        break

    # 3) Otherwise, refine
    refine_prompt = {
        "role": "user",
        "content": f"""
            Analyze the previous answer and the feedback of the evaluator, and then refine your previous answer by taking the feedback of the evaluator into account. 
            Keep the same format (reasoning and then the final scores).

            Previous Answer:
            {current_answer}

            Evaluator Feedback:
            {eval_feedback}

            Revised Answer:
            """
    }
    refiner_context.append(refine_prompt)

    refine_response = client.chat(model=model_name, messages=refiner_context, options=options)
    refine_message = refine_response['message']
    revised_answer = refine_message['content'].strip()
    refiner_context.append(refine_message)

    print(f"=== Refined Answer (Iteration {i+1}) ===\n{revised_answer}\n")

    # Update current_answer for potential next iteration
    current_answer = revised_answer

# After the loop, current_answer holds the final refined output:
print("=== Final Refined Scores & Reasoning ===\n", current_answer)

# Summarize the evaluation result

In [None]:
# Ask the model for a compact "before -> after" score table covering the whole refinement process.
# The request is appended to the refiner's conversation, so this cell extends refiner_context.

# System turn with the display format. Every example line must use the "<int> -> <int>" shape;
# the Event-Driven line previously read "2 -> at", which is not a valid example of the format.
refiner_context.append({'role': 'system', 'content': """
Ok now the evaluation process is finished. take the first assessment and the last assessment from the user. 
And just return the final scores before and after the evaluation-refinement process
following this display format below:

**Layered Architecture**: 1 -> 2
**Event-Driven Architecture (Pub-Sub Architecture)**: 2 -> 2
**Microkernel Architecture**: 3 -> 3
**Microservices Architecture**: 4 -> 5
**Space-Based Architecture**: 1 -> 3
**Pipeline Architecture (Pipe-Filter Architecture)**: 2 -> 1
**Client-Server Architecture**: 3 -> 3
"""})

# User turn carrying both assessments.
# refiner_context was copied from `messages` (system, user, assistant), so index 2 is the
# model's first assessment; current_answer is the answer left after the refinement loop.
refiner_context.append({'role': 'user', 'content': f"""
read the first assessment and the last assessment, and return the final scores before and after the evaluation-refinement process
following this display format below:

**Layered Architecture**: 1 -> 2
**Event-Driven Architecture (Pub-Sub Architecture)**: 2 -> 2
**Microkernel Architecture**: 3 -> 3
**Microservices Architecture**: 4 -> 5
**Space-Based Architecture**: 1 -> 3
**Pipeline Architecture (Pipe-Filter Architecture)**: 2 -> 1
**Client-Server Architecture**: 3 -> 3


first assessment:
{refiner_context[2]["content"].strip()}

last assessment:
{current_answer.strip()}
"""})
# Show the exact request that is about to be sent.
print(refiner_context[-1]["content"])

# Temperature is pinned to 0 here regardless of the configured `temperature`:
# higher temperatures are too varied.
response = client.chat(model=model_name, messages=refiner_context, options={"num_ctx": num_ctx, "temperature": 0})
message = response['message']
refiner_context.append(message)
print(message["content"])


# Print log to output file

In [None]:
from datetime import datetime
import json
from pathlib import Path
# Persist the run as a JSON log under ./logs/one-shot or ./logs/zero-shot.

# 1) Make sure both log folders exist.
Path("./logs/one-shot").mkdir(parents=True, exist_ok=True)
Path("./logs/zero-shot").mkdir(parents=True, exist_ok=True)

# 2) Collect the run metadata and the conversation.
# refiner_context starts as a copy of the first-assessment exchange and then receives every
# refine prompt (which embeds the evaluator feedback), every refined answer, and the final
# summary, so it is logged instead of `messages`, which stops at the first assessment.
logged_messages = [entry["content"] for entry in refiner_context]

final_data = {
    "modelName": model_name,
    "temperature": temperature,
    "context_limit": num_ctx,
    "projectTitle": project_title,
    # This notebook runs the evaluator/refiner loop, so the flag is True.
    "selfRefinement": True,
    "oneShot": one_shot,
    "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
    "lastMessage": logged_messages[-1],
    # Number of refinement rounds actually performed by the loop.
    "numberOfIterations": refinement_iterations,
    "messages": logged_messages,
}

# 3) Generate a filename based on model name and current timestamp
log_mode = "one-shot" if one_shot else "zero-shot"
filename = f"./logs/{log_mode}/log_{model_version}_{final_data['timestamp']}.json"

# 4) Write the conversation to a JSON file
with open(filename, "w", encoding="utf-8") as f:
    json.dump(final_data, f, indent=2, ensure_ascii=False)

print(f"Assesment complete. The whole conversation is saved to {filename}")