In [30]:
import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI  # Using OpenAI library
from langchain_mistralai import ChatMistralAI  # Using Mistral library
from langchain.callbacks.base import BaseCallbackHandler  # Import for callback handlers

LLM = "Mistral"

price_per_token = 0.00003  # Example cost per token in dollars

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Custom callback handler to extract token usage from LangChain responses.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        # Extract token usage from the LLM response
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens"),
                "completion_tokens": response["usage"].get("completion_tokens"),
                "total_tokens": response["usage"].get("total_tokens"),
                "price_usd": llm.completion_usage.get("total_tokens") * price_per_token
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
try:
    with open("secrets-master-llm.json") as f:
        if LLM == "Mistral":
            secrets = json.load(f)
            api_key = secrets["mistral_api_key"]
        elif LLM == "OpenAI":
            secrets = json.load(f)
            api_key = secrets["openai_api_key"]
except FileNotFoundError:
    print("File not found.")
    api_key = None
except KeyError:
    print("API Key not found in 'secrets-master-llm.json'.")
    api_key = None
    
# Initialize the LLM model with a callback
callback_handler = TokenUsageCallbackHandler()

# Configure ChatOpenAI for GPT-4
if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0, # Creativity level (0=deterministic, 1=creative)
        max_retries=2, 
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"  # Manually define the model name
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09", 
        temperature=0, # Creativity level (0=deterministic, 1=creative)
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"  # Manually define the model name

# Utility functions
def load_file_content(file_path):
    with open(file_path, 'r') as file:
        return file.read()

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.hepsy"
output_xes_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
metadata_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json"

# Load content from files
hepsycode_metamodel = load_file_content(metamodel_path)
hepsycode_example_model = load_file_content(example_model_path)
xes_example_trace = load_file_content(xes_trace_path)
hepsycode_model = load_file_content(model_path)

# Few-shot examples
examples = [
    {
        "input": (
            "Context: We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
            "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
            "The metamodel is designed to model algorithms, functionalities, and applications of embedded systems as a process network.\n\n"
            "Input Demonstration:\n\n"
            "This is the HEPSY metamodel:\n\n"
            f"{metamodel_path}\n\n"
            "This is an example model based on HEPSYCODE metamodel:\n\n"
            f"{hepsycode_example_model}\n\n"
            "Output Demonstration: This is a XES trace file representing the modeling step done when we created the example model based on HEPSYCODE metamodel:\n\n"
            f"{xes_example_trace}\n\n"
            "Task: Generate the XES trace file from the following HEPSYCODE model, where the model represents another embedded application:\n\n"
            f"{hepsycode_model}"
            "Use the following format:"
            "event StructuredNode processes ADD"
            "event Process name SET"
            "event Port portExtension SET"
            "event Port portExtension SET"
            "event StructuredNode nChannels ADD"
            "event Channel nFrom SET"
            "event Channel nTo SET"
            "event StructuredNode nChannels ADD"
            "event Channel nFrom SET"
            "event Channel nTo SET"
            "event StructuredNode nChannels ADD"
            "Write only the events, not other information in output"
            "Do not add comments"
            "Do not add double quotation marks at the beginning and end of the  XES trace"      
        ),
        "output": (
            "<XES TRACE>"
        )
    }
]

# Template for single example
example_prompt_template = PromptTemplate(
    input_variables=["input", "output"],
    template="{input}\nOutput: {output}\n"
)

# Full prompt template
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt_template,
    prefix="",
    suffix=(
        f"This is the HEPSYCODE metamodel:\n\n{hepsycode_metamodel}\n\n"
        f"This is an example model:\n\n{hepsycode_example_model}\n\n"
        f"Generate an XES trace file for the following model:\n\n{hepsycode_model}\nOutput:"
    ),
    input_variables=["hepsycode_metamodel", "hepsycode_example_model", "hepsycode_model"]
)

# Prepare input data
input_data = {
    "hepsycode_metamodel": hepsycode_metamodel,
    "hepsycode_example_model": hepsycode_example_model,
    "hepsycode_model": hepsycode_model
}

# Create the chain and run
llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)

# Run the chain and measure time
start_time = time.time()
response = llm_chain.run(input_data)
end_time = time.time()

# Extract metadata
xes_trace = response.strip()

# Extract metadata provided by LLM if available
'''
metadata = {
    #"output": response["text"],  # Generated output
    "response_length": len(xes_trace),
    "temperature": 0.7,
    "usage": {
        "prompt_tokens": llm_LangChain.completion_usage.get("prompt_tokens"),
        "completion_tokens": llm_LangChain.completion_usage.get("completion_tokens"),
        "total_tokens": llm_LangChain.completion_usage.get("total_tokens"),
    },
    "run_time": end_time - start_time,  # Runtime in seconds
    "model_name": model_name,  # Model used
    "price_per_token": price_per_token,
    "price_usd": llm_LangChain.completion_usage.get("total_tokens") * price_per_token,
}
'''

# Extract metadata
metadata = {
    # "output": response,  # Generated output
    "response_length": len(response),
    "temperature": 0.7,
    "usage": callback_handler.token_usage,  # Token usage metadata
    "run_time": end_time - start_time,  # Runtime in seconds
    "model_name": model_name  # Model used
}

# Save output and metadata
save_to_file(output_xes_path, xes_trace)
save_metadata(metadata_path, metadata)

print("Model Response saved to:", output_xes_path)
print("Metadata saved to:", metadata_path)

Token usage information is missing from the response.
Model Response saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes
Metadata saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json


In [32]:
import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain.callbacks.base import BaseCallbackHandler

# Model configuration and cost per token
LLM = "Mistral"  # Change to "OpenAI" for GPT-4
PRICE_PER_TOKEN = 0.00003  # Cost per token in USD

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Callback to track token usage.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens", 0),
                "completion_tokens": response["usage"].get("completion_tokens", 0),
                "total_tokens": response["usage"].get("total_tokens", 0),
                "price_usd": response["usage"].get("total_tokens", 0) * PRICE_PER_TOKEN
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
def load_api_key():
    try:
        with open("secrets-master-llm.json") as f:
            secrets = json.load(f)
            return secrets.get(f"{LLM.lower()}_api_key")
    except (FileNotFoundError, KeyError) as e:
        print(f"Error loading API Key: {e}")
        return None

api_key = load_api_key()

# LLM configuration
callback_handler = TokenUsageCallbackHandler()
llm_LangChain = None
model_name = ""

if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0.7,  # Consistent with metadata
        max_retries=2,
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09",
        temperature=0.7,  # Consistent with metadata
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"

# Utility functions for file handling
def load_file_content(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return ""

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.hepsy"
output_xes_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
metadata_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json"

# Load file contents
hepsycode_metamodel = load_file_content(metamodel_path)
hepsycode_example_model = load_file_content(example_model_path)
xes_example_trace = load_file_content(xes_trace_path)
hepsycode_model = load_file_content(model_path)

# Few-shot examples
context = (
    "We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
    "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
    "The metamodel is designed to model algorithms, functionalities, and applications as a process network."
)

examples = [
    {
        "system": (
            "You are an expert in embedded systems design with deep knowledge of using Electronic Design"
            "Automation (EDA) tools at the system level. Your role is to assist designers and engineers" 
            "in developing system-level models for complex embedded applications."
            "- You must provide precise and detailed solutions following best practices in embedded systems engineering."
            "- You are expected to explain and implement concepts such as system-level simulation,"
            "hardware/software co-design, design space exploration, and event-driven or cycle-accurate modeling."
            "- You are proficient with tools like SystemC, MATLAB/Simulink, and similar frameworks, and know"
            "how to use them to describe functional behavior, system architecture, and constraints."
            "- Your approach should focus on verification, validation, and design optimization, "
            "considering parameters like performance, power consumption, cost, and time-to-market."
            "- You must be clear and concise, capable of providing practical examples and guidance on "
            "solving specific problems or optimizing the design workflow."
            "Ensure your communication is professional and tailored to a technical audience."
        ),
        "user": (
            f"This is the HEPSY metamodel:\n\n{hepsycode_metamodel}\n\n"
            f"This is an example model based on HEPSYCODE metamodel:\n\n{hepsycode_example_model}\n\n"
            f"This is a XES trace file representing the modeling step:\n\n{xes_example_trace}"
        )
    }
]

# Prompt template
example_prompt_template = PromptTemplate(
    input_variables=["user", "system"],
    template="User: {user}\nSystem: {system}\n"
)

# Full prompt
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt_template,
    prefix=f"Context:\n{context}\n\n",
    suffix=(
        f"Generate an XES trace file for the following model:\n\n"
        "{hepsycode_model}\nOutput:"
        "Use the following format:"
        "event StructuredNode processes ADD"
        "event Process name SET"
        "event Port portExtension SET"
        "event Port portExtension SET"
        "event StructuredNode nChannels ADD"
        "event Channel nFrom SET"
        "event Channel nTo SET"
        "event StructuredNode nChannels ADD"
        "event Channel nFrom SET"
        "event Channel nTo SET"
        "event StructuredNode nChannels ADD"
        "Write only the events, not other information in output"
        "Do not add comments"
        "Do not add double quotation marks at the beginning and end of the  XES trace"   
    ),
    input_variables=["hepsycode_model"]
)

# Create and run the chain
llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)
start_time = time.time()
response = llm_chain.run({"hepsycode_model": hepsycode_model})
end_time = time.time()

# Extract and save results
xes_trace = response.strip()
metadata = {
    "response_length": len(xes_trace),
    "temperature": 0.7,
    "usage": callback_handler.token_usage,
    "run_time": end_time - start_time,
    "model_name": model_name
}

save_to_file(output_xes_path, xes_trace)
save_metadata(metadata_path, metadata)

print("Model response saved to:", output_xes_path)
print("Metadata saved to:", metadata_path)

Token usage information is missing from the response.
Model response saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes
Metadata saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json


# GAN

In [None]:
import ollama
from ollama import chat
from ollama import ChatResponse
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# LLM generator using Ollama
"""
def LLM_generator(prompt):
    try:
        response = ollama.chat(
            model="llama3.2",  # Specifica il modello da usare
            prompt=f"You are a helpful assistant for generating synthetic data.\nUser: {prompt}"
        )
        return response.get('content', '')  # Estrai il contenuto della risposta
    except Exception as e:
        print(f"Error in LLM_generator: {e}")
        return ""
"""

"""
def LLM_generator(prompt):
    try:
        response: ChatResponse = chat(model='llama3.2', messages=[
          {
            'role': 'user',
            'content': '{prompt}',
          },
        ])
        return response['message']['content']
    except Exception as e:
        print(f"Error in LLM_generator: {e}")
        return ""
"""

def LLM_generator(prompt):
    print(prompt)
    try:
        response = chat(
            model='llama3.2', 
            messages=[{'role': 'system', 'content': 'You are a helpful assistant for generating synthetic data.'}, 
                      {'role': 'user', 'content': 'Generate random numbers: {prompt}'}]
        )
        return response['message']['content']
    except Exception as e:
        print(f"Error in LLM_generator: {e}")
        return ""

# LLM optimizer using Ollama
def LLM_optimizer(instruction):
    print(instruction)
    try:
        response = chat(
            model='llama3.2', 
            messages=[{'role': 'system', 'content': 'You are a helpful assistant for generating synthetic data.'}, 
                      {'role': 'user', 'content': 'Generate random numbers: {prompt}'}]
        )
        return response['message']['content']
    except Exception as e:
        print(f"Error in LLM_optimizer: {e}")
        return ""

# Embedder to convert text to numeric vectors
def text_to_vector(text):
    try:
        return np.array([ord(char) for char in text])  # Simple embedding using ASCII values
    except Exception as e:
        print(f"Error in text_to_vector: {e}")
        return np.array([])

# Discriminator using cosine similarity
class Discriminator:
    def __init__(self):
        self.real_data = []

    def fit(self, X, y):
        self.real_data = [text_to_vector(x) for x, label in zip(X, y) if label == 1]

    def predict(self, X):
        predictions = []
        for x in X:
            x_vec = text_to_vector(x)
            if len(x_vec) == 0:  # Skip invalid vectors
                predictions.append(0)
                continue
            if self.real_data:
                similarities = []
                for real in self.real_data:
                    if len(real) == len(x_vec):  # Ensure vectors are of the same length
                        similarity = cosine_similarity(x_vec.reshape(1, -1), real.reshape(1, -1))[0][0]
                        similarities.append(similarity)
                avg_similarity = np.mean(similarities) if similarities else 0
                predictions.append(1 if avg_similarity > 0.5 else 0)
            else:
                predictions.append(0)
        return predictions

# Calculate accuracy
def get_accuracy(predictions, labels):
    return sum([p == l for p, l in zip(predictions, labels)]) / len(labels)

# Main function
def optimize_theta(theta):
    theta_score_pairs = []
    discriminator = Discriminator()

    for _ in range(max_epoch):
        for batch in D_train:
            # 1. Generate synthetic data
            prompt = f"Generate synthetic data for: {theta} + {batch}"
            D_syn = [LLM_generator(prompt) for _ in range(len(D_train))]

            # 2. Run the discriminator
            labels_syn = [0] * len(D_syn)
            labels_train = [1] * len(D_train)
            train, test, train_label, test_label = train_test_split(
                D_train + D_syn, labels_train + labels_syn, test_size=0.2
            )
            discriminator.fit(train, train_label)
            predictions = discriminator.predict(test)
            score = get_accuracy(predictions, test_label)

            # 3. Optimize theta
            optimization_instruction = f"Optimize theta given the following scores: {theta_score_pairs}"
            theta_score_pairs.append((theta, score))
            theta = LLM_optimizer(optimization_instruction)

    return theta

# Simulated input data
D_train = ["1 2 3", "4 5 6", "7 8 9"]
max_epoch = 5

# Run the optimization
initial_theta = "Initial theta"
optimized_theta = optimize_theta(initial_theta)
print("Optimized theta:", optimized_theta)

In [45]:
import ollama
from ollama import chat
from ollama import ChatResponse
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from langchain.llms import Ollama

import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain.callbacks.base import BaseCallbackHandler

# Model configuration and cost per token
LLM = "Mistral"  # Change to "OpenAI" for GPT-4, Ollama or Mistral
PRICE_PER_TOKEN = 0.00003  # Cost per token in USD

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Callback to track token usage.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens", 0),
                "completion_tokens": response["usage"].get("completion_tokens", 0),
                "total_tokens": response["usage"].get("total_tokens", 0),
                "price_usd": response["usage"].get("total_tokens", 0) * PRICE_PER_TOKEN
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
def load_api_key():
    try:
        with open("secrets-master-llm.json") as f:
            secrets = json.load(f)
            return secrets.get(f"{LLM.lower()}_api_key")
    except (FileNotFoundError, KeyError) as e:
        print(f"Error loading API Key: {e}")
        return None

api_key = load_api_key()

# LLM configuration
callback_handler = TokenUsageCallbackHandler()
llm_LangChain = None
model_name = ""

if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0.7,  # Consistent with metadata
        max_retries=2,
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09",
        temperature=0.7,  # Consistent with metadata
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"
elif LLM == "Ollama":
    llm_LangChain = Ollama(
        model="llama3.2-32k",  # Replace with the model name you want to use
        base_url="http://localhost:11434"  # Default Ollama API URL
    )

# Utility functions for file handling
def load_file_content(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return ""

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
example_xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.hepsy"
output_xes_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
metadata_path = "D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json"

# Load file contents
hepsycode_metamodel = load_file_content(metamodel_path)
hepsycode_example_model = load_file_content(example_model_path)
xes_example_trace = load_file_content(example_xes_trace_path)
hepsycode_model = load_file_content(model_path)

# Few-shot examples
context = (
    "We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
    "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
    "The metamodel is designed to model algorithms, functionalities, and applications as a process network."
)

examples = [
    {
        "system": (
            "You are an expert in embedded systems design with deep knowledge of using Electronic Design"
            "Automation (EDA) tools at the system level. Your role is to assist designers and engineers" 
            "in developing system-level models for complex embedded applications.\n"
            "You must provide precise and detailed solutions following best practices in embedded systems engineering.\n"
            "You are expected to explain and implement concepts such as system-level simulation,"
            "hardware/software co-design, design space exploration, and event-driven or cycle-accurate modeling.\n"
            "You are proficient with tools like SystemC, MATLAB/Simulink, and similar frameworks, and know"
            "how to use them to describe functional behavior, system architecture, and constraints.\n"
            "Your approach should focus on verification, validation, and design optimization, "
            "considering parameters like performance, power consumption, cost, and time-to-market.\n"
            "You must be clear and concise, capable of providing practical examples and guidance on "
            "solving specific problems or optimizing the design workflow.\n"
            "Ensure your communication is professional and tailored to a technical audience.\n"
        ),
        "user": (
            f"This is the HEPSY metamodel:\n\n{hepsycode_metamodel}\n\n"
            f"This is an example model based on HEPSYCODE metamodel:\n\n{hepsycode_example_model}\n\n"
            f"This is a XES trace file representing the modeling step:\n\n{xes_example_trace}"
        )
    }
]

# Prompt template
example_prompt_template = PromptTemplate(
    input_variables=["user", "system"],
    template="System: {system}\nUser: {user}\n"
)

# Full prompt
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt_template,
    prefix=f"Context:\n{context}\n\n",
    suffix=(
        f"Generate an XES trace file for the following model:\n\n{hepsycode_model}\nOutput:"
        "Use the following format:"
        "event StructuredNode processes ADD\n"
        "event Process name SET\n"
        "event Port portExtension SET\n"
        "event Port portExtension SET\n"
        "event StructuredNode nChannels ADD\n"
        "event Channel nFrom SET\n"
        "event Channel nTo SET\n"
        "event StructuredNode nChannels ADD\n"
        "event Channel nFrom SET\n"
        "event Channel nTo SET\n"
        "event StructuredNode nChannels ADD\n"
        "Write only the events, not other information in output"
        "Do not add comments"
        "Do not add double quotation marks at the beginning and end of the XES trace"   
    ),
    input_variables=["hepsycode_model"]
)

# Create and run the chain
llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)
start_time = time.time()
response = llm_chain.run({"hepsycode_model": hepsycode_model})
end_time = time.time()

# Extract and save results
xes_trace = response.strip()
metadata = {
    "response_length": len(xes_trace),
    "temperature": 0.7,
    "usage": callback_handler.token_usage,
    "run_time": end_time - start_time,
    "model_name": model_name
}

save_to_file(output_xes_path, xes_trace)
save_metadata(metadata_path, metadata)

print("Model response saved to:", output_xes_path)
print("Metadata saved to:", metadata_path)

Token usage information is missing from the response.
Model response saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes
Metadata saved to: D2-HEPSYCODE/XES-MORGAN-LLM-gpt-4-turbo-2024-04-09/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.json


# HEPSYCODE Few Shot in Context learning

In [47]:
import os
import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain.llms import Ollama
from langchain.callbacks.base import BaseCallbackHandler

# Model configuration and cost per token
LLM = "Mistral"  # Change to "OpenAI", "Ollama", or "Mistral"
PRICE_PER_TOKEN = 0.00003  # Cost per token in USD

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Callback to track token usage.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens", 0),
                "completion_tokens": response["usage"].get("completion_tokens", 0),
                "total_tokens": response["usage"].get("total_tokens", 0),
                "price_usd": response["usage"].get("total_tokens", 0) * PRICE_PER_TOKEN
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
def load_api_key():
    try:
        with open("secrets-master-llm.json") as f:
            secrets = json.load(f)
            return secrets.get(f"{LLM.lower()}_api_key")
    except (FileNotFoundError, KeyError) as e:
        print(f"Error loading API Key: {e}")
        return None

api_key = load_api_key()

# LLM configuration
callback_handler = TokenUsageCallbackHandler()
llm_LangChain = None
model_name = ""

if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0.7,
        max_retries=2,
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09",
        temperature=0.7,
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"
elif LLM == "Ollama":
    llm_LangChain = Ollama(
        model="llama3.2-32k",
        base_url="http://localhost:11434"
    )
    model_name = "llama3.2-32k"
 
# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
example_xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"

# Base paths
base_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/"
base_output_dir = f"D2-HEPSYCODE/XES-MORGAN-LLM-{model_name.lower()}"

# Ensure output directory exists
os.makedirs(base_output_dir, exist_ok=True)

# Utility functions
def load_file_content(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return ""

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

# Few-shot examples
context = (
    "We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
    "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
    "The metamodel is designed to model algorithms, functionalities, and applications as a process network."
)

examples = [
    {
        "system": (
            "You are an expert in embedded systems design with deep knowledge of using Electronic Design"
            "Automation (EDA) tools at the system level. Your role is to assist designers and engineers" 
            "in developing system-level models for complex embedded applications.\n"
            "You must provide precise and detailed solutions following best practices in embedded systems engineering.\n"
            "You are expected to explain and implement concepts such as system-level simulation,"
            "hardware/software co-design, design space exploration, and event-driven or cycle-accurate modeling.\n"
            "You are proficient with tools like SystemC, MATLAB/Simulink, and similar frameworks, and know"
            "how to use them to describe functional behavior, system architecture, and constraints.\n"
            "Your approach should focus on verification, validation, and design optimization, "
            "considering parameters like performance, power consumption, cost, and time-to-market.\n"
            "You must be clear and concise, capable of providing practical examples and guidance on "
            "solving specific problems or optimizing the design workflow.\n"
            "Ensure your communication is professional and tailored to a technical audience.\n"
        ),
        "user": (
            f"This is the HEPSY metamodel:\n\n{hepsycode_metamodel}\n\n"
            f"This is an example model based on HEPSYCODE metamodel:\n\n{hepsycode_example_model}\n\n"
            f"This is a XES trace file representing the modeling step:\n\n{xes_example_trace}"
        )
    }
]

# Prompt template
example_prompt_template = PromptTemplate(
    input_variables=["user", "system"],
    template="System: {system}\nUser: {user}\n"
)

# Full prompt
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt_template,
    prefix=f"Context:\n{context}\n\n",
    suffix=(
        "Generate an XES trace file for the following model:\n\n{hepsycode_model}\nOutput:\n"
        "event StructuredNode processes ADD\n"
        "event Process name SET\n"
        "event Port portExtension SET\n"
        "event Port portExtension SET\n"
        "event StructuredNode nChannels ADD\n"
        "event Channel nFrom SET\n"
        "event Channel nTo SET\n"
        "Write only the events, not other information in output."
    ),
    input_variables=["hepsycode_model"]
)

# Process all .hepsy files
for file_name in os.listdir(base_model_path):
    if file_name.endswith(".hepsy"):
        # File paths
        input_file_path = os.path.join(base_model_path, file_name)
        output_xes_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".xes"))
        metadata_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".json"))

        # Load example file contents
        hepsycode_metamodel = load_file_content(metamodel_path)
        hepsycode_example_model = load_file_content(example_model_path)
        xes_example_trace = load_file_content(example_xes_trace_path)
        # Load the model content
        hepsycode_model = load_file_content(input_file_path)

        # Create and run the chain
        llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)
        start_time = time.time()
        response = llm_chain.run({"hepsycode_model": hepsycode_model})
        end_time = time.time()

        # Extract and save results
        xes_trace = response.strip()
        metadata = {
            "response_length": len(xes_trace),
            "temperature": 0.7,
            "usage": callback_handler.token_usage,
            "run_time": end_time - start_time,
            "model_name": model_name
        }

        save_to_file(output_xes_path, xes_trace)
        save_metadata(metadata_path, metadata)

        print(f"Processed: {file_name}")
        print(f"XES saved to: {output_xes_path}")
        print(f"Metadata saved to: {metadata_path}")


Token usage information is missing from the response.
Processed: 2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.hepsy
XES saved to: D2-HEPSYCODE/XES-MORGAN-LLM-mistral-large-latest\2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.xes
Metadata saved to: D2-HEPSYCODE/XES-MORGAN-LLM-mistral-large-latest\2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.json
Token usage information is missing from the response.
Processed: 2024-02-13 16.59 00%20-%20DigitalCam%20Parallel-representations.aird.hepsy
XES saved to: D2-HEPSYCODE/XES-MORGAN-LLM-mistral-large-latest\2024-02-13 16.59 00%20-%20DigitalCam%20Parallel-representations.aird.xes
Metadata saved to: D2-HEPSYCODE/XES-MORGAN-LLM-mistral-large-latest\2024-02-13 16.59 00%20-%20DigitalCam%20Parallel-representations.aird.json
Token usage information is missing from the response.
Processed: 2024-02-13 17.34 01%20-%20FIRFIRGCD-representations.aird.hepsy
XES saved to: D2-HEPSYCODE/XES-MORGAN-L

# HEPSYCODE RAG

In [62]:
import os
import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain.llms import Ollama
from langchain.callbacks.base import BaseCallbackHandler
from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.chains.retrieval_qa import RetrievalQA
from langchain.chains import ConversationalRetrievalChain

# Model configuration and cost per token
LLM = "Mistral"  # Change to "OpenAI", "Ollama", or "Mistral"
PRICE_PER_TOKEN = 0.00003  # Cost per token in USD

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Callback to track token usage.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens", 0),
                "completion_tokens": response["usage"].get("completion_tokens", 0),
                "total_tokens": response["usage"].get("total_tokens", 0),
                "price_usd": response["usage"].get("total_tokens", 0) * PRICE_PER_TOKEN
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
def load_api_key():
    try:
        with open("secrets-master-llm.json") as f:
            secrets = json.load(f)
            return secrets.get(f"{LLM.lower()}_api_key")
    except (FileNotFoundError, KeyError) as e:
        print(f"Error loading API Key: {e}")
        return None

api_key = load_api_key()

# LLM configuration
callback_handler = TokenUsageCallbackHandler()
llm_LangChain = None
model_name = ""

if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0.7,
        max_retries=2,
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09",
        temperature=0.7,
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"
elif LLM == "Ollama":
    llm_LangChain = Ollama(
        model="llama3.2-32k",
        base_url="http://localhost:11434"
    )
    model_name = "llama3.2-32k"

# Vector DB and Embedding Configuration
def build_or_load_db(urls, db_path="vector_db"):
    """
    Build or load a vector database from URLs.
    """
    if os.path.exists(db_path):
        return FAISS.load_local(db_path, OpenAIEmbeddings())
    
    loader = UnstructuredURLLoader(urls=urls)
    documents = loader.load()

    # Split into smaller chunks for embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_documents(documents)

    # Embed and save the database
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(db_path)

    return db

# Example URLs for context
urls = [
    "https://github.com/hepsycode/HEPSYCODE-Eclipse-GUI",
    "https://github.com/hepsycode/HEPSYCODE-AIDOaRt/tree/main/HEPSYCODE-Workbench"
]

vector_db = build_or_load_db(urls)

# Create conversational retrieval chain
retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm_LangChain,
    retriever=vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
)

# Retrieval-based QA Chain
retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieval_chain = RetrievalQA(llm=llm_LangChain, retriever=retriever)

# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
example_xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"

# Base paths
base_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/"
base_output_dir = f"D2-HEPSYCODE/XES-MORGAN-LLM-{model_name.lower()}"

# Ensure output directory exists
os.makedirs(base_output_dir, exist_ok=True)

# Utility functions
def load_file_content(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return ""

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

# Few-shot examples
'''
context = (
    "We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
    "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
    "The metamodel is designed to model algorithms, functionalities, and applications as a process network."
)
'''

examples = [
    {
        "system": (
            "You are an expert in embedded systems design with deep knowledge of using Electronic Design"
            "Automation (EDA) tools at the system level. Your role is to assist designers and engineers" 
            "in developing system-level models for complex embedded applications.\n"
            "You must provide precise and detailed solutions following best practices in embedded systems engineering.\n"
            "You are expected to explain and implement concepts such as system-level simulation,"
            "hardware/software co-design, design space exploration, and event-driven or cycle-accurate modeling.\n"
            "You are proficient with tools like SystemC, MATLAB/Simulink, and similar frameworks, and know"
            "how to use them to describe functional behavior, system architecture, and constraints.\n"
            "Your approach should focus on verification, validation, and design optimization, "
            "considering parameters like performance, power consumption, cost, and time-to-market.\n"
            "You must be clear and concise, capable of providing practical examples and guidance on "
            "solving specific problems or optimizing the design workflow.\n"
            "Ensure your communication is professional and tailored to a technical audience.\n"
        ),
        "user": (
            f"This is the HEPSY metamodel:\n\n{hepsycode_metamodel}\n\n"
            f"This is an example model based on HEPSYCODE metamodel:\n\n{hepsycode_example_model}\n\n"
            f"This is a XES trace file representing the modeling step:\n\n{xes_example_trace}"
        )
    }
]

# Prompt template
example_prompt_template = PromptTemplate(
    input_variables=["user", "system"],
    template="System: {system}\nUser: {user}\n"
)

# Full prompt
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt_template,
    prefix=f"Context:\n{retrieved_context}\n\n",
    suffix=(
        "Generate an XES trace file for the following model:\n\n{hepsycode_model}\nOutput:\n"
        "event StructuredNode processes ADD\n"
        "event Process name SET\n"
        "event Port portExtension SET\n"
        "event Port portExtension SET\n"
        "event StructuredNode nChannels ADD\n"
        "event Channel nFrom SET\n"
        "event Channel nTo SET\n"
        "Write only the events, not other information in output."
    ),
    input_variables=["hepsycode_model"]
)

# Process all .hepsy files
i = 0
for file_name in os.listdir(base_model_path):
    i = i + 1
    if file_name.endswith(".hepsy") and i < 2:
        # File paths
        input_file_path = os.path.join(base_model_path, file_name)
        output_xes_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".xes"))
        metadata_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".json"))

        # Load example file contents
        hepsycode_metamodel = load_file_content(metamodel_path)
        hepsycode_example_model = load_file_content(example_model_path)
        xes_example_trace = load_file_content(example_xes_trace_path)
        # Load the model content
        hepsycode_model = load_file_content(input_file_path)
        
        # Retrieve relevant context
        # retrieved_context = retrieval_chain.run(hepsycode_model)
        retrieved_context = retrieval_chain.run({"question": hepsycode_model})
        print(retrieved_context)

        # Prompt Template with Retrieved Context
        #full_context = f"{retrieved_context}\n\n{hepsycode_model}"
        #response = llm_LangChain.generate(full_context)
        
        # Create and run the chain
        llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)
        start_time = time.time()
        response = llm_chain.run({"hepsycode_model": hepsycode_model})
        end_time = time.time()

        # Extract and save results
        xes_trace = response.strip()
        metadata = {
            "response_length": len(xes_trace),
            "temperature": 0.7,
            "usage": callback_handler.token_usage,
            "run_time": end_time - start_time,
            "model_name": model_name
        }

        save_to_file(output_xes_path, xes_trace)
        save_metadata(metadata_path, metadata)

        print(f"Processed: {file_name}")
        print(f"XES saved to: {output_xes_path}")
        print(f"Metadata saved to: {metadata_path}")


Error fetching or processing https://github.com/hepsycode/HEPSYCODE-Eclipse-GUI, exception: 
**********************************************************************
  Resource [93mpunkt[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt/english.pickle[0m

  Searched in:
    - 'C:\\Users\\vitto/nltk_data'
    - 'C:\\Users\\vitto\\anaconda3\\nltk_data'
    - 'C:\\Users\\vitto\\anaconda3\\share\\nltk_data'
    - 'C:\\Users\\vitto\\anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\vitto\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
    - ''
**********************************************************************

Error fetching or processing https://github.com/hepsycode/HEPSYCODE-AIDOaRt/tree/main/HEPSYCODE-Workbench, exception: 
*************************************

IndexError: list index out of range

# HEPSYCODE GAN

In [52]:
import os
import json
import time
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain.llms import Ollama
from langchain.callbacks.base import BaseCallbackHandler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Model configuration and cost per token
LLM = "Mistral"  # Change to "OpenAI", "Ollama", or "Mistral"
PRICE_PER_TOKEN = 0.00003  # Cost per token in USD

class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    Callback to track token usage.
    """
    def __init__(self):
        self.token_usage = {}

    def on_llm_end(self, response, **kwargs):
        if "usage" in response:
            self.token_usage = {
                "prompt_tokens": response["usage"].get("prompt_tokens", 0),
                "completion_tokens": response["usage"].get("completion_tokens", 0),
                "total_tokens": response["usage"].get("total_tokens", 0),
                "price_usd": response["usage"].get("total_tokens", 0) * PRICE_PER_TOKEN
            }
        else:
            print("Token usage information is missing from the response.")

# Load API key
def load_api_key():
    try:
        with open("secrets-master-llm.json") as f:
            secrets = json.load(f)
            return secrets.get(f"{LLM.lower()}_api_key")
    except (FileNotFoundError, KeyError) as e:
        print(f"Error loading API Key: {e}")
        return None

api_key = load_api_key()

# LLM configuration
callback_handler = TokenUsageCallbackHandler()
llm_LangChain = None
model_name = ""

if LLM == "Mistral":
    llm_LangChain = ChatMistralAI(
        model="mistral-large-latest",
        temperature=0.7,
        max_retries=2,
        mistral_api_key=api_key,
        callbacks=[callback_handler]
    )
    model_name = "mistral-large-latest"
elif LLM == "OpenAI":
    llm_LangChain = ChatOpenAI(
        model="gpt-4-turbo-2024-04-09",
        temperature=0.7,
        openai_api_key=api_key
    )
    model_name = "gpt-4-turbo-2024-04-09"
elif LLM == "Ollama":
    llm_LangChain = Ollama(
        model="llama3.2-32k",
        base_url="http://localhost:11434"
    )
    model_name = "llama3.2-32k"
 
# File paths
metamodel_path = "../../01-02-03_MSE/HEPSYCODE/org.univaq.hepsy/model/hepsy.ecore"
example_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.hepsy"
example_xes_trace_path = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-14 18.30 13%20-%20FIRFIRGCD_HPV-representations.aird.xes"
example_xes_ground_truth = "../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/"

# Base paths
base_model_path = "../../01-02-03_MSE/HEPSYCODE/HEPSYCODE-Models/D1/HEPSY/"
base_output_dir = f"D2-HEPSYCODE/XES-MORGAN-LLM-{model_name.lower()}"

# Ensure output directory exists
os.makedirs(base_output_dir, exist_ok=True)

# Utility functions
def load_file_content(file_path):
    try:
        with open(file_path, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return ""

def save_to_file(file_path, content):
    with open(file_path, 'w') as file:
        file.write(content)

def save_metadata(file_path, metadata):
    with open(file_path, 'w') as file:
        json.dump(metadata, file, indent=4)

def calculate_cosine_similarity(text1, text2):
    vectorizer = TfidfVectorizer().fit_transform([text1, text2])
    vectors = vectorizer.toarray()
    return cosine_similarity(vectors)[0, 1]

# Few-shot examples
context = (
    "We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. "
    "The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. "
    "The metamodel is designed to model algorithms, functionalities, and applications as a process network."
)

examples = [
    {
        "system": (
            "You are an expert in embedded systems design with deep knowledge of using Electronic Design"
            "Automation (EDA) tools at the system level. Your role is to assist designers and engineers" 
            "in developing system-level models for complex embedded applications.\n"
            "You must provide precise and detailed solutions following best practices in embedded systems engineering.\n"
            "You are expected to explain and implement concepts such as system-level simulation,"
            "hardware/software co-design, design space exploration, and event-driven or cycle-accurate modeling.\n"
            "You are proficient with tools like SystemC, MATLAB/Simulink, and similar frameworks, and know"
            "how to use them to describe functional behavior, system architecture, and constraints.\n"
            "Your approach should focus on verification, validation, and design optimization, "
            "considering parameters like performance, power consumption, cost, and time-to-market.\n"
            "You must be clear and concise, capable of providing practical examples and guidance on "
            "solving specific problems or optimizing the design workflow.\n"
            "Ensure your communication is professional and tailored to a technical audience.\n"
        ),
        "user": (
            f"This is the HEPSY metamodel:\n\n{hepsycode_metamodel}\n\n"
            f"This is an example model based on HEPSYCODE metamodel:\n\n{hepsycode_example_model}\n\n"
            f"This is a XES trace file representing the modeling step:\n\n{xes_example_trace}"
        )
    }
]

# Prompt template
example_prompt_template = PromptTemplate(
    input_variables=["user", "system"],
    template="System: {system}\nUser: {user}\n"
)

# Full prompt
def create_prompt(context, examples, model_content):
    return FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt_template,
        prefix=f"Context:\n{context}\n\n",
        suffix=(
            "Generate an XES trace file for the following model:\n\n{hepsycode_model}\nOutput:\n"
            "event StructuredNode processes ADD\n"
            "event Process name SET\n"
            "event Port portExtension SET\n"
            "event StructuredNode nChannels ADD\n"
            "event Channel nFrom SET\n"
            "event Channel nTo SET\n"
            "Write only the events, not other information in output."
        ),
        input_variables=["hepsycode_model"]
    )

# GAN Feedback Loop
i = 0
for file_name in os.listdir(base_model_path):
    i = i + 1
    if file_name.endswith(".hepsy") and i < 2:
        input_file_path = os.path.join(base_model_path, file_name)
        output_xes_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".xes"))
        ground_truth_xes_path = os.path.join(example_xes_ground_truth, file_name.replace(".hepsy", ".xes"))
        print(ground_truth_xes_path)
        metadata_path = os.path.join(base_output_dir, file_name.replace(".hepsy", ".json"))

        # Load example file contents
        hepsycode_metamodel = load_file_content(metamodel_path)
        hepsycode_example_model = load_file_content(example_model_path)
        xes_ground_truth_trace = load_file_content(ground_truth_xes_path)

        # Load the model content
        hepsycode_model = load_file_content(input_file_path)

        # Initial prompt and chain setup
        prompt = create_prompt(context, examples, hepsycode_model)
        llm_chain = LLMChain(llm=llm_LangChain, prompt=prompt)

        max_iterations = 5
        best_cosine_similarity = 0
        best_output = ""

        for _ in range(max_iterations):
            start_time = time.time()
            response = llm_chain.run({"hepsycode_model": hepsycode_model})
            end_time = time.time()

            xes_trace = response.strip()
            cosine_sim = calculate_cosine_similarity(xes_ground_truth_trace, xes_trace)

            if cosine_sim > best_cosine_similarity:
                best_cosine_similarity = cosine_sim
                best_output = xes_trace

            # Adjust context or prompt dynamically based on cosine similarity
            context += f"\nThe similarity score is {cosine_sim:.4f}. Adjusting for better alignment."
            print(context)
            prompt = create_prompt(context, examples, hepsycode_model)
            print(prompt)

        # Save final results
        metadata = {
            "best_similarity": best_cosine_similarity,
            "temperature": 0.7,
            "usage": callback_handler.token_usage,
            "run_time": end_time - start_time,
            "model_name": model_name
        }

        save_to_file(output_xes_path, best_output)
        save_metadata(metadata_path, metadata)

        print(f"Processed: {file_name}")
        print(f"Best Cosine Similarity: {best_cosine_similarity}")
        print(f"XES saved to: {output_xes_path}")
        print(f"Metadata saved to: {metadata_path}")

../../04_Trace_Parser/D1_HEPSYCODE/XES-MORGAN/2024-02-13 16.44 00%20-%20DigitalCam%20Nominal-representations.aird.xes
Token usage information is missing from the response.
We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. The metamodel is designed to model algorithms, functionalities, and applications as a process network.
The similarity score is 0.9980. Adjusting for better alignment.
input_variables=['hepsycode_model'] input_types={} partial_variables={} examples=[{'system': 'You are an expert in embedded systems design with deep knowledge of using Electronic DesignAutomation (EDA) tools at the system level. Your role is to assist designers and engineersin developing system-level models for complex embedded applications.\nYou must provide precise and detailed solutions following best practices in embedded systems engineering.\nYou are expected to explain

Token usage information is missing from the response.
We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. The metamodel is designed to model algorithms, functionalities, and applications as a process network.
The similarity score is 0.9980. Adjusting for better alignment.
The similarity score is 0.9978. Adjusting for better alignment.
input_variables=['hepsycode_model'] input_types={} partial_variables={} examples=[{'system': 'You are an expert in embedded systems design with deep knowledge of using Electronic DesignAutomation (EDA) tools at the system level. Your role is to assist designers and engineersin developing system-level models for complex embedded applications.\nYou must provide precise and detailed solutions following best practices in embedded systems engineering.\nYou are expected to explain and implement concepts such as system-level simulatio

Token usage information is missing from the response.
We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. The metamodel is designed to model algorithms, functionalities, and applications as a process network.
The similarity score is 0.9980. Adjusting for better alignment.
The similarity score is 0.9978. Adjusting for better alignment.
The similarity score is 0.9982. Adjusting for better alignment.
input_variables=['hepsycode_model'] input_types={} partial_variables={} examples=[{'system': 'You are an expert in embedded systems design with deep knowledge of using Electronic DesignAutomation (EDA) tools at the system level. Your role is to assist designers and engineersin developing system-level models for complex embedded applications.\nYou must provide precise and detailed solutions following best practices in embedded systems engineering.\nYou are expected 

Token usage information is missing from the response.
We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. The metamodel is designed to model algorithms, functionalities, and applications as a process network.
The similarity score is 0.9980. Adjusting for better alignment.
The similarity score is 0.9978. Adjusting for better alignment.
The similarity score is 0.9982. Adjusting for better alignment.
The similarity score is 0.9980. Adjusting for better alignment.
input_variables=['hepsycode_model'] input_types={} partial_variables={} examples=[{'system': 'You are an expert in embedded systems design with deep knowledge of using Electronic DesignAutomation (EDA) tools at the system level. Your role is to assist designers and engineersin developing system-level models for complex embedded applications.\nYou must provide precise and detailed solutions following be

Token usage information is missing from the response.
We have developed an Electronic Design Automation tool for designing embedded systems called HEPSYCODE. The tool is built upon Eclipse Ecore metamodels and utilizes Sirius features. The metamodel is designed to model algorithms, functionalities, and applications as a process network.
The similarity score is 0.9980. Adjusting for better alignment.
The similarity score is 0.9978. Adjusting for better alignment.
The similarity score is 0.9982. Adjusting for better alignment.
The similarity score is 0.9980. Adjusting for better alignment.
The similarity score is 0.9974. Adjusting for better alignment.
input_variables=['hepsycode_model'] input_types={} partial_variables={} examples=[{'system': 'You are an expert in embedded systems design with deep knowledge of using Electronic DesignAutomation (EDA) tools at the system level. Your role is to assist designers and engineersin developing system-level models for complex embedded application