In [2]:
!pip install -qU "semantic-router[local]"


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [17]:
import os
import warnings

# Never commit credentials: the key is read from the GROQ_API_KEY environment
# variable. Any key that was previously stored in this notebook must be treated
# as compromised and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    warnings.warn("The GROQ_API_KEY environment variable is not set.")

In [4]:
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.groq import Groq
from llama_index.core.tools import FunctionTool
from semantic_router import Route
from semantic_router.encoders import HuggingFaceEncoder
from semantic_router.layer import RouteLayer
from IPython.display import Markdown
from ema_workbench import load_results
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn import metrics
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.cluster.hierarchy as sch
from sklearn_extra.cluster import KMedoids
import pickle
import seaborn as sns



In [12]:
class DecompositionSpace:
    """In-memory view of the decomposition space of one project.

    Loads the experiment results, the candidate partitions and their pairwise
    scores from disk, embeds the partitions in 2-D with MDS and clusters that
    embedding so the space can be queried and plotted.
    """

    def __init__(self, project_path, project_name):
        """Load all project artifacts and precompute the clustered 2-D embedding.

        Args:
            project_path: Directory that contains the project's result files.
            project_name: File-name prefix shared by the project's result files.
        """
        model_filename = f"{project_path}/{project_name}_128scenarios_nopolicies_sobol"
        experiments_df, outcomes = load_results(model_filename + '.tar.gz')
        self.outcomes = pd.DataFrame(outcomes)
        self.experiments = experiments_df

        # SECURITY: pickle.load can execute arbitrary code; only point this
        # class at artifact files produced by a trusted pipeline.
        with open(model_filename + '_model.pkl', 'rb') as model_file:
            self.uncertainties_problem = pickle.load(model_file)
        with open(model_filename + '_partitions.pkl', 'rb') as partitions_file:
            self.partitions = pickle.load(partitions_file)

        # The CSV is read as a similarity matrix; 1 - similarity is used as distance.
        similarity_filename = f"{project_path}/{project_name}_omega_scores.csv"
        self.partitions_distance = 1 - pd.read_csv(similarity_filename, index_col=0)

        stable_solutions_filename = f"{project_path}/{project_name}_stable_solutions.pkl"
        with open(stable_solutions_filename, 'rb') as solutions_file:
            self.stable_solutions = pickle.load(solutions_file)
        self.other_labels = self.stable_solutions.keys()

        # Fixed random_state keeps the 2-D layout reproducible between runs.
        mds = MDS(dissimilarity='precomputed', random_state=0)
        self.embeddings_2d_partitions = mds.fit_transform(self.partitions_distance)
        partition_labels_2d, _, _ = self.run_agglomerative(
            self.embeddings_2d_partitions, k=5, threshold=None,
            show_dendogram=True, normalize=True, n_pca=2)
        self.partition_labels = partition_labels_2d

    @staticmethod
    def _new_agglomerative(metric, **kwargs):
        """Build an AgglomerativeClustering across scikit-learn versions."""
        try:
            return AgglomerativeClustering(metric=metric, **kwargs)
        except TypeError:
            # Older scikit-learn releases only accept the former ``affinity`` name.
            return AgglomerativeClustering(affinity=metric, **kwargs)

    def run_agglomerative(self, df, k, threshold=200, n_pca=None, normalize=False, show_dendogram=False, archstructure=None):
        """Cluster ``df`` with agglomerative clustering and score the result.

        Args:
            df: Samples to cluster (DataFrame or array). When ``n_pca`` is None
                it is passed to the model as a precomputed distance matrix.
            k: Number of clusters; only used when ``threshold`` is None.
            threshold: Distance threshold for cutting the tree. scikit-learn
                accepts exactly one of ``k``/``threshold``, so a non-None
                threshold takes precedence over ``k``.
            n_pca: When not None, cluster with Ward linkage on Euclidean
                distances; otherwise use single linkage on precomputed
                distances. NOTE: no PCA projection is applied, the value only
                selects the clustering mode.
            normalize: Standardize the features before clustering.
            show_dendogram: Accepted for backward compatibility; currently ignored.
            archstructure: Optional connectivity constraint forwarded to the model.

        Returns:
            tuple: ``(labels, fitted_model, silhouette)`` where negative labels
            are mapped to 0 and ``silhouette`` is 0.0 if only one cluster exists.
        """
        if normalize:
            sample = StandardScaler().fit_transform(df)
        else:
            # np.asarray works for DataFrames and plain arrays alike.
            sample = np.asarray(df)

        if n_pca is not None:
            metric, linkage = 'euclidean', 'ward'
        else:
            metric, linkage = 'precomputed', 'single'

        # Passing both n_clusters and distance_threshold raises in scikit-learn.
        n_clusters = k if threshold is None else None
        model = self._new_agglomerative(
            metric, n_clusters=n_clusters, linkage=linkage,
            connectivity=archstructure, distance_threshold=threshold)
        model.fit(sample)

        fixed_labels = np.where(model.labels_ < 0, 0, model.labels_)
        if len(set(fixed_labels)) > 1:
            silhouette = metrics.silhouette_score(sample, fixed_labels)
        else:
            silhouette = 0.0
        return fixed_labels, model, silhouette

    def get_decompositions_by_metric(self, metric: str, k: int, asc: bool) -> pd.DataFrame:
        """Return the first ``k`` outcome rows after sorting by ``metric``.

        Args:
            metric: Name of the outcome column to sort by.
            k: Number of rows to return.
            asc: Sort ascending (lowest first) when True, descending otherwise.

        Returns:
            pd.DataFrame: The ``k`` top rows of the sorted outcomes.
        """
        return self.outcomes.sort_values(by=[metric], ascending=[asc]).head(k)

    def get_xy_coordinates(self, labels, embeddings_2d, distance_df):
        """Look up the 2-D coordinates of the given partition labels.

        Args:
            labels: Column labels of ``distance_df`` to locate.
            embeddings_2d: Coordinates, one row per column of ``distance_df``.
            distance_df: DataFrame whose column order matches ``embeddings_2d``.

        Returns:
            list: One coordinate row per requested label, in the same order.

        Raises:
            ValueError: If a label is not a column of ``distance_df``.
        """
        columns = list(distance_df.columns)  # built once instead of per label
        return [embeddings_2d[columns.index(lb)] for lb in labels]

    def get_decomposition_space(self, labels=None):
        """Draw the clustered 2-D decomposition space.

        Args:
            labels: Optional partition labels to highlight and annotate.

        Returns:
            The ``matplotlib.pyplot`` module holding the current figure, so
            callers can invoke ``.show()`` on the result.
        """
        labels = [] if labels is None else labels
        plt.figure(figsize=(8, 6))
        df = pd.DataFrame(self.embeddings_2d_partitions, columns=['x', 'y'])
        df['cluster'] = self.partition_labels
        ax = sns.scatterplot(data=df, x="x", y="y", hue="cluster", palette="tab10", alpha=0.3, legend='full', sizes=(20, 200))
        ax.set(xlabel=None)
        ax.set(ylabel=None)
        if len(labels) > 0:
            medoids = np.array(self.get_xy_coordinates(labels, self.embeddings_2d_partitions, self.partitions_distance))
            ax.plot(medoids[:, 0], medoids[:, 1], 'X', markersize=9, alpha=0.7, color='black')
            for idx, lb in enumerate(labels):
                ax.annotate(lb, (medoids[idx, 0], medoids[idx, 1]))

        plt.grid(False)
        return plt

    #TODO translate/map quality attributes to metrics

In [None]:
class SemanticLayer:
    """Natural-language front end for exploring a decomposition space.

    A semantic router first classifies the question into one of the known
    intents; the question (plus a hint naming the matching tool) is then handed
    to an LLM agent whose tools are the public query methods of this class.
    """

    SYSTEM_PROMPT = """You are an expert software architect that assists users to explore and understand a decomposition space.
        You have a deep understanding of monolith to microservices migration and microservices quality metrics.
        Your role is to help users to understand the decomposition space to pick the most suitable microservices decomposition according to the user need.
        """

    def __init__(self, decomposition_space=None) -> None:
        """Create the layer; the agent and the router are built on first ``chat``.

        Args:
            decomposition_space: Object exposing ``get_decomposition_space``,
                ``get_decompositions_by_metric`` and ``experiments``.
        """
        self.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
        self.agent = None
        self.intent_detector = None
        self.decomposition_space = decomposition_space

    def show_decomposition_space(self) -> None:
        """ Gets the decomposition space (plot)

        Returns:
            None: The plot of the decomposition space is displayed
        """
        return self.decomposition_space.get_decomposition_space().show()

    def get_decomposition_by_metric(self, k: int, metric: str, asc: bool) -> pd.DataFrame:
        """ Gets the K decompositions matching a metric condition

        Args:
            k (int): The number of decompositions to retrieve.
            metric (str): The metric to match the decompositions against.
            asc (bool): Whether to obtain the decompositions that match a metric higher or lower

        Returns:
            pd.DataFrame: The K decompositions ordered by asc param against the metric passed as parameter.
        """
        return self.decomposition_space.get_decompositions_by_metric(metric, k, asc)

    def show_decomposition_in_space(self, k: int, metric: str, asc: bool) -> None:
        """ Show in the decomposition space (plot) the desired K decompositions matching a metric condition

        Args:
            k (int): The number of decompositions to show.
            metric (str): The metric to match the decompositions against.
            asc (bool): Whether to show the decompositions that match a metric higher or lower

        Returns:
            None: The plot of the decomposition space with the decompositions found is displayed
        """
        decompositions = self.decomposition_space.get_decompositions_by_metric(metric, k, asc)
        experiments = self.decomposition_space.experiments
        # Outcome row labels are used as positions into the experiments table.
        labels = []
        for index in decompositions.index.values:
            row = experiments.iloc[index]
            labels.append(f"resolution_{row['resolution']}_k_{row['k']}")
        return self.decomposition_space.get_decomposition_space(labels).show()

    def _get_tools(self):
        """Return the agent tools, one per public query method.

        NOTE: the tools are built from the methods themselves, so the method
        signatures and docstrings above are what the agent gets to see.
        """
        return [ # All the functions that the agent can execute
            FunctionTool.from_defaults(fn=self.show_decomposition_space, return_direct=True),
            FunctionTool.from_defaults(fn=self.get_decomposition_by_metric, return_direct=True),
            FunctionTool.from_defaults(fn=self.show_decomposition_in_space, return_direct=True),
        ]

    @staticmethod
    def _configure_routes():
        """Return the routes (intent name + sample utterances) used for intent detection.

        Each route name equals the name of the tool method that serves it.
        """
        return [
            Route(
                name="show_decomposition_space",
                utterances=[
                    "Which decompositions are generated?",
                    "Show me the decomposition space graphically",
                    "Get all decompositions graphically",
                    "Show me all decompositions",
                    "Show the decomposition space",
                    "Show the decomposition space graphically"
                ],
                description="Show the decomposition space graphically."
            ),
            Route(
                name="get_decomposition_by_metric",
                utterances=[
                    "Get the X decompositions with less Y",
                    "Get me X decompositions with more Y",
                    "Which is the decomposition with more X?",
                    "Which is the decomposition with less X?",
                    "Which is the decomposition of highest X?",
                    "Which is the decomposition of lowest X?",
                ],
                description="Get the K decompositions that match a preferring metric."
            ),
            Route(
                name="show_decomposition_in_space",
                utterances=[
                    "Show me the decomposition with more X",
                    "Get a plot of the decomposition X",
                    "Show me the decomposition with more X graphically",
                    "Show me the decomposition with lowest X graphically",
                    "Show me the decomposition with highest X graphically",
                    "Show me the decomposition of lowest X graphically",
                    "Show me the decomposition of highest X graphically",
                ],
                description="Show the K decompositions that match a preferring metric in the decomposition space."
            )
        ]

    def clear_memory(self):
        """Reset the agent's conversation state; a no-op before the first ``chat``."""
        if self.agent is not None:
            self.agent.reset()

    def _ensure_ready(self):
        """Build the agent and the intent router the first time they are needed.

        Each part is checked on its own so that a failure while building the
        router does not leave the layer permanently half-initialised.
        """
        if self.agent is None:
            self.agent = OpenAIAgent.from_tools(self._get_tools(), llm=self.llm, system_prompt=SemanticLayer.SYSTEM_PROMPT, verbose=True)
        if self.intent_detector is None:
            self.intent_detector = RouteLayer(encoder=HuggingFaceEncoder(), routes=SemanticLayer._configure_routes(), llm=self.llm)

    def chat(self, question, return_intent=False):
        """Answer a natural-language question about the decomposition space.

        Args:
            question: The user's question.
            return_intent: When True, return the routing details instead of
                only the tool output.

        Returns:
            * ``None`` when no intent is detected, when the agent produced no
              tool output, or when the tool output exposes a callable ``show``
              (the result is displayed instead).
            * ``(intent_name, tool_kwargs, agent_response)`` when
              ``return_intent`` is True and a tool produced an output.
            * The raw output of the first tool call otherwise.
        """
        self._ensure_ready()

        intent = self.intent_detector(question)
        print("Intent detected:", intent.name)

        if intent.name is None:
            msg = "I'm sorry, I did not understand your question or I'm no able to answer it. Please try again..."
            if return_intent:
                return None
            return display(Markdown(f"<b>{msg}</b>"))

        # Nudge the agent towards the tool that matches the detected intent.
        function_name = "\nTry to execute tool " + intent.name if intent.name != 'misc' else ""
        response = self.agent.chat(question + function_name)

        if response.sources is None or len(response.sources) == 0:
            return display(Markdown(f"<b>{response.response}</b>"))

        first_source = response.sources[0]
        obj = first_source.raw_output

        show_op = getattr(obj, "show", None)
        if callable(show_op):
            msg =  "This is a graphical representation of the results for your question."
            display(Markdown(f"<b>{msg}</b>"))
            show_op()
            return None
        if return_intent:
            return intent.name, first_source.raw_input.get("kwargs"), response
        return obj

In [None]:
# Load the jpetstore artifacts from ../jpetstore; the constructor also computes
# the 2-D embedding of the partitions and clusters it.
decomposition_space = DecompositionSpace("../jpetstore", "jpetstore")

In [None]:
# Wrap the space in the chat layer; the agent and the intent router are only
# created on the first chat() call.
layer = SemanticLayer(decomposition_space)

In [None]:
# Tabular query with an explicit k (10) on the density metric.
layer.chat("Get the 10 decompositions with higher density")

In [None]:
# Tabular query without an explicit k, on the ned metric.
layer.chat("Get the decomposition with lowest ned")

In [None]:
# Question-style phrasing that mirrors a get_decomposition_by_metric route utterance.
layer.chat("Which is the decomposition of highest modularity?") 

In [None]:
# Graphical query that mirrors a show_decomposition_in_space route utterance.
layer.chat("Show me the decomposition with highest density graphically") 

In [None]:
# Graphical query that combines an explicit k (10) with a metric.
layer.chat("Show me the 10 decompositions with lowest modularity graphically") 

In [None]:
# Same question as the previous cell, but printing the value chat() returns.
print(layer.chat("Show me the 10 decompositions with lowest modularity graphically"))

# Evaluation

In [None]:
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import FunctionTool
from llama_index.core.schema import Document
from llama_index.core.prompts.base import BasePromptTemplate, PromptTemplate

In [None]:
import nest_asyncio

# Reuse the key configured at the top of the notebook instead of repeating the
# secret as a literal in this cell.
Settings.llm = Groq(model="llama3-8b-8192", api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Allow nested event loops, which the async LlamaIndex calls below need inside Jupyter.
nest_asyncio.apply()

In [None]:
# Prompt used to turn each function's documentation into evaluation questions.
# {context_str} and {query_str} are filled in by the prompt template.
DEFAULT_TEXT_QA_PROMPT = """\
Context information is below.
The only available metrics are ned, density, and modularity
---------------------
{context_str}
---------------------
Based on the following function documentation, generate natural questions that someone might ask and this function answer. The answer should always be the function name along its arguments. The questions should relate to its description and expected outcomes in a conversational manner, also to the example questions. The documentation is as follows
{query_str}
"""
text_question_template = PromptTemplate(DEFAULT_TEXT_QA_PROMPT)
# Both template arguments of the generator take prompt-template objects, so the
# QA template is wrapped as well instead of being passed as a raw string.
text_qa_template = PromptTemplate(DEFAULT_TEXT_QA_PROMPT)
# Instruction sent along with every document when generating questions.
# Built from adjacent literals so that source indentation does not end up
# inside the prompt (backslash continuations within a literal keep it).
question_gen_query = (
    "You are a Teacher/Professor. Your task is to setup "
    "5 questions for an upcoming "
    "quiz/examination. The questions should be diverse in nature "
    "across the document. Restrict the questions to the "
    "context information provided. Tip: use the example questions as base"
)
    
# One Document per agent tool: function name, example questions and the
# description/argument text. These documents are the source material for the
# generated evaluation questions.
# NOTE(review): the texts repeat the SemanticLayer docstrings and route
# utterances by hand; keep them in sync when either side changes.
documentation = [Document(text="""
        Function: show_decomposition_in_space
        Example questions:  "Show me the decomposition with more X",
                    "Get a plot of the decomposition X",
                    "Show me the decomposition with more X graphically",
                    "Show me the decomposition with lowest X graphically",
                    "Show me the decomposition with highest X graphically",
                    "Show me the decomposition of lowest X graphically",
                    "Show me the decomposition of highest X graphically",
        Description: Show in the decomposition space (plot) the desired K decompositions matching a metric condition

        Args:
            k (int): The number of decompositions to show.
            metric (str): The metric to match the decompositions against.
            asc (bool): Whether to show the decompositions that match a metric higher or lower
    

        Returns:
            plt.plot: The plot of the decomposition space with the decompositions found"""),
                Document(text="""
                Function: get_decomposition_by_metric
                Example questions:  "Get the X decompositions with less Y",
                    "Get me X decompositions with more Y",
                    "Which is the decomposition with more X?",
                    "Which is the decomposition with less X?",
                    "Which is the decomposition of highest X?",
                    "Which is the decomposition of lowest X?",
                Description: Gets the K decompositions matching a metric condition

          Args:
            k (int): The number of decompositions to retrieve.
            metric (str): The metric to match the decompositions against.
            asc (bool): Whether to obtain the decompositions that match a metric higher or lower
    
        Returns:
            pd.DataFrame: The K decompositions ordered by asc param against the metric passed as parameter.
            """),
                Document(text="""
                Function: show_decomposition_space
                Example questions:  "Which decompositions are generated?",
                    "Show me the decomposition space graphically",
                    "Get all decompositions graphically",
                    "Show me all decompositions",
                    "Show the decomposition space",
                    "Show the decomposition space graphically"
                Description: Gets the decomposition space (plot)

        Returns:
            plt.plot: The plot of the decomposition space
            """)]
# Build the question generator over the tool documentation and produce the
# evaluation questions; the last expression displays them.
data_generator = DatasetGenerator.from_documents(
    documentation,
    text_question_template=text_question_template,
    text_qa_template=text_qa_template,
    question_gen_query=question_gen_query,
)
eval_questions = data_generator.generate_questions_from_nodes()
eval_questions

In [None]:
# Relevancy evaluator built with its defaults.
# NOTE(review): presumably it judges with the Settings.llm configured above - confirm.
evaluator = RelevancyEvaluator()

In [None]:
# Index the tool documentation.
# NOTE(review): vector_index is not referenced by any later cell shown here.
vector_index = VectorStoreIndex.from_documents(documentation)

In [None]:
def display_eval_df(query: str, response: Response, eval_result: str) -> None:
    eval_df = pd.DataFrame(
        {
            "Query": [query],
            "Response": [str(response)],
            "Source": [(
                response.source_nodes[0].node.get_content()[:1000] + "..."
            )],
            "Evaluation Result": [eval_result.passing],
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"]
    )
    display(eval_df)


In [None]:
# Ask one generated question and keep the routing details for the evaluation.
chat_result = layer.chat(eval_questions[3], return_intent=True)
if chat_result is None:
    # chat() returns None when no intent matched, when the agent produced no
    # tool output, or when the output was rendered graphically.
    raise RuntimeError("layer.chat() returned no tool result for eval_questions[3]")
response_vector, args, response = chat_result

# Text form of the executed call, e.g. tool_name(k=1, metric='x'); the tool
# kwargs may be missing, in which case the argument list is left empty.
response2 = f"{response_vector}("
response2 += ', '.join(f"{key}={value!r}" for key, value in (args or {}).items())
response2 += ")"
response

In [None]:
# Judge the response against the same question that produced it (index 3).
eval_result = evaluator.evaluate_response(
    query=eval_questions[3], response=response
)

In [None]:
# Display the evaluation result computed in the previous cell.
eval_result

In [None]:
# NOTE(review): `response` was produced for eval_questions[3] but is judged here
# against eval_questions[4] - confirm this mismatch is an intentional negative check.
eval_result = evaluator.evaluate_response(
    query=eval_questions[4], response=response
)

In [None]:

# Show the question used as the query in the preceding evaluation.
eval_questions[4]