In [2]:
import pandas as pd
import numpy as np
import dspy

In [9]:

# Loading the created dataset

df = pd.read_csv('third_prompt_answers.csv')

df

Unnamed: 0,Category,Question,Expected Response,actual_response,Correct_Service
0,Information Retrieval,What is the definition of 'machine learning'?,GeneralKnowledger,GeneralKnowledger,1
1,Information Retrieval,Who is the CEO of Amazon?,GeneralKnowledger,GeneralKnowledger,1
2,Information Retrieval,What is the capital of Australia?,GeneralKnowledger,GeneralKnowledger,1
3,Information Retrieval,What is the definition of 'artificial intellig...,GeneralKnowledger,GeneralKnowledger,1
4,Information Retrieval,Who is the author of the book 'To Kill a Mocki...,GeneralKnowledger,GeneralKnowledger,1
5,Information Retrieval,What is the capital of France?,SearchDocs,GeneralKnowledger,0
6,Information Retrieval,What is the definition of 'natural language pr...,GeneralKnowledger,GeneralKnowledger,1
7,Information Retrieval,Who is the founder of Google?,GeneralKnowledger,GeneralKnowledger,1
8,Information Retrieval,What is the capital of Japan?,GeneralKnowledger,GeneralKnowledger,1
9,Information Retrieval,How do we calculate revenue?,SearchDocs,QueryDatabase,0


In [11]:

df['Correct_Service'] = [1 if x==y else 0 for x,y in zip(df['Expected Response'],df['actual_response'])]
# Calculating the previous accuracy
print('Accuracy: ',format(100*df['Correct_Service'].sum()/len(df), '0.2f')+'%')

Accuracy:  76.92%


In [11]:
# Recreating the Pydantic output parser

from pydantic import BaseModel, Field
from typing import List
class SelectedService(BaseModel):
    Action: str = Field(description="Name of the service.", default='none')
    ActionInput: str = Field(description="Action input require for successful completion of this action, output 'none' as default", default='none')
    Instructions: str = Field(description="Instructions of what needs to be done by this action.", default='none')
    DependsOn: List[str] = Field(description="List of names of actions this action depends on.", default='none')
    Further_Explanation: str = Field(description="Includes the explanation and extra details. Add all extra details here Remove'\_' with _", default='none')
    # @field_validator("ActionInput","Action","Instructions","Further_Explanation", mode='before')
    # def transform_id_to_str(cls, value) -> str:
    #         return str(value).replace('\_','')
    








# DSPy signature for the service_selection 
class GenerateService(dspy.Signature):
    """You are an experienced Data Analyst. You received the following user query: "{query}".
    Do not repeat the query in your answer

    Your task is to create an action list based on the available services provided. Please ensure that:

    The list of actions is minimized to only what is necessary to fulfill the user's query.
    Actions are not duplicated unless required for the workflow.
    Actions are combined where possible to streamline the process.
    Each action is linked to one of the available services.
    For each action, provide an action input and detailed instructions.


    Available Services:
    """

    available_services = dspy.InputField(description='All available services for the agent')
    query = dspy.InputField(description='User specified query')
    
    selected_service : SelectedService = dspy.OutputField(description='The details of the service selected, only the service selected')
    

In [12]:
# Basic test train split 
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.3)

# Print the shapes of the training and testing sets
print("Training set shape:", train_df.shape)
print("Testing set shape:", test_df.shape)

Training set shape: (29, 5)
Testing set shape: (13, 5)


In [13]:
train_df

Unnamed: 0,Category,Question,Expected Response,actual_response,Correct_Service
31,Edge Cases,Can you retrieve information on a fictional to...,No Service Available,SearchDocs,0
0,Information Retrieval,What is the definition of 'machine learning'?,SearchDocs,SearchDocs,1
7,Information Retrieval,Who is the founder of Google?,SearchDocs,fail service,0
21,SQL Generation,Write a SQL query to retrieve the average orde...,QueryDatabase,fail service,0
10,Information Retrieval,What other tables does table X depend on?,SearchDocs,fail service,0
23,SQL Generation,Create a SQL query to retrieve the top 3 produ...,QueryDatabase,QueryDatabase,1
14,SQL Generation,Write a SQL query to retrieve the top 5 custom...,QueryDatabase,fail service,0
20,SQL Generation,Create a SQL query to retrieve all orders for ...,QueryDatabase,QueryDatabase,1
29,General Knowledge,What is the definition of 'cloud computing'?,GeneralKnowledger,fail service,0
12,Information Retrieval,What tables have tax information?,SearchDocs,fail service,0


In [14]:
test_df

Unnamed: 0,Category,Question,Expected Response,actual_response,Correct_Service
17,SQL Generation,Write a SQL query to retrieve the total sales ...,QueryDatabase,QueryDatabase,1
35,Error Handling,Can you generate a SQL query with an invalid s...,invalid query,SearchDocs,0
5,Information Retrieval,What is the capital of France?,SearchDocs,QueryDatabase,0
3,Information Retrieval,What is the definition of 'artificial intellig...,SearchDocs,SearchDocs,1
38,Data Management,Refresh table X and all other tables that depe...,DataManager,QueryDatabase,0
27,General Knowledge,Who is the author of the book '1984'?,GeneralKnowledger,QueryDatabase,0
11,Information Retrieval,Where do we apply currency conversion?,SearchDocs,SearchDocs,1
32,Edge Cases,Can you generate a SQL query to retrieve infor...,No Service Available,SearchDocs,0
16,SQL Generation,Write a SQL query to retrieve the average orde...,QueryDatabase,fail service,0
2,Information Retrieval,What is the capital of Australia?,SearchDocs,QueryDatabase,0


In [22]:
# help(dspy)
# Defining the LLM, I tried different LLM's but Ultimately used Mistral, add your api_key

from groq import Groq

# lm = dspy.GROQ(model = 'llama3-70b-8192', api_key='', max_tokens=1200)

lm = dspy.Mistral(model='open-mistral-7b', api_key="", max_tokens=1200)
# lm = dspy.OpenAI(model='gpt-3.5', api_key ='', max_tokens=100)
dspy.settings.configure(lm=lm)



In [16]:
# help( dspy.GROQ)

# help(lm)
# lm.__dict__

In [17]:
# Creating the Module for the service selection
class Service(dspy.Module):
    def __init__(self):
        super().__init__()

        # self.retrieve = dspy.Retrieve(k=1)
        self.generate_answer = dspy.ChainOfThought(GenerateService)
    
    def forward(self, query, available_services):
        pred = self.generate_answer(query=query, available_services = available_services)
        return dspy.Prediction(selected_service = pred.selected_service)
        
# service = dspy.ChainOfThought(GenerateService)

In [18]:
# from dspy.datasets import HotPotQA


In [23]:
# Testing the untrained module on a query
q = 'What is the capital of Pakistan?'
available_services = """
name: QueryDatabase description: "Use this service to query data from the company database. Useful for straightforward query asking for a specific piece of data or information. It requests the current or recent numerical value. This service should be used to calculate KPIs, metrics, trends. Use for questions that typically expect an answer that provides a figure or amount. Use when users asks questions that require analysis or tabular data. Example questions: who are our top customers? What was our revenue last year? Show me the trend of our sales."

name: SearchDocs description: "The service to search the documentation for information. Should be used for when user is asking for an explanation for a description of the methodology or process used to determine something. Example questions: How do we calculate profit? Show me code related to revenue? Where do we process transactions?"

name: DataManager description: "This service is to run pre-configured data pipelines like DAGs ETL"

name: General_Knowledger description: "This service answer general knowledge querys"
"""
s = dspy.Predict(GenerateService)


print(s(query=q, available_services=available_services).selected_service)

Available Services: name: QueryDatabase description: "Use this service to query data from the company database.", name: General_Knowledger description: "This service answer general knowledge queries"
Query: What is the capital of Pakistan?
Selected Service: General_Knowledger

Action Input: Capital of Pakistan
Detailed Instructions: Utilize the General_Knowledger service to answer the user's query about the capital of Pakistan. This service is designed to provide general knowledge answers, making it the best choice for this specific question.


In [24]:
# Creating a metric for evaluation
# It adds 1 if the correct service is mentioned and divides by the score when the answer mentions other services.
def validate_service(example, pred, trace=None):
    s = ['SearchDocs', 'QueryDatabase', 'GeneralKnowledger',
       'DataManager']
    excluded_service = [x for x in s if x!= example.selected_service]
    score =0
    neg_score = 1
    try:
        w= pred.selected_service.split('Selected Service')[1]
        if example.selected_service in w:
            score+=1
        for e_s in excluded_service:
            if e_s in w:
                neg_score+=1
        
        
        # print(score)
        return score/neg_score
    except:
        w = pred.selected_service
        if example.selected_service in w:
            score+=1
        for e_s in excluded_service:
            if e_s in w:
                neg_score+=1
        
        
        # print(score)
        return score/neg_score
        

# Adding the training and testing set into the DSPy example object
trainset = [dspy.Example(query =q,available_services =available_services ,selected_service=a).with_inputs("query", "available_services") for q,a in zip(train_df['Question'],train_df['Expected Response'])]
testset = [dspy.Example(query =q,available_services =available_services ,selected_service=a).with_inputs("query", "available_services") for q,a in zip(test_df['Question'],test_df['Expected Response'])]

# validate_service(trainset[],trainset[0])


In [26]:
from dspy.teleprompt import *

# Set up the optimizer: we want to "bootstrap" (i.e., self-generate) 8-shot examples of your program's steps.
# The optimizer will repeat this 10 times (plus some initial attempts) before selecting its best attempt on the devset.
config = dict(max_bootstrapped_demos=4, max_labeled_demos=1, num_candidate_programs=2, num_threads=4)

kwargs = dict(num_threads=64, display_progress=True, display_table=0) # Used in Evaluate class in the optimization process

# One optimizer for the FewShot examples we should feed into the prompt
optimizer_d = BootstrapFewShotWithRandomSearch(metric=validate_service, **config)
# One for the optimal instructions
optimizer = COPRO(metric=validate_service, prompt_model=lm, init_temperature=0.2)

d = optimizer_d.compile(Service(),trainset=trainset, valset=testset)
c = optimizer.compile(Service(),trainset=trainset, eval_kwargs=kwargs)


Average Metric: 3.3333333333333335 / 13  (25.6): 100%|█████████████████████████████████| 13/13 [00:18<00:00,  1.42s/it]
Average Metric: 5.0 / 13  (38.5): 100%|████████████████████████████████████████████████| 13/13 [00:28<00:00,  2.18s/it]
 21%|█████████████████▏                                                                 | 6/29 [01:08<04:21, 11.36s/it]
Average Metric: 5.166666666666666 / 13  (39.7): 100%|██████████████████████████████████| 13/13 [00:54<00:00,  4.16s/it]
 28%|██████████████████████▉                                                            | 8/29 [01:13<03:12,  9.17s/it]
Average Metric: 3.0 / 13  (23.1): 100%|████████████████████████████████████████████████| 13/13 [00:14<00:00,  1.15s/it]
 14%|███████████▍                                                                       | 4/29 [00:09<01:02,  2.49s/it]
Average Metric: 4.5 / 13  (34.6): 100%|████████████████████████████████████████████████| 13/13 [00:19<00:00,  1.48s/it]





You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.

---

Follow the following format.

Basic Instruction: The initial instructions before optimization
Proposed Instruction: The improved instructions for the language model
Proposed Prefix For Output Field: The string at the end of the prompt, which will help the model start solving the task

---

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "{query}". Do not repeat the query in your answer Your task is to create an action list based on the available services provided. Please ensure that: The list of actions is minimized to only what is necessary to fulfill the user's query. Actions are not duplicated unless required for the workflow. Actions are combined where possible t

Average Metric: 8.499999999999998 / 29  (29.3): 100%|██████████████████████████████████| 29/29 [00:21<00:00,  1.37it/s]





Given the user query: "{query}", your task is to create an efficient and streamlined action plan using the available services. Ensure that each action is unique, necessary, and linked to a specific service. Here's the proposed instruction:

Proposed Instruction: Analyze the user query "{query}" and devise an action plan using the following steps:

1. **Preprocess Data (NLP Service)**: Break down the query into individual components (keywords, entities, etc.) to better understand the user's intent.
   - Action Input: User Query
   - Instructions: Use Natural Language Processing (NLP) techniques to preprocess the user query and identify key elements.

2. **Identify Data Source (Data Catalog Service)**: Locate the relevant data sources required to answer the user's query.
   - Action Input: Preprocessed Query Components
   - Instructions: Search the data catalog for data sources that contain the necessary information to answer the user's query.

3. **Extract Data (ETL Service)**: Retri

Average Metric: 12.0 / 29  (41.4): 100%|███████████████████████████████████████████████| 29/29 [00:14<00:00,  1.96it/s]





Given the user query: "{query}", your task is to create a streamlined action plan using the available services. Here's the proposed instruction:

Proposed Instruction: Analyze the user query "{query}" and devise a concise action plan using the following format:

1. [Action Name]: [Service Name]
   - [Action Input]: [Detailed instructions]

Ensure the action plan is minimal, non-duplicative, and combines actions where possible.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan: The details of the service selected, only the service selected

---

Available Services: name: QueryDatabase description: "Use this service to query data from the company database. Useful for straightforward query asking for a specific piece of data or information. It requests the current or recent numerical value. This service should be u

Average Metric: 12.0 / 29  (41.4): 100%|███████████████████████████████████████████████| 29/29 [00:20<00:00,  1.40it/s]





Given the user query "{query}", analyze the request and identify the necessary steps to fulfill it using the available services. Minimize the action list, avoid duplication, and combine actions where possible to streamline the process. For each action, provide an action input and detailed instructions, ensuring each action is linked to one of the available services.

Proposed Instruction: Generate a concise action plan for the user query "{query}" using the available services, ensuring minimal steps, no duplication, and streamlined workflow.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:

Example:

Basic Instruction: The user query is "What is the average temperature in New York City for the month of January?"

Proposed Instruction: Generate a concise action plan for the user query "What is the average tempe

Average Metric: 7.999999999999998 / 29  (27.6): 100%|██████████████████████████████████| 29/29 [00:18<00:00,  1.58it/s]





Given a user query, generate a streamlined action list using the available services. The action list should be concise, avoiding duplication, and combining actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

Proposed Instruction: Given a user query "{query}", create a prioritized action list using the following services: {list of services}. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should have an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action List:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."

Proposed Instru

Average Metric: 13.0 / 29  (44.8): 100%|███████████████████████████████████████████████| 29/29 [00:18<00:00,  1.55it/s]





Given the user query "{query}", your task is to create a streamlined action plan using the available services. Here's a proposed instruction:

Proposed Instruction: Generate a concise action list by prioritizing the necessary steps, eliminating duplicates, and combining actions where possible. Each action should be linked to a specific service, and for each action, provide an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action List:

Example:

Basic Instruction: The user query is: "What is the average sales revenue for the last quarter for each product category?"

Proposed Instruction: Generate a concise action list by prioritizing the necessary steps, eliminating duplicates, and combining actions where possible. Each action should be linked to a specific service, and for each ac

Average Metric: 14.5 / 29  (50.0): 100%|███████████████████████████████████████████████| 29/29 [00:18<00:00,  1.54it/s]





Given the user query "{query}", your task is to create an efficient and streamlined action plan using the available services. Here's the proposed instruction:

Proposed Instruction: Analyze the user query, identify the necessary steps, and prioritize them to minimize actions, avoid duplication, and combine where possible. For each action, specify the service to use, provide an action input, and give detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."

Proposed Instruction: Analyze the user query, identify the necessary steps, and prioritize them to minimize actions, avoid duplication, and combine where possi

Average Metric: 5.999999999999998 / 29  (20.7): 100%|██████████████████████████████████| 29/29 [00:20<00:00,  1.41it/s]





Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction:
Answer the user query by creating a prioritized action list based on the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to one of the available services, and include an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action List:

1. Action Input: {Input for Action 1}
   Service: {Service for Action 1}
   Inst

Average Metric: 10.833333333333332 / 29  (37.4): 100%|█████████████████████████████████| 29/29 [00:20<00:00,  1.43it/s]





Given the user query "{query}", your task is to create a streamlined action plan using the available services. Here's the proposed instruction:

Proposed Instruction: "Generate a concise action plan for the given user query '{query}', utilizing the most efficient services. Ensure the plan is minimal, non-duplicative, and combines actions where possible. For each action, provide the service name, action input, and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:"

Example:

Basic Instruction: "You are an experienced Data Analyst. You received the following user query: 'Find the average sales for each product category in the last quarter.' Do not repeat the query in your answer."

Proposed Instruction: "Generate a concise action plan for the given user query 'Find the average sales for eac

Average Metric: 9.499999999999998 / 29  (32.8): 100%|██████████████████████████████████| 29/29 [00:20<00:00,  1.42it/s]





Given a user query, generate a streamlined action list using the available services. Ensure the list is concise, actions are unique, and they are combined where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

Proposed Instruction: Analyze the given query "{query}" and generate a minimal action list using the following services: {list of services}. Each action will have an input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action List:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales for each product category for the last quarter."

Proposed Instruction: Analyze the given query "Find the average sales for each product category for the last quarter" and

Average Metric: 10.666666666666668 / 29  (36.8): 100%|█████████████████████████████████| 29/29 [00:16<00:00,  1.71it/s]





You are an experienced Data Analyst. You received the following user query: "{query}".
    Do not repeat the query in your answer

    Your task is to create an action list based on the available services provided. Please ensure that:

    The list of actions is minimized to only what is necessary to fulfill the user's query.
    Actions are not duplicated unless required for the workflow.
    Actions are combined where possible to streamline the process.
    Each action is linked to one of the available services.
    For each action, provide an action input and detailed instructions.


    Available Services:

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Selected Service: The details of the service selected, only the service selected

---

Available Services: name: QueryDatabase description: "Use this service to query 

  0%|                                                                                           | 0/29 [00:00<?, ?it/s]2024-06-08T11:15:12.471813Z [error    ] Error for example in dev set: 		 Status: 520. Message: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>api.mistral.ai | 520: Web server is returning an unknown error</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrappe




Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Analyze the user query "{query}" and devise a concise, efficient, and optimized action plan using the available services. The action list will be structured in a way that minimizes actions, avoids duplication, and combines actions where possible. Each action will be linked to a specific service, will include an action input, and will provide detailed instructions.»
[2] «Prefix #1: Optimized Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each produ

Average Metric: 6.333333333333331 / 29  (21.8): 100%|██████████████████████████████████| 29/29 [00:37<00:00,  1.30s/it]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.»
[2] «Prefix #1: Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."

Proposed Instruction: Given the user

Average Metric: 5.333333333333333 / 29  (18.4): 100%|██████████████████████████████████| 29/29 [00:22<00:00,  1.26it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized and optimized action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   - Service:

Average Metric: 12.333333333333334 / 29  (42.5): 100%|█████████████████████████████████| 29/29 [00:22<00:00,  1.28it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized and optimized action plan using the available services. The action list will be concise, avoiding duplication, and combining actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   - Se

Average Metric: 2.6666666666666665 / 29  (9.2): 100%|██████████████████████████████████| 29/29 [00:19<00:00,  1.47it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Analyze the user query "{query}", and create a prioritized, concise, and non-redundant action plan using the available services. Each action should be linked to a specific service, with an action input and detailed instructions. The plan should be structured in a way that optimizes the workflow and minimizes the number of actions.»
[2] «Prefix #1: Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."

Proposed 

Average Metric: 9.000000000000002 / 29  (31.0): 100%|██████████████████████████████████| 29/29 [00:33<00:00,  1.15s/it]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 5.833333333333333 / 29  (20.1): 100%|██████████████████████████████████| 29/29 [00:25<00:00,  1.15it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and non-redundant action plan using the available services. Each action should be linked to a specific service, have an action input, and detailed instructions. The plan should be structured in a way that optimizes the workflow and provides the most efficient solution.»
[2] «Prefix #1: Optimized Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."


Average Metric: 2.0 / 29  (6.9): 100%|█████████████████████████████████████████████████| 29/29 [00:24<00:00,  1.18it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized and optimized action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.»
[2] «Prefix #1: Optimized Action Plan:

Example:

Basic Instruction: You are an experienced Data Analyst. You received the following user query: "Find the average sales revenue for each product category in the last quarter."

Proposed Instruction: Given the user

Average Metric: 7.166666666666664 / 29  (24.7): 100%|██████████████████████████████████| 29/29 [00:23<00:00,  1.23it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and non-redundant action plan using the available services. Each action should be linked to a specific service, include an action input, and provide detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Service Name}
   - Action Input: {Detailed instructions}

---
[2] «Instruction #2: Give

Average Metric: 9.333333333333332 / 29  (32.2): 100%|██████████████████████████████████| 29/29 [00:23<00:00,  1.22it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized and optimized action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to one of the available services, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
  

Average Metric: 8.0 / 29  (27.6): 100%|████████████████████████████████████████████████| 29/29 [00:23<00:00,  1.22it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 7.333333333333334 / 29  (25.3): 100%|██████████████████████████████████| 29/29 [00:23<00:00,  1.23it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 7.0 / 29  (24.1): 100%|████████████████████████████████████████████████| 29/29 [00:23<00:00,  1.21it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 9.333333333333334 / 29  (32.2): 100%|██████████████████████████████████| 29/29 [00:28<00:00,  1.01it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

  0%|                                                                                           | 0/29 [00:00<?, ?it/s]2024-06-08T11:23:45.816798Z [error    ] Error for example in dev set: 		 Status: 520. Message: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>api.mistral.ai | 520: Web server is returning an unknown error</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrappe




Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create an optimized and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Optimized Action Plan:
1. Action: {Action Name}
   -

Average Metric: 8.0 / 29  (27.6): 100%|████████████████████████████████████████████████| 29/29 [00:24<00:00,  1.17it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 9.0 / 29  (31.0): 100%|████████████████████████████████████████████████| 29/29 [00:18<00:00,  1.55it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 10.0 / 29  (34.5): 100%|███████████████████████████████████████████████| 29/29 [00:22<00:00,  1.29it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 7.333333333333333 / 29  (25.3): 100%|██████████████████████████████████| 29/29 [00:25<00:00,  1.16it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

Average Metric: 9.0 / 29  (31.0): 100%|████████████████████████████████████████████████| 29/29 [00:23<00:00,  1.23it/s]





Attempted Instructions:
[1] «Instruction #1: Given the user query: "{query}", as an efficient Data Analyst, your task is to create a streamlined action plan utilizing the available services. The action list will be concise, avoiding redundancy, and will be structured in a way that optimizes the workflow. Each action will be linked to a specific service and will include an action input and detailed instructions.

Proposed Instruction: Given the user query "{query}", create a prioritized, concise, and efficient action plan using the available services. Minimize the number of actions, avoid duplication, and combine actions where possible. Each action should be linked to a specific service, with an action input and detailed instructions.

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Action Plan:
1. Action: {Action Name}
   

In [141]:
# help(COPRO.compile)
# trainset
# evaluate(c)

In [27]:
# print(lm.inspect_history(n=1))

# help(optimizer.compile)
# c
# c(available_services=testset[0].available_services, query=testset[0].query)
d(available_services=testset[7].available_services, query=testset[7].query)

Prediction(
    selected_service='Available Services: name: QueryDatabase description: "Use this service to query data from the company database. Useful for straightforward query asking for a specific piece of data or information. It requests the current or recent numerical value. This service should be used to calculate KPIs, metrics, trends. Use for questions that typically expect an answer that provides a figure or amount. Use when users asks questions that require analysis or tabular data. Example questions: who are our top customers? What was our revenue last year? Show me the trend of our sales." name: SearchDocs description: "The service to search the documentation for information. Should be used for when user is asking for an explanation for a description of the methodology or process used to determine something. Example questions: How do we calculate profit? Show me code related to revenue? Where do we process transactions?" name: DataManager description: "This service is to r

In [28]:
# This contains the FewShot examples we should add into the prompt
# Ignore the first phase as the optimizer for the instructions is in the next cell, only read after 
lm.inspect_history(n=1)





You are an experienced Data Analyst. You received the following user query: "{query}".
    Do not repeat the query in your answer

    Your task is to create an action list based on the available services provided. Please ensure that:

    The list of actions is minimized to only what is necessary to fulfill the user's query.
    Actions are not duplicated unless required for the workflow.
    Actions are combined where possible to streamline the process.
    Each action is linked to one of the available services.
    For each action, provide an action input and detailed instructions.


    Available Services:

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Selected Service: The details of the service selected, only the service selected

---

Available Services: name: QueryDatabase description: "Use this service to query 

'\n\n\nYou are an experienced Data Analyst. You received the following user query: "{query}".\n    Do not repeat the query in your answer\n\n    Your task is to create an action list based on the available services provided. Please ensure that:\n\n    The list of actions is minimized to only what is necessary to fulfill the user\'s query.\n    Actions are not duplicated unless required for the workflow.\n    Actions are combined where possible to streamline the process.\n    Each action is linked to one of the available services.\n    For each action, provide an action input and detailed instructions.\n\n\n    Available Services:\n\n---\n\nFollow the following format.\n\nAvailable Services: All available services for the agent\n\nQuery: User specified query\n\nReasoning: Let\'s think step by step in order to ${produce the selected_service}. We ...\n\nSelected Service: The details of the service selected, only the service selected\n\n---\n\nAvailable Services: name: QueryDatabase descri

In [205]:
# help(lm.history)
print(lm.history[-1]['prompt'])

You are an experienced Data Analyst. You received the following user query: "{query}".
    Do not repeat the query in your answer

    Your task is to create an action list based on the available services provided. Please ensure that:

    The list of actions is minimized to only what is necessary to fulfill the user's query.
    Actions are not duplicated unless required for the workflow.
    Actions are combined where possible to streamline the process.
    Each action is linked to one of the available services.
    For each action, provide an action input and detailed instructions.


    Available Services:

---

Follow the following format.

Available Services: All available services for the agent

Query: User specified query

Reasoning: Let's think step by step in order to ${produce the selected_service}. We ...

Selected Service: The details of the service selected, only the service selected

---

Available Services: name: QueryDatabase description: "Use this service to query dat

In [218]:
# help(c)
# testset[0]

# c.parameters()
# help(c)
# c.__dict__.keys()

c.save()

generate_answer = ChainOfThought(GenerateService(available_services, query -> selected_service
    instructions='You are an experienced Data Analyst. You received the following user query: "{query}".\n    Do not repeat the query in your answer\n\n    Your task is to create an action list based on the available services provided. Please ensure that:\n\n    The list of actions is minimized to only what is necessary to fulfill the user\'s query.\n    Actions are not duplicated unless required for the workflow.\n    Actions are combined where possible to streamline the process.\n    Each action is linked to one of the available services.\n    For each action, provide an action input and detailed instructions.\n\n\n    Available Services:\n    '
    available_services = Field(annotation=str required=True description='All available services for the agent' json_schema_extra={'__dspy_field_type': 'input', 'desc': 'All available services for the agent', 'prefix': 'Available Services:'})
    que

In [222]:
# c.parameters()
# c.load()
# c.__dict__['candidate_programs']
c.save()
"""Analyze the user query "{query}" 
and determine the necessary steps to fulfill it. Organize these steps into a concise action list
, ensuring each action is unique, relevant, and linked to an available service. For each action
, provide an action input and detailed instructions.
"""
c.__dict__['candidate_programs'][0]['instruction']

[{'score': 44.83,
  'program': generate_answer = ChainOfThought(GenerateService(available_services, query -> selected_service
      instructions='You are an experienced Data Analyst. You received the following user query: "{query}".\n    Do not repeat the query in your answer\n\n    Your task is to create an action list based on the available services provided. Please ensure that:\n\n    The list of actions is minimized to only what is necessary to fulfill the user\'s query.\n    Actions are not duplicated unless required for the workflow.\n    Actions are combined where possible to streamline the process.\n    Each action is linked to one of the available services.\n    For each action, provide an action input and detailed instructions.\n\n\n    Available Services:\n    '
      available_services = Field(annotation=str required=True description='All available services for the agent' json_schema_extra={'__dspy_field_type': 'input', 'desc': 'All available services for the agent', 'prefi

In [162]:
# """Identify the key question(s) in the query. Break it down into simpler,
# significant subtasks which are precisely matched to the available services. 
# Work on far-reaching and reliably integrated solutions instead of individual actions. 
# Consolidate the consequential tasks whenever possible and assign each one of them to servicable modalities.
# For each liable subtask, 
# enumerate and denote predisposed inputs by explicit semantics. Use {query_variables} therein for placeholders referring to elements in the data."""

"""You are an experienced Data Analyst. You received the following user query: "{query}".
    Do not repeat the query in your answer

    Your task is to create an action list based on the available services provided. Please ensure that:

    The list of actions is minimized to only what is necessary to fulfill the user's query.
    Actions are not duplicated unless required for the workflow.
    Actions are combined where possible to streamline the process.
    Each action is linked to one of the available services.
    For each action, provide an action input and detailed instructions.""

In [215]:
# df['Expected Response'].unique()
print(c.__dict__['candidate_programs'][0]['prefix'])

Action List:

Example:

Basic Instruction: The user query is: "Find the average sales revenue for the last quarter for each product category."

Proposed Instruction: Analyze the user query "Find the average sales revenue for the last quarter for each product category." and determine the necessary steps to fulfill it. Organize these steps into a concise action list, ensuring each action is unique, relevant, and linked to an available service. For each action, provide an action input and detailed instructions.

Proposed Prefix For Output Field: Action List:

1. Action: Filter sales data for the last quarter
   Action Input: Start date (last quarter start date), End date (last quarter end date)
   Instructions: Use the 'Filter Data' service to filter the sales data for the last quarter.

2. Action: Group sales data by product category
   Action Input: Column name (product category column)
   Instructions: Use the 'Group Data' service to group the filtered sales data by product category.



In [187]:
c.save('basic_optimized')

In [199]:
# help(d)
d.named_parameters()
# ['candidate_programs']

[('generate_answer',
  ChainOfThought(GenerateService(available_services, query -> selected_service
      instructions='You are an experienced Data Analyst. You received the following user query: "{query}".\n    Do not repeat the query in your answer\n\n    Your task is to create an action list based on the available services provided. Please ensure that:\n\n    The list of actions is minimized to only what is necessary to fulfill the user\'s query.\n    Actions are not duplicated unless required for the workflow.\n    Actions are combined where possible to streamline the process.\n    Each action is linked to one of the available services.\n    For each action, provide an action input and detailed instructions.\n\n\n    Available Services:\n    '
      available_services = Field(annotation=str required=True description='All available services for the agent' json_schema_extra={'__dspy_field_type': 'input', 'desc': 'All available services for the agent', 'prefix': 'Available Services:'}

In [207]:
d.__dict__['candidate_programs']

[(38.46,
  [0.3333333333333333,
   0.0,
   1.0,
   1.0,
   1.0,
   0.0,
   0.3333333333333333,
   0.3333333333333333,
   0.0,
   0.3333333333333333,
   0.0,
   0.3333333333333333,
   0.3333333333333333],
  -1,
  generate_answer = ChainOfThought(GenerateService(available_services, query -> selected_service
      instructions='You are an experienced Data Analyst. You received the following user query: "{query}".\n    Do not repeat the query in your answer\n\n    Your task is to create an action list based on the available services provided. Please ensure that:\n\n    The list of actions is minimized to only what is necessary to fulfill the user\'s query.\n    Actions are not duplicated unless required for the workflow.\n    Actions are combined where possible to streamline the process.\n    Each action is linked to one of the available services.\n    For each action, provide an action input and detailed instructions.\n\n\n    Available Services:\n    '
      available_services = Field(an

In [None]:
d