In [1]:
from typing import Dict, List, Type, Union
from typing import List
import json
import os
from openai import OpenAI
from dotenv import load_dotenv

import sglang as sgl
from pydantic import BaseModel, conlist
from typing import List
from sglang.srt.constrained import build_regex_from_object

import ast

# Load environment variables from the local dotenv file before building the client.
load_dotenv("env_variable.env")
# No credentials are passed explicitly; presumably the client reads its API key
# from the environment populated above — TODO confirm.
client = OpenAI()

In [2]:
class ConceptsList(BaseModel):
    # Output schema for constrained decoding: a single field holding at most 10 strings.
    # The field name is also the JSON key read back in local_llm_call and the label
    # that ends the prompt in create_mistral_total_prompt, so rename all three together.
    # The list name has an important effect on the response! Choose it wisely!
    Concepts_List: conlist(str, max_length=10)


@sgl.function
def pydantic_gen_ex(s, list_element):
    """Append the prompt to the sglang state and generate output constrained to ConceptsList.

    Args:
        s: Program state; callers invoke ``pydantic_gen_ex.run(prompt)`` with a
            single argument, so this is presumably supplied by ``@sgl.function``.
        list_element: Text appended to the state before generation. Despite the
            name, callers in this notebook pass the full prompt string.
    """
    s += list_element
    s += sgl.gen(
        "",  # empty variable name: the caller reads the result through state.text()
        max_tokens=1024,
        temperature=0,
        regex=build_regex_from_object(ConceptsList),  # Requires pydantic >= 2.0
    )

# Use the runtime endpoint at localhost:30000 as the default backend for sgl programs
# (presumably a locally running sglang server — it must be reachable before any call).
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

In [3]:
def create_mistral_total_prompt(system_prompt, input):
    """Build the complete prompt sent to the local model.

    The result is ``system_prompt``, a newline, a ``text:<input>`` line and a
    trailing ``Concepts_List:`` label for the model to complete.

    Args:
        system_prompt: Instruction text (with any examples) placed first.
        input: Text to analyse. The name shadows the builtin but is kept
            because it is part of the function's interface.

    Returns:
        The assembled prompt string.
    """
    text_section = f"text:{input}\nConcepts_List:"
    return "\n".join((system_prompt, text_section))

In [4]:
def local_llm_call(input):
    """Run the constrained local model on a prompt and return its concepts as a string.

    Args:
        input: Full prompt text handed to ``pydantic_gen_ex``.

    Returns:
        ``str()`` of the value stored under ``"Concepts_List"`` in the generated JSON.
    """
    run_state = pydantic_gen_ex.run(input)
    # Drop the first len(input) characters so only the text after the prompt is parsed.
    generated_json = run_state.text()[len(input):]
    concepts = json.loads(generated_json)["Concepts_List"]
    return str(concepts)

In [5]:
from dataclasses import dataclass, field

@dataclass
class PromptsClass:
    """Record of the prompt-refinement experiments run in this notebook.

    Every field defaults to a fresh empty list, so ``PromptsClass()`` is
    equivalent to ``PromptsClass([], [], [], [])``.
    """
    # System prompts tried so far, one per experiment round.
    mistral_system_prompts: List[str] = field(default_factory=list)
    # Evaluation texts fed to the local model in every round.
    target_input: List[str] = field(default_factory=list)
    # Expected output for each evaluation text (stored as stringified lists).
    target_output: List[str] = field(default_factory=list)
    # One inner list per round, holding the model response for each evaluation text.
    mistral_responses: List[List[str]] = field(default_factory=list)

In [6]:
# Evaluation case 1: input text and the concepts the model is expected to return.
# The text is runtime data sent to the model verbatim, so its spelling is left untouched.
input_evaluation_1 = "She said: 'today was supposed to be a day of celebration and joy in Kansas, instead it is another day where America has experience senselense gun violence' in response to what happened in Kansas near coca-cola branch"

# NOTE(review): the target lists "Kansas city" although the text only says "Kansas" — confirm this is intended.
output_evaluation_1 = ["Gun violence", "Coca-cola", "Kansas city"]

# Evaluation case 2: input text and expected concepts.
input_evaluation_2 = "I would say the best place to go for your honeymoon is Paris, but some say it's overrated"

output_evaluation_2 = ["Paris", "Honeymoon"]

In [7]:
# Seed the tracker with both evaluation cases; expected outputs are stored as
# stringified lists so they can be compared with local_llm_call's string result.
promptTracker = PromptsClass(
    mistral_system_prompts=[],
    target_input=[input_evaluation_1, input_evaluation_2],
    target_output=[str(output_evaluation_1), str(output_evaluation_2)],
    mistral_responses=[],
)

In [8]:
# Sanity check: the expected outputs, stored as stringified lists.
promptTracker.target_output

["['Gun violence', 'Coca-cola', 'Kansas city']", "['Paris', 'Honeymoon']"]

In [9]:
# Sanity check: the evaluation texts that every system prompt is tested against.
promptTracker.target_input

["She said: 'today was supposed to be a day of celebration and joy in Kansas, instead it is another day where America has experience senselense gun violence' in response to what happened in Kansas near coca-cola branch",
 "I would say the best place to go for your honeymoon is Paris, but some say it's overrated"]

### Creating the initial Mistral inputs and outputs

In [10]:
# Baseline system prompt for the local model: one instruction paragraph followed by
# a single worked example delimited by ### markers.
init_sys_prompt = """You are an AI designed to find a LIMITED list of GENERAL concepts associated with a given piece of text. The list size should NOT exceed 10. You Must use standardized words.

###
Here are some examples:


Text: "israel supporters attacks female palestine activist"
Concepts_List: ["Hate speech", "Palestine"]
###
"""

In [11]:
# Register the baseline prompt as experiment 0.
# Not idempotent: re-running this cell appends a duplicate entry.
promptTracker.mistral_system_prompts.append(init_sys_prompt)

In [12]:
# Evaluate the most recent system prompt on every target input and record the
# responses as a new experiment round.
baseline_responses = []
promptTracker.mistral_responses.append(baseline_responses)
for target_text in promptTracker.target_input:
    baseline_prompt = create_mistral_total_prompt(
        promptTracker.mistral_system_prompts[-1], target_text
    )
    baseline_responses.append(local_llm_call(baseline_prompt))

### Using OpenAI GPT-4 to refine the system prompt

In [17]:
# System prompt for the GPT-4 "prompt engineer" that proposes improved Mistral prompts.
# NOTE(review): the text still contains a "...." placeholder for the use case and a few
# typos ("promtps", "you job"); it is sent to the model verbatim, so edit it deliberately.
openai_sys_prompt = """You are an AI assistant who is expert in creating promtps for LLMs. you job is to modify and enhance a prompt for a 7b mistral instruct model. The mistral model is supposed to receive an input text, and return a list of strings, entities, brand names, etc in that input text. This LLM is going to be used for .... The prompt to the mistral model can include some examples that lead the model's behavior. Mistral model performs constrained decoding, meaning that it only generates a list of strings.

A number of experiments have been done on different system prompts for mistral and the output. Those experiments which include tested system prompt, tested INPUTs TO MISTRAL, and the resulting outputs from Mistral are provided to you. Your job is to observe the experiments, and come up with a better system prompt for Mistral to achieve the expected output. you can provide some examples, or remove some examples in your suggested system prompt. Remember that total number of examples should be limited to 3, because it adds extra computation and we can't afford it. Note that the examples given in the system prompt of mistral should be enclosed by ### ###. Pay attention to the fact that, you are NOT allowed to use EVALUATION INPUT TO MISTRAL texts in your examples for your suggested mistral system prompt.
"""

In [18]:
# Reminder of the expected outputs that the refinement loop is trying to reach.
promptTracker.target_output

["['Gun violence', 'Coca-cola', 'Kansas city']", "['Paris', 'Honeymoon']"]

In [19]:
def create_openai_user_prompt(prompttracker):
    """Render every recorded experiment as one user-message string.

    For each entry in ``prompttracker.mistral_responses`` the output contains the
    system prompt used in that round, followed by one section per evaluation
    input showing the input, the model's response and the expected output.

    Args:
        prompttracker: Object exposing ``mistral_system_prompts``,
            ``target_input``, ``target_output`` and ``mistral_responses``.

    Returns:
        The concatenated report, or an empty string when no round is recorded.
    """
    sections = []

    for exp_idx in range(len(prompttracker.mistral_responses)):
        # Header for this round: experiment number and the system prompt tested.
        sections.append(f"""\n\n\n\n
Experiment {exp_idx}
Mistral System Prompt:
{prompttracker.mistral_system_prompts[exp_idx]}
""")
        for case_idx, case_text in enumerate(prompttracker.target_input):
            # One section per evaluation case: input, actual output, expected output.
            sections.append(f"""\n\n

EVALUATION INPUT {case_idx} TO MISTRAL:
{case_text}
EVALUATION OUTPUT {case_idx} FROM MISTRAL:
{prompttracker.mistral_responses[exp_idx][case_idx]}
what was EXPECTED to be EVALUATION OUTPUT {case_idx} from MISTRAL:
{str(prompttracker.target_output[case_idx])} \n\n
""")

    return "".join(sections)

In [20]:
class EnhancedSystemPrompt(BaseModel):
    # Argument schema for the OpenAI function call: a single string field.
    # The field name is the JSON key that request_to_openai reads from the
    # returned arguments, so keep the two in sync.
    Enhanced_System_Prompt: str


def request_to_openai(prompttracker):
    """Ask GPT-4 for an improved Mistral system prompt based on the recorded experiments.

    Args:
        prompttracker: Experiment record passed to ``create_openai_user_prompt``
            to build the user message.

    Returns:
        The ``Enhanced_System_Prompt`` string parsed from the JSON arguments of
        the forced tool call.
    """
    openai_user_prompt = create_openai_user_prompt(prompttracker=prompttracker)
    tool_name = "Enhanced_System_Prompt"

    # `functions` / `function_call` are deprecated in the Chat Completions API;
    # `tools` / `tool_choice` are the supported equivalents.
    response = client.chat.completions.create(
        temperature=0.1,
        model="gpt-4-0125-preview",
        messages=[
            {"role": "system", "content": openai_sys_prompt},
            {"role": "user", "content": openai_user_prompt},
        ],
        tools=[
            {
                "type": "function",
                "function": {
                    "name": tool_name,
                    "description": "Enhanced System Prompt for Mistral LLM",
                    "parameters": EnhancedSystemPrompt.model_json_schema(),
                },
            }
        ],
        # Force the model to answer through the tool so the reply is structured JSON.
        tool_choice={"type": "function", "function": {"name": tool_name}},
    )
    tool_call = response.choices[0].message.tool_calls[0]
    return json.loads(tool_call.function.arguments)["Enhanced_System_Prompt"]

In [21]:
def refine_system_prompt_with_gpt4(number_of_iterations):
    """Run refinement rounds: get a new system prompt from GPT-4, then evaluate it.

    Each round appends the suggested prompt and a list of the local model's
    responses (one per target input) to the module-level ``promptTracker``.

    Args:
        number_of_iterations: Number of refinement rounds to run.
    """
    for _ in range(number_of_iterations):
        candidate_prompt = request_to_openai(prompttracker=promptTracker)
        promptTracker.mistral_system_prompts.append(candidate_prompt)

        # Register the round's result list first, then fill it as responses arrive.
        round_responses = []
        promptTracker.mistral_responses.append(round_responses)
        for target_text in promptTracker.target_input:
            full_prompt = create_mistral_total_prompt(candidate_prompt, target_text)
            round_responses.append(local_llm_call(full_prompt))

In [22]:
# Run five refinement rounds. Each round makes one OpenAI request plus one local
# model call per evaluation input, and appends to promptTracker (not idempotent on re-run).
refine_system_prompt_with_gpt4(5)

In [28]:
# Expected outputs, for comparison with the per-round responses.
print(promptTracker.target_output)

["['Gun violence', 'Coca-cola', 'Kansas city']", "['Paris', 'Honeymoon']"]


In [26]:
# One line per experiment round: the local model's responses for each evaluation input.
for round_responses in promptTracker.mistral_responses:
    print(round_responses)

["['Gun violence', 'Celebration', 'Joy', 'America', 'Kansas']", "['Honeymoon', 'Paris', 'Travel']"]
["['Kansas', 'America', 'Gun violence']", "['Paris', 'Honeymoon']"]
["['Kansas', 'America', 'Gun violence', 'Coca-Cola']", "['Paris', 'Honeymoon']"]
["['Kansas', 'America', 'Gun violence', 'Coca-Cola']", "['Paris']"]
["['Kansas', 'America', 'Gun violence', 'Coca-Cola']", "['Paris']"]
["['America', 'Gun violence', 'Kansas', 'Coca-Cola']", "['Paris']"]


In [37]:
# Show the system prompt produced by the final refinement round.
print(promptTracker.mistral_system_prompts[-1])

You are an AI designed to extract a concise list of specific entities, brand names, or unique concepts from a given piece of text. The list should not exceed 10 items and must focus on the most distinctive elements mentioned, prioritizing locations, notable brands, and any specific details directly referenced in the text. Use standardized words for entities and concepts, and ensure to capture specific locations (e.g., 'Kansas City' instead of 'Kansas') and brand names as mentioned in the text. Avoid general terms unless they are central to the text's meaning. Emphasize capturing events or occasions if they are a focal point of the text. Additionally, when a sentiment or event is mentioned, identify and list it if it significantly impacts the context of the text.

###
Text: "The new iPhone 12 was released yesterday, sparking interest among tech enthusiasts and Apple fans alike."
Concepts_List: ["iPhone 12", "Tech enthusiasts", "Apple"]
###

###
Text: "A devastating earthquake hit the Sa