In [292]:
import os
import re
import textwrap

from langchain.llms import OpenAI, Cohere, Ollama
from langchain.chains import LLMChain, SequentialChain
from langchain.chat_models import ChatOpenAI, ChatOllama
from langchain.prompts import PromptTemplate, FewShotPromptTemplate

from ipywidgets import fixed, interactive, interact_manual, FloatSlider

# API keys: export OPENAI_API_KEY / COHERE_API_KEY in the environment before
# starting Jupyter. The placeholders below are applied only when a variable is
# not already set, so a real key present in the environment is never overwritten.
os.environ.setdefault('OPENAI_API_KEY', 'OPENAI_API_KEY')
os.environ.setdefault('COHERE_API_KEY', 'COHERE_API_KEY')

In [2]:
# Sample user question; it is passed as a fixed (non-widget) argument to the demos below.
query = "How should I tour France?"

# Manual Prompt Design

In [3]:
# "Parameters of LLMs"

# Two types of LLMs: text / chat.
# Provider and model are combined into one "<provider> <model>" string
# so that a single drop-down menu can select both.

# 1) Models
chat_models = ["OpenAI gpt-4", "OpenAI gpt-4-0314", "OpenAI gpt-4-0613", "META llama2"]
text_models = ["OpenAI text-davinci-003", "Cohere command", "Cohere command-light", "META llama2"]

# 2) Temperature
temperature = FloatSlider(min=0.0, max=1.0, step=0.01, value=0.0)

# 3) Response length (in words). The initial value has to lie inside
#    [min, max], so the slider starts at its minimum rather than at 0.0.
length = FloatSlider(min=50.0, max=150.0, step=1, value=50.0)

# ASPECTS OF LANGUAGE BELOW (SUBJECT TO INTERPRETATION)
# Each list becomes a drop-down; "neutral" is the first (default) option.

# 4) Diction
diction = ["neutral", "formal", "informal"]

# 5) Tone
tone = ["neutral", "excited", "nonchalant"]

# 6) Confidence
confidence = ["neutral", "confident", "reserved"]

# 7) Comprehension
comprehension = ["neutral", "simple", "complex"]

# 8) Appeal
appeal = ["neutral", "emotional", "logical"]

# 9) Diversity
diversity = ["neutral", "diverse", "narrow"]


In [4]:
def model_loader(name: str, temperature: float):

    match name.split()[0]:
        case "OpenAI":
            model = OpenAI(model_name = name.split()[1], temperature = temperature)
        case "Cohere":
            model = Cohere(model = name.split()[1], temperature = temperature)
        case "META":
            model = Ollama(model = name.split()[1], temperature = temperature)

    return model

In [5]:
def manual_output(query, text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity):
    """Answer ``query`` with a hand-written style prompt and print the result.

    Args:
        query: The user question inserted into the prompt.
        text_models: "<provider> <model>" string understood by ``model_loader``.
        temperature: Sampling temperature for the selected model.
        length: Requested maximum response length in words.
        diction, tone, confidence, comprehension, appeal, diversity: Style
            descriptors substituted verbatim into the prompt.

    Returns:
        None. The wrapped response and its word count are printed.
    """
    max_char = 100

    # The length slider yields floats (e.g. 85.0); render whole numbers as "85"
    # so the prompt does not ask for "85.0 words".
    if isinstance(length, float) and length.is_integer():
        length = int(length)

    model = model_loader(text_models, temperature)

    prompt_template = PromptTemplate(

        input_variables = ["query", "length", "diction", "tone", "confidence", "comprehension", "appeal", "diversity"],

        template = """Directions: your tone should be {tone}. Your diction should be {diction}. 
        You should speak in a {confidence} manner. Your output should be {comprehension}. 
        Your appeal should be {appeal}. Your range of topics discussed should be {diversity}.
        Please keep your response to strictly within {length} words. 
        \n\n Question: {query} \n\n Your response here: """,

    )

    prompt = prompt_template.format(
        query = query,
        length = length,
        diction = diction,
        tone = tone,
        confidence = confidence,
        comprehension = comprehension,
        appeal = appeal,
        diversity = diversity,
    )

    # textwrap.wrap re-flows the whole response (newlines included) into
    # lines of at most max_char characters without splitting long words.
    wrapped = textwrap.wrap(model(prompt), width = max_char, break_long_words = False)

    output = "".join(line + "\n" for line in wrapped)
    print(output + "\n")
    print(f"Word Count: {len(output.split())}")

In [6]:
# Build the control panel for the manual prompt; `query` is fixed, every other
# argument is driven by the widget / option list of the same name.
interact_manual(
    manual_output,
    query = fixed(query),
    text_models = text_models,
    temperature = temperature,
    length = length,
    diction = diction,
    tone = tone,
    confidence = confidence,
    comprehension = comprehension,
    appeal = appeal,
    diversity = diversity,
)

interactive(children=(Dropdown(description='text_models', options=('OpenAI text-davinci-003', 'Cohere command'…

<function __main__.manual_output(query, text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity)>

# "LLM as Optimizers": —> LLM for Prompt Generation[?]

In [7]:
# Meta-prompt: asks a generator LLM to write style instructions that another
# LLM will then follow. Takes the seven style settings and no user question.
prompt_template_in = PromptTemplate(
    template = """You are a helpful AI bot that provides instructions on how other LLMs should generate output.
    Please generate a set of instructions that tells other LLMs to do the following: 1) {diction} diction 
    2) {tone} tone 3) {confidence} manner 4) {comprehension} comprehension 5) {appeal} appeal 
    6) {diversity} topics discussed 7) {length} words or less. Your output should only include 
    the related instructions and nothing else. \n\n Instructions:""",
    input_variables = ["length", "diction", "tone", "confidence", "comprehension", "appeal", "diversity"],
)

# Answer prompt: wraps LLM-generated directions (`LLM_prompt`) around the user question.
prompt_template_out = PromptTemplate(
    template = """Please generate your output by following the instructions below.
        Directions: {LLM_prompt} \n\n Question: {query} \n\n Your response here: """,
    input_variables = ["LLM_prompt", "query"],
)

In [8]:
def LLM_output(query, text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity):
    """Answer ``query`` using style directions written by a generator LLM.

    A fixed ``text-davinci-003`` generator (temperature 1.0) first turns the
    style settings into directions via ``prompt_template_in``; the selected
    model then answers the question following those directions.

    NOTE: a later cell in this notebook defines another ``LLM_output`` with a
    different signature, which shadows this one once that cell has run.

    Args:
        query: The user question.
        text_models: "<provider> <model>" string understood by ``model_loader``.
        temperature: Sampling temperature for the answering model.
        length: Requested maximum response length in words.
        diction, tone, confidence, comprehension, appeal, diversity: Style
            descriptors substituted into the generator prompt.

    Returns:
        None. The generated directions, the wrapped answer and its word count
        are printed.
    """
    max_char = 100

    # The length slider yields floats (e.g. 85.0); render whole numbers as "85"
    # so the generator is not asked for "85.0 words or less".
    if isinstance(length, float) and length.is_integer():
        length = int(length)

    model = model_loader(text_models, temperature)
    generator = OpenAI(model_name = "text-davinci-003", temperature = 1.0)

    prompt_in = prompt_template_in.format(
        length = length,
        diction = diction,
        tone = tone,
        confidence = confidence,
        comprehension = comprehension,
        appeal = appeal,
        diversity = diversity,
    )

    LLM_prompt = generator(prompt_in)

    prompt_out = prompt_template_out.format(LLM_prompt = LLM_prompt, query = query)

    # Re-flow the answer into lines of at most max_char characters.
    wrapped = textwrap.wrap(model(prompt_out), width = max_char, break_long_words = False)
    print(LLM_prompt + "\n")

    output = "".join(line + "\n" for line in wrapped)
    print(output + "\n")
    print(f"Word Count: {len(output.split())}")

In [9]:
# Build the control panel for the LLM-generated-directions demo; `query` is
# fixed, every other argument is driven by the widget / option list of the same name.
interact_manual(
    LLM_output,
    query = fixed(query),
    text_models = text_models,
    temperature = temperature,
    length = length,
    diction = diction,
    tone = tone,
    confidence = confidence,
    comprehension = comprehension,
    appeal = appeal,
    diversity = diversity,
)

interactive(children=(Dropdown(description='text_models', options=('OpenAI text-davinci-003', 'Cohere command'…

<function __main__.LLM_output(query, text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity)>

# Few-Shot Templating —> LLM Evaluation


In [10]:
def identity(text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity):
    """Return the nine arguments unchanged, as a tuple in signature order."""
    selections = (
        text_models,
        temperature,
        length,
        diction,
        tone,
        confidence,
        comprehension,
        appeal,
        diversity,
    )
    return selections

def output_mod(text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity):
    """Display one widget per generation parameter and return the container.

    Args:
        text_models, diction, tone, confidence, comprehension, appeal,
        diversity: Option lists (or widgets) for the corresponding setting.
        temperature, length: Slider widgets for the numeric settings.

    Returns:
        The ``interactive`` container wrapping ``identity``; its ``.result``
        attribute holds the tuple ``identity`` returned for the current
        widget values.
    """
    # `identity` has no `query` parameter, so no `query` keyword is passed here;
    # this also keeps the function independent of the notebook-level `query`.
    parameters = interactive(
        identity,
        text_models = text_models,
        temperature = temperature,
        length = length,
        diction = diction,
        tone = tone,
        confidence = confidence,
        comprehension = comprehension,
        appeal = appeal,
        diversity = diversity,
    )

    display(parameters)

    return parameters

In [263]:
# Sampling prompt: asks the model for `examples` independent answers to `query`
# that follow the generated directions (`LLM_prompt`), without self-scoring.
prompt_template_out2 = PromptTemplate(
    template = """Please generate {examples} outputs by following the directions below.
        Each output will be graded from a scale of 1 to 10, with 1 being the worst and 
        10 being the best, according to how well it follows the directions. You should 
        definitely not score the responses yourself and should only generate {examples} 
        responses, each of which should independently follow the directions below. \n\n
        Directions: {LLM_prompt} \n\n Question: {query} \n\n Your responses here: """,
    input_variables = ["LLM_prompt", "query", "examples"],
)

# Grading prompt: asks a grader LLM to score each of the `examples` responses
# contained in `output` against the directions, one "N: score" line per response.
# NOTE(review): the text asks for integers from 1 to 10 but also mentions
# "0 points" for off-topic responses — confirm which lower bound is intended.
prompt_template_grader = PromptTemplate(
    template = """You are a harsh AI grader that is given {examples} different responses to the question {query}. 
        For each response, you will assign an integer from 1 to 10 only (not /10), with 1 being the worst and 10 being 
        the best,according to how closely the response follows the directions. You should deduct 1 to 2 points 
        for each listed direction that the generated response did not follow. You should give a response 0 points if 
        it did not effectively address the query {query}. Your output should resemble the following: 
        \n 1: 8 \n 2: 6 \n\n Directions: {LLM_prompt} \n\n Response: {output} \n\n Scores: """,
    input_variables = ["LLM_prompt", "query", "examples", "output"],
)

In [264]:
def rate_examples(params: list, examples: int, query: str):
    """Generate several candidate answers to ``query`` and have an LLM grade them.

    Steps: (1) a ``text-davinci-003`` generator (temperature 1.0) writes style
    directions from the settings in ``params``; (2) the selected model produces
    ``examples`` answers following them; (3) a ``text-davinci-003`` grader
    (temperature 0.0) scores those answers.

    Args:
        params: Nine-item sequence in the order (text_models, temperature,
            length, diction, tone, confidence, comprehension, appeal,
            diversity), e.g. the ``.result`` of the widget built by ``output_mod``.
        examples: Number of candidate answers to request.
        query: The user question.

    Returns:
        Tuple ``(LLM_prompt, few_shot_responses, few_shot_scores, model)``:
        the generated directions, the raw candidate answers, the raw grader
        output, and the model instance that produced the answers.
    """
    text_models, temperature, length, diction, tone, confidence, comprehension, appeal, diversity = params

    # The length slider yields floats (e.g. 85.0); render whole numbers as "85"
    # so the generator is not asked for "85.0 words or less".
    if isinstance(length, float) and length.is_integer():
        length = int(length)

    model = model_loader(text_models, temperature)
    generator = OpenAI(model_name = "text-davinci-003", temperature = 1.0)

    prompt_in = prompt_template_in.format(
        length = length,
        diction = diction,
        tone = tone,
        confidence = confidence,
        comprehension = comprehension,
        appeal = appeal,
        diversity = diversity,
    )

    LLM_prompt = generator(prompt_in)

    prompt_out = prompt_template_out2.format(LLM_prompt = LLM_prompt, query = query, examples = examples)
    few_shot_responses = model(prompt_out)

    grader = OpenAI(model_name = "text-davinci-003", temperature = 0.0)

    # Grade the answers generated just above (a local value, not leftover kernel state).
    prompt_grader = prompt_template_grader.format(
        LLM_prompt = LLM_prompt,
        query = query,
        examples = examples,
        output = few_shot_responses,
    )
    few_shot_scores = grader(prompt_grader)

    return LLM_prompt, few_shot_responses, few_shot_scores, model
    
    

In [321]:
def LLM_output(params: tuple, examples: int, query: str):
    """Answer ``query`` with a few-shot prompt built from LLM-graded samples.

    ``rate_examples`` supplies the generated directions, candidate answers and
    their scores. Each non-empty line of the answers is paired with the
    corresponding score line and used as a few-shot example; the model is then
    asked for a final answer that aims for the highest score.

    NOTE: this definition shares its name with the widget-driven ``LLM_output``
    defined in an earlier cell and shadows it once this cell has run.

    Args:
        params: Nine-item sequence of generation settings, as accepted by
            ``rate_examples``.
        examples: Number of graded samples to request and to include (at most)
            as few-shot examples.
        query: The user question.

    Returns:
        None. The few-shot prompt, the wrapped final answer and its word count
        are printed.
    """
    max_char = 100

    LLM_prompt, few_shot_examples, few_shot_scores, model = rate_examples(params, examples, query)

    # Drop empty lines; each remaining line is treated as one response / one score.
    few_shot_examples = [line for line in few_shot_examples.splitlines() if line]
    # Remove the leading "N:" label from lines such as "1: 8", keeping only the score.
    few_shot_scores = [
        re.sub(r'^.*? ', ' ', line.strip()).strip()
        for line in few_shot_scores.splitlines()
        if line
    ]

    # zip stops at the shorter list, so a model that returns fewer lines than
    # requested yields fewer examples instead of raising IndexError.
    template_examples = [
        {"Output": response, "Score": score}
        for response, score in zip(few_shot_examples[:examples], few_shot_scores[:examples])
    ]

    template = """
    Response: {Output}
    Score: {Score}  
    """

    prefix = """In the following, the AI follows the directions below to generate a response to the query. 
    Each response is associated with a score from 1 (worst) to 10 (best). \n\n Here is the question: {query} 
    \n\n Here are the directions: {LLM_prompt} Here are some examples: \n\n """

    suffix = """Please generate a response to the query by following the directions. Aim for the highest score possible.
    \n\n Directions: {LLM_prompt} \n\n Query: {query} \n\n Your response here (do not include score or word count): """

    example_prompt = PromptTemplate(
        input_variables=["Output", "Score"],
        template=template
    )

    few_shot_template = FewShotPromptTemplate(
        examples=template_examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["LLM_prompt", "query"],
        example_separator="\n"
    )

    few_shot_prompt = few_shot_template.format(LLM_prompt = LLM_prompt, query = query)

    print(few_shot_prompt)

    # Query the model exactly once and reuse the text: every call is a separate
    # request whose result may differ when temperature > 0.
    response = model(few_shot_prompt)

    wrapped = textwrap.wrap(response, width = max_char, break_long_words = False)
    output = "".join(line + "\n" for line in wrapped)
    print(output + "\n")
    print(f"Word Count: {len(output.split())}")

In [326]:
# Show the parameter widgets and keep a handle on the container so the
# selected values can be read back later.
params = output_mod(
    text_models,
    temperature,
    length,
    diction,
    tone,
    confidence,
    comprehension,
    appeal,
    diversity,
)

interactive(children=(Dropdown(description='text_models', options=('OpenAI text-davinci-003', 'Cohere command'…

In [327]:
# Inspect the tuple of values currently selected in the widgets above.
print(params.result)

('OpenAI text-davinci-003', 0.45, 85.0, 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral')


In [328]:
# Run the few-shot pipeline with the selected settings, requesting 3 graded samples.
LLM_output(params.result, 3, query)

In the following, the AI follows the directions below to generate a response to the query. 
    Each response is associated with a score from 1 (worst) to 10 (best). 

 Here is the question: How should I tour France? 
    

 Here are the directions:  
1. Use a neutral diction that is simple and concise. 
2. Maintain a neutral, even tone when speaking. 
3. Speak in a professional manner. 
4. Comprehend any conversations in an impartial manner. 
5. Appeal to the listener in a balanced way. 
6. Discuss neutral topics relevant to the context. 
7. Once complete, limit the output to 85 words. Here are some examples: 

 

    Response: 1. France is a beautiful country with many different areas to explore. Start by researching the different regions and cities to determine which ones you'd like to visit. Consider the type of activities you'd like to do and the attractions you'd like to see. Plan your route accordingly and make sure to book accommodation in advance. Once you have your plan, make