# CAI Claude Instant

In [75]:
# %load ./claude.py
from typing import NamedTuple, Optional
import os
import anthropic
from enum import Enum
from dotenv import load_dotenv
load_dotenv()


# Module-level API client shared by get_claude_response. The key is read from
# the ANTHROPIC_API_KEY environment variable; load_dotenv() has already run at
# this point, so presumably a local .env file can supply it.
anthropic_client = anthropic.Client(os.getenv('ANTHROPIC_API_KEY'))


class Model(Enum):
    """Model identifiers; a member's `.value` is the string sent as `model`."""

    # Values without a minor version (the "*_latest" members).
    claude_v1_latest = "claude-v1"
    # Values pinned to a specific minor version.
    claude_v1_0 = "claude-v1.0"
    claude_v1_2 = "claude-v1.2"
    claude_v1_3 = "claude-v1.3"
    # The "instant" family, in the same unpinned / pinned arrangement.
    claude_instant_v1_latest = "claude-instant-v1"
    claude_instant_v1_0 = "claude-instant-v1.0"


class PromptType(NamedTuple):
    """Arguments for a single call to `get_claude_response`.

    Only `human_message` is required; every other field has a default.
    """
    # Text of the human turn; placed after `anthropic.HUMAN_PROMPT` in the prompt.
    human_message: str
    # Model to query; its `.value` is sent as the `model` argument.
    # NOTE(review): typed Optional, but `get_claude_response` reads `.value`
    # unconditionally, so passing None would raise AttributeError.
    model: Optional[Model] = Model.claude_v1_latest
    # Sampling temperature, passed through as `temperature`.
    # NOTE(review): the name suggests a 0-1 range; nothing here enforces it.
    temp_0_1: Optional[float] = 0.5
    # Passed through unchanged as `max_tokens_to_sample`.
    max_tokens_to_sample: Optional[int] = 1024
    # If set, appended (after one space) to the prompt following `anthropic.AI_PROMPT`.
    assistant_prefix: Optional[str] = None
    # If set, prepended to the completion text before it is returned.
    response_prefix: Optional[str] = None


# default_prompt_args = PromptType(
#     model=Model.claude_v1_latest,
#     human_message=None,
#     temp_0_1=0.5,
#     max_tokens_to_sample=1024,
#     assistant_prefix=None
# )


def get_claude_response(
    prompt_args: PromptType
) -> str:
    """Run one completion through `anthropic_client` and return its text.

    The prompt is `HUMAN_PROMPT`, a space, the human message, then `AI_PROMPT`.
    When `assistant_prefix` is set it is appended after one more space.
    Generation stops at the next `HUMAN_PROMPT`.

    Args:
        prompt_args: Message, model and sampling settings for this call.

    Returns:
        The `completion` field of the API response, with `response_prefix`
        prepended when that field is set.
    """
    prompt_pieces = [
        f"{anthropic.HUMAN_PROMPT} {prompt_args.human_message}{anthropic.AI_PROMPT}"
    ]
    assistant_prefix = prompt_args.assistant_prefix
    if assistant_prefix is not None:
        prompt_pieces.append(f" {assistant_prefix}")

    completion_result = anthropic_client.completion(
        prompt="".join(prompt_pieces),
        stop_sequences=[anthropic.HUMAN_PROMPT],
        model=prompt_args.model.value,
        max_tokens_to_sample=prompt_args.max_tokens_to_sample,
        temperature=prompt_args.temp_0_1,
    )
    completion_text = completion_result['completion']

    response_prefix = prompt_args.response_prefix
    if response_prefix is None:
        return completion_text

    # Often useful together with `assistant_prefix`, e.g. to restore the "1."
    # that was pre-filled to start a numbered list.
    return f"{response_prefix}{completion_text}"


In [89]:
# Model used by all of the prompt-building functions in this notebook.
model = Model.claude_instant_v1_latest

# Number of tasks ("concepts") requested from the model; the extraction step
# raises if a different number of items comes back.
num_samples = 5

# Rules (the "constitution") that a generated instruction must satisfy. They
# are numbered and embedded in the critique prompt, which asks the model to
# quote broken rules verbatim, so the wording here must be free of typos.
constitution_rules = [
    "Instructions must explicitly describe the tone that the editor adopts.",
    "Instructions must explicitly state the number of words that editor's written response should be, and the number of words should be appropriate for the task.",
    "Instructions should explicitly reference the article content at the beginning, for example 'Here is an article:\n\n<article>ARTICLE_CONTENT</article>\n\n[Rest of instruction goes here]'.",
    "Instructions which involve multiple steps should be broken down into numbered subtasks.",
    # "Instructions must include explicit examples. For example, if the instruction is to check for spelling errors, it should include an example typo and its correction.",
]
# The rules as one numbered block: "1. <rule>\n2. <rule>\n...".
constitution_str = "\n".join(
    f"{number}. {rule}" for number, rule in enumerate(constitution_rules, start=1)
)


def get_concepts_prompt(n: int) -> PromptType:
    """Build the prompt that asks for `n` numbered Copy Editor tasks.

    The assistant turn is pre-filled up to the first "1." so the model
    continues the list; `response_prefix` puts that "1." back on the
    returned text.
    """
    request = f"Generate a list of {n} tasks that a Copy Editor might do as part of their job, given a written article. The output of each completed task should be written text. Write each task on a separate, numbered line."
    list_opening = f"Here are {n} tasks:\n\n1."
    return PromptType(
        human_message=request,
        model=model,
        temp_0_1=0.7,
        assistant_prefix=list_opening,
        response_prefix="1.",
    )


def get_naive_response_prompt(concept: str) -> PromptType:
    """Build the prompt that asks for a first-draft instruction for `concept`.

    `concept` is a task description; it is embedded inside <task> tags.
    """
    request = f"""I have an AI agent which acts as a Copy Editor and I want it to complete the following task:

<task>
{concept}
</task>

The agent only responds with written text. Write a concise instruction for the agent, asking it to complete this task. Don't include any preamble, just respond directly with the instruction for the agent."""
    return PromptType(human_message=request, model=model, temp_0_1=0.3)


def get_critique_prompt(naive_response: str) -> PromptType:
    """Build the prompt that asks which constitution rules an instruction breaks.

    The instruction is embedded inside <instruction> tags and the numbered
    rules inside <rules> tags. The first rule is reused in a worked example
    of the expected answer format.
    """
    critique_request = f"""Here is an instruction to an AI agent which acts as a Copy Editor:

<instruction>
{naive_response}
</instruction>

There may be some problems with this instruction. In particular, the instruction must abide by the following rules:

<rules>
{constitution_str}
</rules>

List each rule that the instruction breaks. State the rule verbatim, then describe how the instruction breaks the rule.

For example, if the instruction breaks rule 1, you would write:

The instruction breaks the following rules:
1. Rule: {constitution_rules[0]} - Reason: ..."""
    return PromptType(human_message=critique_request, model=model, temp_0_1=0.5)


def get_rewrite_prompt(naive_response: str, critique: str) -> PromptType:
    """Build the prompt that asks for an instruction rewritten to fix a critique.

    The original instruction goes inside <instruction> tags and the critique
    inside <issues> tags. The assistant turn is pre-filled with a short
    lead-in sentence.
    """
    rewrite_request = f"""Here is an instruction to an AI agent which acts as a Copy Editor:

<instruction>
{naive_response}
</instruction>

The instruction is supposed to follow certain rules, but it breaks them as follows:

<issues>
{critique}
</issues>

Rewrite the instruction to address these issues. Do not enclose your answer in <instruction> tags."""
    lead_in = "Here is the rewritten instruction:\n\n"
    return PromptType(
        human_message=rewrite_request,
        model=model,
        temp_0_1=0.5,
        assistant_prefix=lead_in,
    )


## Generate concepts

In [82]:
# Ask the model for `num_samples` tasks and show the raw numbered list it returns.
concepts_string = get_claude_response(get_concepts_prompt(num_samples))
print(concepts_string)


1. Check for spelling and grammatical errors.
2. Ensure consistency in style, formatting, and terminology. 
3. Verify facts and quotations within the text.
4. Identify and resolve ambiguous or unclear phrases.  
5. Make recommendations for restructuring or reorganizing the content for clarity and flow.


In [83]:
import re


def extract_items(text):
    """Return the item text of every numbered line ("<digits>. <item>") in `text`.

    Each line is stripped of surrounding whitespace first. Lines that do not
    then start with digits, a dot and one whitespace character are skipped.
    Only that leading marker is removed, so any extra spacing after it stays
    in the item.
    """
    numbered_prefix = re.compile(r'^\d+\.\s')
    found = []
    for raw_line in text.split('\n'):
        candidate = raw_line.strip()
        marker = numbered_prefix.match(candidate)
        if marker is None:
            continue
        found.append(candidate[marker.end():])
    return found


concepts = extract_items(concepts_string)
# Stop here if the model did not return exactly the requested number of
# tasks, so later cells never run on a partial list.
if len(concepts) != num_samples:
    raise Exception(f"Expected {num_samples} concepts, but got {len(concepts)}")
print(f"Got {len(concepts)} concepts")


Got 5 concepts


## Generate naive responses

> **Terminology** - Here we're prompting an LLM to generate prompts, which can be confusing. We'll use the word "prompt" for the input we send to the model and "response" for the text it returns, even when that text is itself an instruction for another agent.

In [84]:
def get_naive_response(concept):
    """Return the model's first-draft instruction for one task description."""
    naive_prompt = get_naive_response_prompt(concept)
    return get_claude_response(naive_prompt)


# Concurrently (speeds up openai responses, not possible with anthropic)
# NOTE(review): this disabled snippet passes two arguments, but
# get_naive_response takes only `concept`; fix the call before re-enabling.
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     naive_responses = list(executor.map(lambda x: get_naive_response(
#         x[0], x[1]), [(concept, i) for i, concept in enumerate(concepts)]))

# Sequentially: one API call per concept, results kept in the same order as `concepts`.
naive_responses = []
for concept in concepts:
    naive_responses.append(get_naive_response(concept))

# Pair each task with its naive response, separated by "===" lines, for side-by-side reading.
tasksResponsesString = "\n\n\n===\n\n\n".join(
    [f"TASK:\n{task}\n\nNAIVE RESPONSE:\n{response}" for task, response in zip(concepts, naive_responses)])
print(tasksResponsesString)


TASK:
Check for spelling and grammatical errors.

NAIVE RESPONSE:
 Check text for spelling and grammar issues.


===


TASK:
Ensure consistency in style, formatting, and terminology.

NAIVE RESPONSE:
 Revise text for consistent style, format, and word choice throughout.


===


TASK:
Verify facts and quotations within the text.

NAIVE RESPONSE:
 Verify that all facts and quotations within the text are accurate.


===


TASK:
Identify and resolve ambiguous or unclear phrases.

NAIVE RESPONSE:
 Revise text to resolve ambiguity and clarify meaning.


===


TASK:
Make recommendations for restructuring or reorganizing the content for clarity and flow.

NAIVE RESPONSE:
 Restructure and reorder content to improve clarity and flow. Provide recommendations in writing.


Most of these responses are just slightly more detailed rewrites of the corresponding task. That is a good start, but they are ultimately not very useful as instructions and don't demonstrate most of the good practices in prompt design.

## Critique

Here's what an example critique prompt looks like:

In [85]:
# Show the human-turn text of the critique prompt built for the first naive response.
print(get_critique_prompt(naive_responses[0]).human_message)


Here is an instruction to an AI agent which acts as a Copy Editor:

<instruction>
 Check text for spelling and grammar issues.
</instruction>

There may be some problems with this instruction. In particular, the instruction must abide by the following rules:

<rules>
1. Instructions must explicitly describe the tone that the editor adpots.
2. Instructions must explicitly state the number of words that editor's written response should be, and the number of words should be appropriate for the task.
3. Instructions should explicitly reference the article content at the beginning, for example 'Here is an article:

<article>ARTICLE_CONTENT</article>

[Rest of instruction goes here]'.
4. Instructions which involve multiple steps should be broken down into numbered subtasks.
</rules>

List each rule that the instruction breaks. State the rule verbatim, then describe how the instruction breaks the rule.

For example, if the instruction breaks rule 1, you would write:

The instruction breaks th

Now let's execute the critique prompt on each of the naive responses:

In [86]:
def get_critique(naive_response):
    """Return the model's list of constitution rules that `naive_response` breaks."""
    critique_prompt = get_critique_prompt(naive_response)
    return get_claude_response(critique_prompt)

# Concurrently:
# NOTE(review): this disabled snippet passes two arguments, but get_critique
# takes only `naive_response`; fix the call before re-enabling.
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     critiques = list(executor.map(lambda x: get_critique(x[0], x[1]), [
#                      (response, i) for i, response in enumerate(naive_responses)]))


# Sequentially: one API call per naive response, results kept in the same order.
critiques = []
for naive_response in naive_responses:
    critiques.append(get_critique(naive_response))

# Pair each naive response with its critique, separated by "===" lines.
responseCritiqueString = "\n\n\n===\n\n\n".join(
    [f"NAIVE RESPONSE:\n{response}\n\nCRITIQUE:\n{critique}" for response, critique in zip(naive_responses, critiques)])
print(responseCritiqueString)


NAIVE RESPONSE:
 Check text for spelling and grammar issues.

CRITIQUE:
 The instruction breaks the following rules:

1. Rule: Instructions must explicitly describe the tone that the editor adopts.  
         - Reason: The instruction does not specify the tone that the editor should adopt.

2. Rule: Instructions must explicitly state the number of words that editor's written response should be, and the number of words should be appropriate for the task.
         - Reason: The instruction does not specify the number of words in the editor's response.

3. Rule: Instructions should explicitly reference the article content at the beginning, for example 'Here is an article:  
 <article>ARTICLE_CONTENT</article>
[Rest of instruction goes here].'
         - Reason: The instruction does not reference any article content.

4. Rule: Instructions which involve multiple steps should be broken down into numbered subtasks.
         - Reason: The instruction consists of a single step and is not broke

## Rewrite

Here's what an example rewrite prompt looks like:

In [90]:
# Show the human-turn text of the rewrite prompt built from the first naive response and its critique.
print(get_rewrite_prompt(naive_responses[0], critiques[0]).human_message)


Here is an instruction to an AI agent which acts as a Copy Editor:

<instruction>
 Check text for spelling and grammar issues.
</instruction>

The instruction is supposed to follow certain rules, but it breaks them as follows:

<issues>
 The instruction breaks the following rules:

1. Rule: Instructions must explicitly describe the tone that the editor adopts.  
         - Reason: The instruction does not specify the tone that the editor should adopt.

2. Rule: Instructions must explicitly state the number of words that editor's written response should be, and the number of words should be appropriate for the task.
         - Reason: The instruction does not specify the number of words in the editor's response.

3. Rule: Instructions should explicitly reference the article content at the beginning, for example 'Here is an article:  
 <article>ARTICLE_CONTENT</article>
[Rest of instruction goes here].'
         - Reason: The instruction does not reference any article content.

4. Rule: 

Now let's execute the rewrite prompt on each of the naive responses:

In [91]:
def get_rewrite(naive_response, critique):
    """Return the model's rewrite of `naive_response` that addresses `critique`."""
    rewrite_prompt = get_rewrite_prompt(naive_response, critique)
    return get_claude_response(rewrite_prompt)

# Concurrently:
# NOTE(review): this disabled snippet passes three arguments, but get_rewrite
# takes only (naive_response, critique); fix the call before re-enabling.
# with concurrent.futures.ThreadPoolExecutor() as executor:
#     rewrites = list(executor.map(lambda x: get_rewrite(x[0], x[1], x[2]), [
#         (response, critique, i) for i, (response, critique) in enumerate(zip(naive_responses, critiques))]))


# Sequentially: rewrite each naive response using the critique at the same
# position. No index is needed, so iterate the pairs directly.
rewrites = []
for naive_response, critique in zip(naive_responses, critiques):
    rewrites.append(get_rewrite(naive_response, critique))

# Pair each naive response with its rewrite, separated by "===" lines.
rewriteString = "\n\n\n===\n\n\n".join(
    f"NAIVE RESPONSE:\n{response}\n\nREWRITE:\n{rewrite}"
    for response, rewrite in zip(naive_responses, rewrites)
)
print(rewriteString)


NAIVE RESPONSE:
 Check text for spelling and grammar issues.

REWRITE:
 Check text for spelling and grammar issues, adopting a polite and professional tone. Provide a concise response of no more than 50 words summarizing the key issues found.

1. Examine the article for any spelling mistakes. Mark each misspelled word.  

2. Check sentences for grammatical errors such as incorrect punctuation, run-on sentences, subject-verb agreement errors, and tense inconsistencies. Mark each error found.

3. Summarize the main spelling and grammar issues found in no more than  50 words. Here is an article:   
 <article>ARTICLE_CONTENT</article>


===


NAIVE RESPONSE:
 Revise text for consistent style, format, and word choice throughout.

REWRITE:
 Revise the following article for consistent style, format, and word choice throughout in a professional and formal tone:  

Here is an article:  
<article>ARTICLE_CONTENT</article>

1. Check for consistent verb tense, noun form, and spelling throughout th