In [5]:
# Load env variables and create client
from dotenv import load_dotenv
from anthropic import Anthropic
from IPython.display import Markdown
import json

load_dotenv()

client = Anthropic()
model = "claude-3-5-haiku-latest"
num_tokens = 1000

In [6]:
# Helper functions
def add_user_message(messages, text):
    user_message = {
        "role": "user",
        "content": text
        }
    messages.append(user_message)


def add_assistant_message(messages, text):
    assistant_message = {
        "role": "assistant",
        "content": text
        }
    messages.append(assistant_message)


def chat(messages, system=None, temperature=1.0, stop_sequences=[]):
    params = {
        "model": model,
        "max_tokens": num_tokens,
        "messages": messages,
        "temperature": temperature,
    }

    if system:
        params["system"] = system
    
    if stop_sequences:
        params["stop_sequences"] = stop_sequences

    with client.messages.stream(**params) as stream:
        for text in stream.text_stream:
            print(text, end="")
            pass
    response = stream.get_final_message()
    #message = stream.get_final_message()
    
    # message = client.messages.create(**params)
    return response.content[0].text
    # return message.content[0].text

def run_prompt(test_case):
    """Merges the prompt and test case input, then returns the result"""
    prompt = f"""
Please solve the following task:

{test_case["task"]}
"""

    messages = []
    add_user_message(messages, prompt)
    output = chat(messages)
    return output

def run_test_case(test_case):
    """Calls run_prompt, then grades the result"""
    output = run_prompt(test_case)

    # TODO - Grading
    score = 10

    return {
        "output": output,
        "test_case": test_case,
        "score": score
    }

def run_eval(dataset):
    """Loads the dataset and calls run_test_case with each case"""
    results = []

    for test_case in dataset:
        result = run_test_case(test_case)
        results.append(result)

    return results

In [7]:
# Function to grade a test case + output using a model
def grade_by_model(test_case, output):
    eval_prompt = f"""
You are an expert AWS code reviewer. Your task is to evaluate the following AI-generated solution.

Original Task:
<task>
{test_case["task"]}
</task>

Solution to Evaluate:
<solution>
{output}
</solution>

Output Format
Provide your evaluation as a structured JSON object with the following fields, in this specific order:
- "strengths": An array of 1-3 key strengths
- "weaknesses": An array of 1-3 key areas for improvement
- "reasoning": A concise explanation of your overall assessment
- "score": A number between 1-10

Respond with JSON. Keep your response concise and direct.
Example response shape:
{{
    "strengths": string[],
    "weaknesses": string[],
    "reasoning": string,
    "score": number
}}
    """

    messages = []
    add_user_message(messages, eval_prompt)
    add_assistant_message(messages, "```json")
    eval_text = chat(messages, stop_sequences=["```"])
    return json.loads(eval_text)

In [8]:
# Passes a test case into Claude
def run_prompt(test_case):
    prompt = f"""
Please solve the following task:

{test_case["task"]}
"""

    messages = []
    add_user_message(messages, prompt)
    output = chat(messages)
    return output

In [9]:
import json


def generate_dataset():
    prompt = """
Generate a evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts
that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects,
each representing a task that requires Python, JSON, or a Regex to complete.

Example output:
```json
[
    {
        "task": "Description of task",
    },
    ...additional
]
```

* Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a regular expression.
* Focus on tasks that do not require writing much code

Please generate 3 objects.
"""
    messages = []
    add_user_message(messages, prompt)
    add_assistant_message(messages, "```json")
    text = chat(messages, stop_sequences=["```"])
    return json.loads(text)

In [11]:
with open("dataset.json", "r") as f:
   dataset = json.load(f)

results = run_eval(dataset)

Here's a Python function that validates an AWS IAM username based on the specified criteria:

```python
import re

def validate_iam_username(username):
    """
    Validates an AWS IAM username.
    
    Rules:
    - Must start with a letter
    - Contains only lowercase letters, numbers, and these specific characters: +, =, ,, ., @, _
    - Maximum length is 64 characters
    
    Args:
        username (str): The username to validate
    
    Returns:
        bool: True if the username is valid, False otherwise
    """
    # Check if username is a string and not empty
    if not isinstance(username, str) or not username:
        return False
    
    # Check length (AWS IAM username max length is 64 characters)
    if len(username) > 64:
        return False
    
    # Regular expression pattern for validation
    pattern = r'^[a-z][a-z0-9+=,.@_]*$'
    
    # Use regex to validate the username
    return bool(re.match(pattern, username))

# Test cases
def test_validate_iam_username(

Traceback (most recent call last):
  File "/home/jx-creator/mambaforge/envs/ctorch/lib/python3.13/site-packages/IPython/core/interactiveshell.py", line 3667, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
    ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_76891/818910915.py", line 4, in <module>
    results = run_eval(dataset)
  File "/tmp/ipykernel_76891/1032798972.py", line 74, in run_eval
    result = run_test_case(test_case)
  File "/tmp/ipykernel_76891/1032798972.py", line 58, in run_test_case
    output = run_prompt(test_case)
  File "/tmp/ipykernel_76891/2191347282.py", line 11, in run_prompt
    output = chat(messages)
  File "/tmp/ipykernel_76891/1032798972.py", line 33, in chat
    for text in stream.text_stream:
                ^^^^^^^^^^^^^^^^^^
  File "/home/jx-creator/mambaforge/envs/ctorch/lib/python3.13/site-packages/anthropic/lib/streaming/_messages.py", line 129, in __stream_text__
    for chunk in self:
                 ^^

In [None]:
outcome = json.dumps(results, indent=2)
print(outcome)

[
  {
    "output": "Here's a Python function that converts AWS EC2 instance state codes to their corresponding human-readable descriptions:\n\n```python\ndef convert_ec2_instance_state(state_code):\n    \"\"\"\n    Convert AWS EC2 instance state code to human-readable description.\n    \n    Args:\n        state_code (int): The EC2 instance state code\n    \n    Returns:\n        str: Human-readable description of the instance state\n    \"\"\"\n    # Dictionary mapping state codes to their descriptions\n    state_mapping = {\n        0: 'pending',\n        16: 'running',\n        32: 'shutting-down',\n        48: 'terminated',\n        64: 'stopping',\n        80: 'stopped'\n    }\n    \n    # Return the description if found, otherwise return 'unknown'\n    return state_mapping.get(state_code, 'unknown')\n\n# Example usage\ndef main():\n    # Test the function with different state codes\n    test_states = [0, 16, 32, 48, 64, 80, 99]\n    \n    for state in test_states:\n        print

In [None]:
outcome.split("\\n\\n", "\n\n")
Markdown(outcome)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/jx-creator/mambaforge/envs/ctorch/lib/python3.13/site-packages/IPython/core/interactiveshell.py", line 3667, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
    ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_2414066/2736595205.py", line 1, in <module>
    outcome.split("\\n\\n", "\n\n")
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
TypeError: 'str' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/jx-creator/mambaforge/envs/ctorch/lib/python3.13/site-packages/IPython/core/interactiveshell.py", line 2176, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
        etype, value, tb, tb_offset=tb_offset
    )
  File "/home/jx-creator/mambaforge/envs/ctorch/lib/python3.13/site-packages/IPython/core/ultratb.py", line 1182, in structured_traceback
    return FormattedTB.structured_tra