In [None]:
from rich.console import Console
from rich.rule import Rule

from dotenv import load_dotenv
load_dotenv()

In [None]:
from anthropic import Anthropic

client = Anthropic()

for model in client.models.list():
    print(model.id, model.model_config)
    print(model)

In [None]:

# model = "claude-3-5-haiku-20241022"
# MAX_TOKENS=10

In [None]:

# message = client.messages.create(
#     model=model,
#     max_tokens=MAX_TOKENS,
#     messages=[
#         {
#             "role": "user",
#             "content": "What is quantum computing? Answer in one sentence"
#         }
#     ]
# )

In [None]:

print(message.content)

In [None]:
from anthropic import NOT_GIVEN, Anthropic, NotGiven
import loguru

logger = loguru.logger

class Conversation:
    def __init__(self, model:str = "claude-3-5-haiku-20241022", max_tokens:int = 10, system_msg:str | NotGiven = NOT_GIVEN):
        self.client = Anthropic()
        self.params = {
            "model": model,
            "max_tokens": max_tokens,
        }
        self.system_msg = system_msg
        self.messages = []

    def _add_message(self, role:str, text:str | None):
        if not text:
            logger.warning("No text provided for message")        
        match (role):
            case "user":
                new_message = {"role": "user", "content": text if text else ""}
            case "assistant":
                new_message = {"role": "assistant", "content": text if text else ""}
            case _:
                raise ValueError(f"Invalid role: {role}")
        self.messages.append(new_message)

    def chat(self, role:str = "user", text:str | None = None, prefill_text:str | None = None, temperature:float = 0.0, streaming:bool = True, stop_sequences:list[str] = []):
        self._add_message(role, text)
        self.params["system"] = self.system_msg
        self.params["messages"] = self.messages
        self.params["temperature"] = temperature
        self.params["stop_sequences"] = stop_sequences
        if prefill_text:
            self._add_message("assistant", prefill_text)
        if streaming:
            message = ""
            with self.client.messages.stream(**self.params) as stream:
                for text in stream.text_stream:
                    pass
                message = stream.get_final_message()
        else:
            message = self.client.messages.create(**self.params)
        message_content = message.content[0].text
        self._add_message("assistant", message_content)
        return message_content
    
    def __str__(self):
        return "\n".join([f"{message['role']}: {message['content']}\n" for message in self.messages])


In [None]:
class Chat:
    def __init__(self, model:str = "claude-3-5-haiku-20241022", max_tokens:int = 10, system_msg:str | NotGiven = NOT_GIVEN):
        print(max_tokens)
        self.conversation = Conversation(model=model, max_tokens=max_tokens, system_msg=system_msg)

    def run(self, temperature:float = 0.0, prefill_text:str | None = None, stop_sequences=["```"]):
        console = Console()
        while True:
            try:
                console.print(Rule("[bold blue]User[/bold blue]"))
                user_input = input("> ")
                console.print(f"> {user_input}")
                self.conversation.chat(role="user", text=user_input, prefill_text=prefill_text, temperature=temperature, streaming=True, stop_sequences=stop_sequences)
                console.print(Rule("[bold blue]Assistant[/bold blue]"))
                console.print(self.conversation.messages[-1]['content'])
                # console.print(self.conversation)
            except KeyboardInterrupt:
                logger.info("Ctrl+C pressed. Exiting...")
                break

In [None]:
SYSTEM_MSG = """
You are an expert Python programmer that provides code examples in the most efficient 
and concise way to the questions asked by the user. Just answer with code.  
"""

chat = Chat(max_tokens=1000, system_msg=SYSTEM_MSG)
chat.run()

In [None]:
SYSTEM_MSG = """
You are an expert in AWS services. You are able to answer questions about AWS services and provide code examples in the most efficient 
and concise way to the questions asked by the user. Just answer with the client code to use the AWS services. Do not include any explanation
nor create any code blocks.
"""

chat = Chat(max_tokens=1000, system_msg=SYSTEM_MSG)
chat.run()

In [None]:
import json


def generate_dataset(file_name:str = "dataset.json"):
    prompt = """
Generate a evaluation dataset for a prompt evaluation. The dataset will be used to evaluate prompts
that generate Python, JSON, or Regex specifically for AWS-related tasks. Generate an array of JSON objects,
each representing task that requires Python, JSON, or a Regex to complete.

Example output:
```json
[
    {
        "task": "Description of task",
        "format": "python" | "json" | "regex"
    },
    ...additional
]
```

* Focus on tasks that can be solved by writing a single Python function, a single JSON object, or a regular expression.
* Focus on tasks that do not require writing much code

Please generate 3 objects.
"""

    chat = Chat(max_tokens=1000)
    message = chat.conversation.chat(role="user", text=prompt, prefill_text="```json", stop_sequences=["```"])
    logger.info(message)
    json_message = json.loads(message)
    logger.info(json_message)
    with open(file_name, "w") as f:
        json.dump(json_message, f, indent=2)

generate_dataset()
    

1000


[32m2025-08-05 18:02:54.839[0m | [1mINFO    [0m | [36m__main__[0m:[36mgenerate_dataset[0m:[36m30[0m - [1m
[
    {
        "task": "Create a regular expression to validate an AWS IAM username, which must be between 1-64 characters, contain only alphanumeric characters and certain symbols (.-_+=)",
        "format": "regex"
    },
    {
        "task": "Write a Python function to convert an AWS EC2 instance state code to its human-readable description (e.g., 16 -> 'running', 0 -> 'pending')",
        "format": "python"
    },
    {
        "task": "Create a JSON configuration for an AWS Lambda function's basic event trigger from an S3 bucket, specifying the bucket name and event types",
        "format": "json"
    }
]
[0m
[32m2025-08-05 18:02:54.845[0m | [1mINFO    [0m | [36m__main__[0m:[36mgenerate_dataset[0m:[36m32[0m - [1m[{'task': 'Create a regular expression to validate an AWS IAM username, which must be between 1-64 characters, contain only alphanumeric char

In [None]:
# Generate three different sample AWS CLI commands. Each shold be very short.

In [55]:
import ast
import re
import json

class Grader:
    
    EVAL_PROMPT = """
    You are an expert code reviewer. Evaluate this AI-generated solution.
    
    Task: {task}
    Solution: {solution}
    
    Provide your evaluation as a structured JSON object with:
    - "strengths": An array of 1-3 key strengths
    - "weaknesses": An array of 1-3 key areas for improvement  
    - "reasoning": A concise explanation of your assessment
    - "score": A number between 1-10
    """
    
    def grade_by_model(self, test_case, output):
        eval_prompt = self.EVAL_PROMPT.format(task=test_case["task"], solution=output)
        conversation = Conversation(max_tokens=1000)
        eval_text = conversation.chat(role="user", text=eval_prompt, prefill_text="```json", stop_sequences=["```"])
        return json.loads(eval_text)
    
    def validate_json(self, text):
        try:
            json.loads(text.strip())
            return 10
        except json.JSONDecodeError:
            return 0

    def validate_python(self, text):
        try:
            ast.parse(text.strip())
            return 10
        except SyntaxError:
            return 0

    def validate_regex(self, text):
        try:
            re.compile(text.strip())
            return 10
        except re.error:
            return 0


    def grade_syntax(self, response, test_case):
        format = test_case["format"]
        match format:
            case "json":
                return self.validate_json(response)
            case "python":
                return self.validate_python(response)
            case "regex":
                return self.validate_regex(response)    
    
    

In [None]:
import re
import uuid
import json

def generate_filename_from_prompt(prompt: str, extension: str = "json") -> str:
    """
    Generate a filename from the first 10 characters of a prompt combined with a UUID.
    
    Args:
        prompt (str): The input prompt
        extension (str): File extension (default: "json")
    
    Returns:
        str: Generated filename in format: first_10_chars_uuid.extension
    """
    # Clean the prompt: remove special characters and convert to lowercase
    cleaned_prompt = re.sub(r'[^a-zA-Z0-9\s]', '', prompt.lower())
    
    # Get first 10 characters, pad with 'x' if shorter
    first_10 = cleaned_prompt[:10].strip()
    if len(first_10) < 10:
        first_10 = first_10.ljust(10, 'x')
    
    # Generate UUID (version 4 - random)
    unique_id = str(uuid.uuid4())[:8]  # Use first 8 chars of UUID for shorter filename
    
    # Combine and create filename
    filename = f"{first_10}_{unique_id}.{extension}"
    
    return filename


class EvalPipeline:

    def __init__(self, dataset_file:str = "dataset.json", prompt:str = "Please solve the following task: {task}"):
        self.dataset = self.load_dataset(dataset_file)
        self.prompt = prompt
        self.conversation = Conversation(max_tokens=1000)
        self.saved_file = generate_filename_from_prompt(prompt)
        self.grader = Grader()
        
    def load_dataset(self, dataset_file:str):
        with open(dataset_file, "r") as f:
            return json.load(f)

    def run_prompt(self, test_case, prefill_text:str = "```code"):
        """Merges the prompt and test case input, then returns the result"""
        return self.conversation.chat(role="user", text=self.prompt.format(task=test_case["task"]), prefill_text=prefill_text, stop_sequences=["```"])


    def run_test_case(self, test_case):
        """Calls run_prompt, then grades the result"""
        output = self.run_prompt(test_case)
        
        # Grade the output
        model_grade = self.grader.grade_by_model(test_case, output)
        syntax_grade = self.grader.grade_syntax(test_case, output)
        
        # Calculate the final score
        final_score = (model_grade["score"] + syntax_grade) / 2
        
        return {
            "task": test_case["task"],
            "generated_output": output,
            "score": final_score,
            "reasoning": model_grade["reasoning"],
            "strengths": model_grade["strengths"],
            "weaknesses": model_grade["weaknesses"]
        }
    
    def run(self):
        """Loads the dataset and calls run_test_case with each case"""
        results = []
        
        for test_case in self.dataset:
            result = self.run_test_case(test_case)
            results.append(result)
    
        eval_results = {
            "prompt": self.prompt,
            "results": results
        }
    
        with open(self.saved_file, "w") as f:
            json.dump(eval_results, f, indent=2)
        return eval_results
            

In [56]:
eval_pipeline = EvalPipeline()
eval_pipeline.run()

{'prompt': 'Please solve the following task: {task}',
 'results': [{'task': 'Create a regular expression to validate an AWS IAM username, which must be between 1-64 characters, contain only alphanumeric characters and certain symbols (.-_+=)',
   'generated_output': 'Here\'s a solution with explanation:\n\n```python\nimport re\n\ndef validate_iam_username(username):\n    # Regular expression pattern for AWS IAM username validation\n    pattern = r\'^[a-zA-Z0-9.-_+=]{1,64}$\'\n    \n    return re.match(pattern, username) is not None\n\n# Test cases\ntest_usernames = [\n    "john.doe",           # Valid\n    "jane_smith123",      # Valid\n    "user+admin=test",    # Valid\n    "",                   # Invalid (too short)\n    "a" * 65,             # Invalid (too long)\n    "user name",          # Invalid (contains space)\n    "user@example",       # Invalid (contains @ symbol)\n]\n\nfor username in test_usernames:\n    print(f"{username}: {validate_iam_username(username)}")\n```\n\nRegex 

In [57]:
PROMPT = """
Please solve the following task:
{task}

* Respond only with Python, JSON, or a plain Regex
* Do not add any comments or commentary or explanation
"""

eval_pipeline = EvalPipeline(prompt=PROMPT)
eval_pipeline.run()

{'prompt': '\nPlease solve the following task:\n{task}\n\n* Respond only with Python, JSON, or a plain Regex\n* Do not add any comments or commentary or explanation\n',
 'results': [{'task': 'Create a regular expression to validate an AWS IAM username, which must be between 1-64 characters, contain only alphanumeric characters and certain symbols (.-_+=)',
   'generated_output': '^[a-zA-Z0-9.-_+=]{1,64}$',
   'score': 7,
   'reasoning': 'The regex captures most AWS IAM username requirements but misses some nuanced validation rules. While technically functional, it would benefit from more precise constraints to fully align with AWS specifications.',
   'strengths': ['Correctly uses character class to allow alphanumeric characters',
    'Implements length constraint between 1-64 characters',
    'Includes allowed special characters per AWS IAM username rules'],
   'weaknesses': ['Does not enforce starting with a letter/number (AWS requirement)',
    'No explicit prevention of consecutive