# DSBA 6165 Milestone One Prompt Testing

## Software Setup

### Install Modules

In [None]:
# Notebook shell commands: install the OpenAI and Langfuse client packages imported below.
!pip install openai
!pip install langfuse

### Import Modules

In [5]:
# Gemini
import google.generativeai as genai
from openai import OpenAI
from google.colab import userdata

# Core Python Libraries
import json
import os
import sys
import requests

from datetime import date
from scipy.optimize import minimize

# Data Science Specific Libraries
import pandas as pd
import numpy as np

# Google Colab Goodies
from IPython.display import display_markdown, display_html
from google.colab import data_table
data_table.enable_dataframe_formatter()
from google.colab import drive

import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from PIL import Image
import pytest

from langfuse import Langfuse
from langfuse.decorators import observe
from langfuse.openai import openai # OpenAI integration


### Constants

In [6]:
# OpenAI client; the API key is read from the Colab user secret "DSBA_LLAMA3_KEY".
CLIENT = OpenAI(api_key=userdata.get("DSBA_LLAMA3_KEY"))
# Redirect the client to the endpoint stored in the "MODAL_BASE_URL" secret.
CLIENT.base_url = userdata.get("MODAL_BASE_URL")
# Model identifier passed as `model=` on every chat-completion request.
MODEL = "/models/NousResearch/Meta-Llama-3-8B-Instruct"


# Langfuse client: the secret key comes from a Colab user secret, while the
# public key and host are hard-coded here.
langfuse = Langfuse(
  secret_key=userdata.get("lang-fuse-api"),
  public_key="pk-lf-7123ad96-768d-4e17-98e7-acbcd0058798",
  host="https://us.cloud.langfuse.com"
)

### Reusable Code

In [7]:
@observe()
def submit_prompt(
    messages,
    stream=True,
    temperature=0.7,             # Controls creativity (0 to 2, higher means more creative)
    max_tokens=128,              # Limits the response length
    top_p=0.9,                   # Controls diversity via nucleus sampling (0 to 1)
    #top_k=50,                    # Limits token sampling (0 disables this, higher gives more variety)
    presence_penalty=0.6,        # Discourages repeating tokens (ranges from -2 to 2)
    frequency_penalty=0.4,       # Penalty based on how often tokens appear in the response (ranges from -2 to 2)
    stop=["\n"]                  # Stop sequences to control when the model stops generating
):
    """Send a chat-completion request to MODEL via CLIENT and return the reply text.

    Args:
        messages: List of chat message dicts ("role"/"content") to send.
        stream: When truthy, request a streamed response and concatenate the
            chunks; when falsy, read the text from the single completion.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        presence_penalty: Presence penalty forwarded to the API.
        frequency_penalty: Frequency penalty forwarded to the API.
        stop: Stop sequences forwarded to the API. The default list is never
            mutated here, so sharing it across calls is harmless.

    Returns:
        The generated text as a single string (empty if no content came back).
    """
    # Keep the response in its own name so the `stream` flag is not clobbered.
    completion = CLIENT.chat.completions.create(
        model=MODEL,
        messages=messages,
        stream=stream,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
       # top_k=top_k,
        presence_penalty=presence_penalty,
        frequency_penalty=frequency_penalty,
        stop=stop
    )

    if not stream:
        # A non-streamed call returns one completion object, not an iterator
        # of chunks, so the text lives on `message` rather than `delta`.
        content = completion.choices[0].message.content
        return content if content is not None else ""

    pieces = []
    for chunk in completion:
        # Skip chunks that carry no choices (presumably usage/keep-alive
        # chunks on some backends -- TODO confirm) instead of raising IndexError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text is not None:
            pieces.append(text)
    return "".join(pieces)


def get_clarification(message):
    """Ask the model, acting as a copy editor, for two clarified versions of *message*.

    The request goes through ``submit_prompt`` with that function's default
    sampling settings, and the reply text is returned unchanged.

    NOTE(review): those defaults stop generation at the first newline and cap
    the reply at 128 tokens, which may truncate a two-version answer -- confirm
    this is intended.
    """
    system_turn = {
        "role": "system",
        "content": "You are a copy editor, please clarify the text that I provide with two versions, one that maximizes clarity and one that preserves my voice.",
    }
    user_turn = {"role": "user", "content": message}
    return submit_prompt([system_turn, user_turn])

def translate(message, to_lang, from_lang="en_US",temperature=0.7,max_tokens=250,top_p=1):
    """Translate *message* from ``from_lang`` to ``to_lang`` using the chat model.

    Args:
        message: Text to translate; sent as the user turn.
        to_lang: Target language/locale tag interpolated into the system prompt.
        from_lang: Source language/locale tag interpolated into the system prompt.
        temperature: Forwarded to ``submit_prompt``.
        max_tokens: Forwarded to ``submit_prompt``.
        top_p: Forwarded to ``submit_prompt``.

    Returns:
        Whatever text ``submit_prompt`` returns for the conversation.
    """
    instruction = f"translate the message provided from {from_lang} to {to_lang}. Please do not add any quotes, or trailing or leading spaces"
    conversation = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": message},
    ]
    return submit_prompt(
        conversation,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )


## Tests

### Translations

In [None]:
# Ad-hoc check: translate the single word "hood" to British English. The
# triple-quoted literal wraps the word in leading and trailing newlines.
response = translate('''
hood
''',to_lang="en_GB",temperature=1.5,top_p=.5,max_tokens=200)
# Show the reply as Markdown in the cell output.
display_markdown(response,raw=True)

### Pytests

In [None]:
def same_string(response,expected):
    """Assert that *response* equals *expected* once both are whitespace-stripped.

    Raises:
        AssertionError: with an "Expected ..., but got ..." message on mismatch.
    """
    actual = response.strip()
    wanted = expected.strip()
    assert actual == wanted, f"Expected {wanted}, but got {actual}"

def test_to_brit_1():
    """Test Translation from US to GB English ("Open the trunk" -> "Open the boot")."""
    # The docstring must be the first statement; placed after an assignment it
    # is only a discarded string expression and __doc__ stays None.
    expected = 'Open the boot'
    message="Open the trunk"
    # Low temperature/top_p are used to keep the model's wording stable.
    response=translate(message,
      to_lang="en_GB",temperature=.07,top_p=.2,max_tokens=20)
    same_string(response,expected)

def test_to_brit_1a():
    """Test Translation from US to GB English in Drunk Mode.

    Same input and expectation as ``test_to_brit_1`` but sampled at
    temperature 2.0; the notebook's own comment notes that this mode is
    expected to fail.
    """
    # The docstring must be the first statement; placed after an assignment it
    # is only a discarded string expression and __doc__ stays None.
    expected = 'Open the boot'
    message="Open the trunk"
    response=translate(message,
      to_lang="en_GB",temperature=2.0,top_p=.8,max_tokens=20)
    same_string(response,expected)

def test_to_brit_2():
    """Test Translation from US to GB English ("Open the hood" -> "Open the bonnet")."""
    # The docstring must be the first statement; placed after an assignment it
    # is only a discarded string expression and __doc__ stays None.
    expected = 'Open the bonnet'
    message="Open the hood"
    # Low temperature/top_p are used to keep the model's wording stable.
    response=translate(message,
      to_lang="en_GB",temperature=.07,top_p=.2,max_tokens=20)
    same_string(response,expected)

def test_to_brit_3():
    """Test Translation from US to GB English ("Ride the elevator" -> "Ride the lift")."""
    translated = translate(
        "Ride the elevator",
        to_lang="en_GB",
        temperature=.07,
        top_p=.2,
        max_tokens=20,
    )
    same_string(translated, "Ride the lift")

def run_tests(test_functions):
    """Call each zero-argument test in *test_functions* and print its outcome.

    A test that returns normally is reported as "Passed"; one that raises
    AssertionError is reported as "Failed" with the assertion message, and the
    loop moves on to the next test. Any other exception propagates to the
    caller.
    """
    for case in test_functions:
        try:
            case()
        except AssertionError as failure:
            print(f"{case.__name__} Failed: {failure}")
        else:
            print(f"{case.__name__} Passed")

# Disabled manual runner, kept for reference. Because the calls are not wrapped
# in try/except, the first failing assertion would stop the remaining tests.
if False:
  test_to_brit_1()
  print("Test 1 Passed")
  test_to_brit_2()
  print("Test 2 Passed")
  test_to_brit_3()
  print("Test 3 Passed")
  test_to_brit_1a() # Drunk Mode will always fail
  print("Test 1a Passed")

### Run Tests

In [None]:
# Automatically find all functions starting with 'test_'
test_functions = [func for name, func in globals().items() if callable(func) and name.startswith("test_")]

# Run the tests
run_tests(test_functions)

# Appendix

## Getting By With a Little Help from an LLM

**Link**: `https://chatgpt.com/share/ab631e53-78f4-4055-8eb5-311fc5e928c1`

### Prompt

I have this code ....

```
def same_string(response,expected):
    assert response.strip() == expected.strip(), f"Expected {expected.strip()}, but got {response.strip()}"

def test_to_brit_1():
    expected = 'Open the boot'
    """Test Translation from US to GB English."""
    message="Open the trunk"
    response=translate(message,
      to_lang="en_GB",temperature=.07,top_p=.2,max_tokens=20)
    same_string(response,expected)

def test_to_brit_1a():
    expected = 'Open the boot'
    """Test Translation from US to GB English in Drunk Mode."""
    message="Open the trunk"
    response=translate(message,
      to_lang="en_GB",temperature=2.0,top_p=.8,max_tokens=20)
    same_string(response,expected)

def test_to_brit_2():
    expected = 'Open the bonnet'
    """Test Translation from US to GB English."""
    message="Open the hood"
    response=translate(message,
      to_lang="en_GB",temperature=.07,top_p=.2,max_tokens=20)
    same_string(response,expected)

def test_to_brit_3():
    expected ="Ride the lift"
    message="Ride the elevator"
    response=translate(message,
      to_lang="en_GB",temperature=.07,top_p=.2,max_tokens=20)
    same_string(response,expected)

test_to_brit_1()
print("Test 1 Passed")
test_to_brit_2()
print("Test 2 Passed")
test_to_brit_3()
print("Test 3 Passed")
test_to_brit_1a() # Drunk Mode will always fail
print("Test 1a Passed")
```

I dont think that I can use pytest.main() inside of google collab  --
is there any way for me to iterate through the tests (i would be happy to manually create the list of functions if necessary) also is there a way to run it that catches the assertion error prints a message and continues through the list.

Alternatively if there is a way to query google colab for all of the functions that were defined by name (I am happy populate the list of functions by matching a pattern)

### Response

In Google Colab, you can manually iterate through a list of test functions, catch assertion errors, and continue running the remaining tests. Here’s an example that wraps your tests in a try-except block to handle assertion errors:

```python
def run_tests(test_functions):
    for test in test_functions:
        try:
            test()
            print(f"{test.__name__} Passed")
        except AssertionError as e:
            print(f"{test.__name__} Failed: {e}")

# Manually create a list of test functions
test_functions = [test_to_brit_1, test_to_brit_2, test_to_brit_3, test_to_brit_1a]

# Run the tests
run_tests(test_functions)
```

This approach manually defines a list of functions and uses a `try-except` block to catch any `AssertionError`, print the error message, and continue with the next test.

Alternatively, if you want to automatically find all test functions matching a specific pattern (e.g., functions that start with `test_`), you can use Python’s `globals()` to inspect the defined functions:

```python
# Automatically find all functions starting with 'test_'
test_functions = [func for name, func in globals().items() if callable(func) and name.startswith("test_")]

# Run the tests
run_tests(test_functions)
```

This way, you can avoid manually listing the functions.

## Debug Code

In [24]:
def test_string(response,expected):
    """Assert that *response* and *expected* match after stripping whitespace.

    Raises:
        AssertionError: with an "Expected ..., but got ..." message on mismatch.
    """
    actual = response.strip()
    wanted = expected.strip()
    assert actual == wanted, f"Expected {wanted}, but got {actual}"

# Sanity check: the two inputs differ only in surrounding whitespace, so this must not raise.
test_string(" A","A ")

In [21]:
# Confirm that str.strip() removes both the leading and the trailing space.
print((" E ".strip()))

E
