# Setup


In [None]:
import requests
import json
import pytest
from dataclasses import dataclass

import ipytest
ipytest.autoconfig()

# Root of the chatbot API that every request in this notebook is sent to
base_url = "http://localhost:8502/api/chatbot"

# Credentials: only for testing
auth_key = "qA94VhroHMHFN55inWgfAAkt1WEmzQ4J"

# Query-string suffix appended to every request (note the leading "&").
# In Version 1.6.1, the freva_config also needs to be set to a specific path.
# We won't be using this for now, so a dummy value is sent.
auth_string = "".join([
    "&auth_key=", auth_key,
    "&user_id=testing",
    "&freva_config=", "Cargo.toml",
])

# Helper Functions

In [None]:
def get_request(url, stream=False):
    ''' Send a GET request to ``base_url + url`` with the testing credentials appended.

    Args:
        url: Endpoint path relative to ``base_url``; may already carry a query string.
        stream: Forwarded to ``requests.get``.

    Returns:
        The ``requests.Response`` object.
    '''
    credentials = auth_string
    if "?" not in url:
        # auth_string starts with "&". Without an existing query string the
        # parameters would end up as part of the path ("/ping&auth_key=..."),
        # so the query has to be opened with "?" instead.
        credentials = "?" + auth_string.lstrip("&")
    return requests.get(base_url + url + credentials, stream=stream)

def get_avail_chatbots():
    ''' Request /availablechatbots and return the decoded JSON body. '''
    chatbots_response = get_request("/availablechatbots")
    return chatbots_response.json()

In [None]:
@dataclass
class StreamResult:
    chatbot: str | None
    raw_response: list
    json_response: list
    code_variants: list | None = None
    codeoutput_variants: list | None = None
    assistant_variants: list | None = None
    image_variants: list | None = None
    server_hint_variants: list | None = None
    thread_id: str | None = None

    def extract_variants(self):
        if self.json_response:
            self.code_variants = [i["content"][0] for i in self.json_response if i["variant"] == "Code"]
            self.codeoutput_variants = [i["content"][0] for i in self.json_response if i["variant"] == "CodeOutput"]
            self.assistant_variants = [i["content"] for i in self.json_response if i["variant"] == "Assistant"]
            self.image_variants = [i["content"] for i in self.json_response if i["variant"] == "Image"]
            self.server_hint_variants = [i["content"] for i in self.json_response if i["variant"] == "ServerHint"]
            self.thread_id = json.loads(self.json_response[0]["content"])["thread_id"]

def generate_full_respone(user_input, chatbot=None, thread_id=None) -> StreamResult:
    ''' Send ``user_input`` to /streamresponse and collect the complete streamed answer.

    Args:
        user_input: The prompt. It is percent-encoded before being put into the
            query string, so characters such as "+", "&" or "#" arrive unchanged.
        chatbot: Optional chatbot name, sent as the ``chatbot`` query parameter.
        thread_id: Optional thread to continue, sent as the ``thread_id`` query parameter.

    Returns:
        A ``StreamResult`` holding the JSON fragments as text (``raw_response``),
        their parsed form (``json_response``) and whatever ``extract_variants()``
        derives from them.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
        json.JSONDecodeError: If the body contains text that is not JSON.
    '''
    from urllib.parse import quote  # Local import so this function is self-contained

    inner_url = "/streamresponse?input=" + quote(user_input, safe="")
    if chatbot:
        inner_url += "&chatbot=" + quote(chatbot, safe="")
    if thread_id:
        inner_url += "&thread_id=" + quote(thread_id, safe="")

    # The response is streamed, but we consume it completely here and store it.
    # The bytes are joined before decoding, because a chunk boundary may fall
    # into the middle of a multi-byte UTF-8 character.
    response = get_request(inner_url, stream=True)
    body = b"".join(response).decode("utf-8")

    # The chunks are not guaranteed to line up with the JSON fragments, so the
    # fragments are split by actually parsing the text instead of by looking at
    # how each chunk starts.
    decoder = json.JSONDecoder()
    raw_fragments = []
    parsed_fragments = []
    position = 0
    while position < len(body):
        if body[position].isspace():
            # raw_decode does not accept leading whitespace
            position += 1
            continue
        parsed, end = decoder.raw_decode(body, position)
        raw_fragments.append(body[position:end])
        parsed_fragments.append(parsed)
        position = end

    # All three fields are passed at once, so this works whether or not the
    # response fields of StreamResult have defaults.
    result = StreamResult(chatbot, raw_fragments, parsed_fragments)
    result.extract_variants()
    return result

def get_thread_by_id(thread_id):
    ''' Request /thread for the given thread_id and return the decoded JSON body. '''
    thread_url = "/thread?thread_id=" + thread_id
    thread_response = get_request(thread_url)
    return thread_response.json()


# Testing functions

In [None]:
def test_is_up():
    ''' Can /ping and /docs be requested without an exception being raised? '''
    for endpoint in ("/ping", "/docs"):
        get_request(endpoint)
    

def print_help():
    ''' Print the body returned by the /help endpoint. '''
    help_text = get_request("/help").text # Same as /ping
    print(help_text)

def print_docs():
    ''' Print the body returned by the /docs endpoint. '''
    docs_text = get_request("/docs").text
    print(docs_text)

In [None]:
def test_available_chatbots():
    ''' Are gpt-4o-mini and gpt-4o among the available chatbots? '''
    available = get_avail_chatbots()
    for expected_name in ("gpt-4o-mini", "gpt-4o"):
        assert expected_name in available


def get_hello_world_thread_id() -> str:
    ''' Let gpt-4o-mini print "Hello World !" via the code_interpreter and return the thread_id of that conversation. '''
    prompt = "Please use the code_interpreter tool to run the following code exactly and only once: \"print('Hello\\nWorld\\n!', flush=True)\"."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # At least one code output has to contain the printed text
    expected_output = "Hello\nWorld\n!"
    matching_outputs = [output for output in result.codeoutput_variants if expected_output in output]
    assert matching_outputs

    # The thread_id is handed back for further testing
    return result.thread_id

def test_hello_world():
    ''' Does the printing of Hello World work, and can the thread be fetched again afterwards? '''
    expected_id = get_hello_world_thread_id()

    # Fetch the same thread again through the thread endpoint
    stored_thread = get_thread_by_id(expected_id)

    # The returned thread has to be the one we asked for ...
    assert stored_thread["thread_id"] == expected_id
    # ... and its code output has to contain "Hello World !"
    stored_outputs = stored_thread["codeoutput_variants"]
    assert any("Hello\nWorld\n!" in output for output in stored_outputs)


def test_sine_wave(display=False):
    ''' Can the code_interpreter tool handle matplotlib and output an image? ''' # Base functionality test
    prompt = "This is a test regarding your capabilities of using the code_interpreter tool and whether it supports matplotlib. Please use the code_interpreter tool to run the following code: \"import numpy as np\nimport matplotlib.pyplot as plt\nt = np.linspace(-2 * np.pi, 2 * np.pi, 100)\nsine_wave = np.sin(t)\nplt.figure(figsize=(10, 5))\nplt.plot(t, sine_wave, label='Sine Wave')\nplt.title('Sine Wave from -2π to 2π')\nplt.xlabel('Angle (radians)')\nplt.ylabel('Sine value')\nplt.axhline(0, color='black', linewidth=0.5, linestyle='--')\nplt.axvline(0, color='black', linewidth=0.5, linestyle='--')\nplt.grid()\nplt.legend()\nplt.show()\"."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # Printed first, so the assistant's answer is visible if one of the checks below fails
    print(result.assistant_variants)

    # Generated code, code output and an image all have to be present
    assert result.code_variants
    assert result.codeoutput_variants
    assert result.image_variants

    if not display:
        return

    # For manual testing only, ipytest won't display the image
    from base64 import b64decode
    from IPython.display import Image, display as show_image
    for encoded_image in result.image_variants:
        show_image(Image(data=b64decode(encoded_image), format='png'))


def test_persistent_thread_storage():
    ''' Does the backend remember the content of a thread? ''' # Base functionality test
    first = generate_full_respone("Please add 2+2 in the code_interpreter tool.", chatbot="gpt-4o-mini")

    # The follow-up goes to the same thread_id, to test whether the storage is persistent
    follow_up = generate_full_respone(
        "Now please multiply the result by 3.",
        chatbot="gpt-4o-mini",
        thread_id=first.thread_id,
    )

    # The code output should now contain 12
    assert any("12" in output for output in follow_up.codeoutput_variants)


def test_persistant_state_storage():
    ''' Can the backend refer to the same variable in different tool calls? ''' # Since Version 1.6.3
    # This is about values surviving between tool calls within one request (not between requests)
    prompt = "Please assign the value 42 to the variable x in the code_interpreter tool. After that, call the tool with the code \"print(x, flush=True)\", without assigning x again. It's a test for the presistance of data."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # The code output should now contain 42
    assert any("42" in output for output in result.codeoutput_variants)
    # There also have to be exactly two code variants
    code_call_count = len(result.code_variants)
    assert code_call_count == 2


def test_persistant_xarray_storage():
    ''' Can the backend refer to the same xarray in different tool calls? ''' # Since Version 1.6.5
    prompt = "Please generate a simple xarray dataset in the code_interpreter tool and print out the content. After that, call the tool with the code \"print(ds, flush=True)\", without generating the dataset again. It's a test for the presistance of data, specifically whether xarray Datasets also work."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # The code output should now contain the content of the xarray dataset
    assert any("xarray.Dataset" in output for output in result.codeoutput_variants)
    # There also have to be exactly two code variants
    code_call_count = len(result.code_variants)
    assert code_call_count == 2


def test_qwen_available():
    ''' Can the backend use non-OpenAI chatbots, such as Qwen? ''' # Since Version 1.7.1
    prompt = "This is a test request for your basic functionality. Please respond with (200 Ok) and exit."
    result = generate_full_respone(prompt, chatbot="qwen2.5:3b")

    # The assistant output should now contain "(200 Ok)"
    assert any("(200 Ok)" in answer for answer in result.assistant_variants)


def test_qwen_code_interpreter():
    ''' Can the backend get a code response from Qwen? ''' # Since Version 1.7.1
    prompt = "Please use the code_interpreter tool to run `print(2938429834 * 234987234)`. Make sure to adhere to the JSON format!"
    result = generate_full_respone(prompt, chatbot="qwen2.5:3b")

    # The code output should now contain the result of the multiplication
    expected_product = "690493498994739156"
    assert any(expected_product in output for output in result.codeoutput_variants)

def test_heartbeat():
    ''' Can the backend send a heartbeat while a long calculation is running? ''' # Since Version 1.8.1
    prompt = "Please use the code_interpreter tool to run the following code: \"import time\ntime.sleep(7)\"."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # In total there should be at least three ServerHint variants
    hints = result.server_hint_variants
    assert len(hints) >= 3

    # The second ServerHint (first is thread_id) should be JSON containing all of the keys below
    first_heartbeat = json.loads(hints[1])
    for key in ("memory", "total_memory", "cpu_last_minute", "process_cpu", "process_memory"):
        assert key in first_heartbeat


# TODO: implement 1.8.3 feature of stopping a tool call! (and the 1.8.9 feature that derives from it)


def test_syntax_hinting():
    ''' Can the backend provide extended hints on syntax errors? ''' # Since Version 1.8.4
    prompt = "Please use the code_interpreter tool to run the following code: \"print('Hello World'\". This is a test for the improved syntax error reporting. If a hint containing the syntax error is returned, the test is successful."
    result = generate_full_respone(prompt, chatbot="gpt-4o-mini")

    # Both the hint text and "SyntaxError" have to show up in some code output
    for expected_text in ("Hint: the error occured on line", "SyntaxError"):
        assert any(expected_text in output for output in result.codeoutput_variants)

def test_regression_variable_storage():
    ''' Does the backend correctly handle the edge case of variable storage? ''' # Since Version 1.8.9
    prompt = (
        "This is a test on a corner case of the code_interpreter tool: variables don't seem to be stored if the code errors before the last line."
        "To test this. Please run the following code: \"x = 42\nraise Exception('This is a test exception')\nprint('Padding for last-line-logic')\","
    )
    failing_run = generate_full_respone(prompt, chatbot="gpt-4o-mini")
    # The code output should now contain the exception message
    assert any("This is a test exception" in output for output in failing_run.codeoutput_variants)

    # Now make sure the variable x is still stored, by asking for it in the same thread
    follow_up = generate_full_respone(
        "Now print the value of x without assigning it again.",
        chatbot="gpt-4o-mini",
        thread_id=failing_run.thread_id,
    )
    # The code output should now contain 42
    assert any("42" in output for output in follow_up.codeoutput_variants)


def test_o3_mini_available():
    ''' Can the backend use the O3-Mini chatbot, including for code_interpreter tool calls? ''' # Since Version 1.8.13
    prompt = "This is a test request for your basic functionality. Please use the code_interpreter tool to run `print('Hello World')` and exit."
    response = generate_full_respone(prompt, chatbot="o3-mini")

    # The Code Output should now contain "Hello World". The requested code prints
    # no exclamation mark, so the expected text must not contain one either.
    assert any("Hello World" in i for i in response.codeoutput_variants)
    

# Run tests

In [None]:
# Run the tests of this notebook through ipytest (set up by ipytest.autoconfig() in the setup cell).
ipytest.run()