In [1]:
import os

import mlflow

# MLflow tracking server. Defaults to the local server this notebook was
# written against; set MLFLOW_TRACKING_URI to point at a different one
# (same env-var-with-default pattern used for the LLM settings below).
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5500")

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment("rag")

# Enable MLflow's DSPy autologging for the rest of the notebook.
mlflow.dspy.autolog(
    log_compiles=True,    # Track optimization process
    log_evals=True,       # Track evaluation results
    log_traces_from_compile=True  # Track program traces during optimization
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import dspy
import openai
import os

# Endpoint / model settings. Each value can be overridden through an
# environment variable of the same name; the defaults target a local
# OpenAI-compatible server.
LLM_URL = os.getenv('LLM_URL', 'http://localhost:8080/v1')
API_KEY = os.getenv('API_KEY', 'fake')
LLM_MODEL = os.getenv('LLM_MODEL', 'openai/Llama-3.2-3B-Instruct-Q8_0.gguf')

# os.getenv returns a *string* whenever the variable is set, so coerce the
# numeric settings explicitly; otherwise their type depends on the environment.
MAX_TOKENS = int(os.getenv('MAX_TOKENS', '6000'))
TEMPERATURE = float(os.getenv('TEMPERATURE', '0.2'))

dspy.enable_logging()
lm = dspy.LM(model=LLM_MODEL,
             api_base=LLM_URL,  # ensure this points to your port
             api_key=API_KEY,
             temperature=TEMPERATURE,
             max_tokens=MAX_TOKENS,  # previously defined but never passed to the LM
             model_type='chat',
             stream=False)

# Make this LM the default for every DSPy module created below.
dspy.configure(lm=lm)
#dspy.settings.configure(track_usage=True)

In [None]:
# Minimal predictor built from an inline signature: one `question` input
# field and one `response` output field, both typed as str.
qa = dspy.Predict('question: str -> response: str')
# Run a single question through the configured LM; the result is kept in
# `response` (nothing is displayed by this cell besides the LM client logs).
response = qa(question="what are high memory and low memory on linux?")

[92m10:37:44 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m10:37:47 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m10:37:47 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:37:47 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:37:47 - LiteLLM:INFO[0m: cost_calculator.py:655 -

[92m10:40:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:40:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:40:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:40:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: Llama-3.2-3B-Instruct-Q8_0.gguf
[92m10:42:04 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calc

In [9]:
# Print the most recent LM interaction (n=1): the system message, the user
# message and the raw response, to see the prompt DSPy actually sent.
dspy.inspect_history(n=1)





[34m[2025-06-03T10:37:47.458714][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str)
Your output fields are:
1. `response` (str)
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## response ## ]]
{response}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `response`.


[31mUser message:[0m

[[ ## question ## ]]
what are high memory and low memory on linux?

Respond with the corresponding output fields, starting with the field `[[ ## response ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## response ## ]]
High memory and low memory are two terms used to describe the amount of free memory available on a Linux system. High memory refers to a system with a significant amount of free memory, typically above 50% of the total system memory. This all

In [3]:
# Chain-of-thought variant of the question -> response program. `cot` is
# reused by the metric demo and the evaluation cells further down.
cot = dspy.ChainOfThought('question -> response')
# Last expression of the cell, so the returned Prediction (with its
# `reasoning` and `response` fields) is displayed as the cell output.
cot(question="what is red hat linux?")

Prediction(
    reasoning='Red Hat Linux is a popular open-source operating system that is widely used in servers, desktops, and mobile devices. It is based on the Linux kernel and is known for its stability, security, and ease of use. Red Hat Linux is also a commercial version of Linux, which means it is supported by Red Hat, a company that provides free and paid support, as well as a wide range of software packages and services.',
    response='Red Hat Linux is a popular open-source operating system that is widely used in servers, desktops, and mobile devices. It is based on the Linux kernel and is known for its stability, security, and ease of use. Red Hat Linux is also a commercial version of Linux, which means it is supported by Red Hat, a company that provides free and paid support, as well as a wide range of software packages and services.'
)

In [4]:
import ujson
from dspy.utils import download

# Download question--answer pairs from the RAG-QA Arena "Tech" dataset.
download("https://huggingface.co/dspy/cache/resolve/main/ragqa_arena_tech_examples.jsonl")

# The file is JSON Lines: one JSON object per line.
# - Read it as UTF-8 explicitly so the result does not depend on the
#   platform's default text encoding.
# - Skip blank lines (e.g. a trailing newline at end of file), which would
#   otherwise make ujson.loads raise.
with open("ragqa_arena_tech_examples.jsonl", encoding="utf-8") as f:
    data = [ujson.loads(line) for line in f if line.strip()]

In [5]:
# Inspect one datapoint. At this stage each record is a plain dict; the one
# shown below has the keys 'question', 'response' and 'gold_doc_ids'.
data[0]

{'question': 'why igp is used in mpls?',
 'response': "An IGP exchanges routing prefixes between gateways/routers.  \nWithout a routing protocol, you'd have to configure each route on every router and you'd have no dynamic updates when routes change because of link failures. \nFuthermore, within an MPLS network, an IGP is vital for advertising the internal topology and ensuring connectivity for MP-BGP inside the network.",
 'gold_doc_ids': [2822, 2823]}

In [6]:
# Wrap every raw record in a dspy.Example and mark `question` as the only
# input field.
# NOTE: this rebinds `data` from a list of dicts to a list of dspy.Example
# objects, so the cells below see Examples, and re-running this cell feeds it
# Examples instead of the raw dicts loaded above.
data = [dspy.Example(**d).with_inputs('question') for d in data]

# Let's pick an `example` here from the data.
# (Picked before the shuffle in the next cell, so it is index 2 of the file order.)
example = data[2]
example

Example({'question': 'why are my text messages coming up as maybe?', 'response': 'This is part of the Proactivity features new with iOS 9: It looks at info in emails to see if anyone with this number sent you an email and if it finds the phone number associated with a contact from your email, it will show you "Maybe". \n\nHowever, it has been suggested there is a bug in iOS 11.2 that can result in "Maybe" being displayed even when "Find Contacts in Other Apps" is disabled.', 'gold_doc_ids': [3956, 3957, 8034]}) (input_keys={'question'})

In [7]:
import random

# Shuffle `data` in place with a dedicated generator seeded with 0.
# Because the shuffle mutates `data`, re-running this cell without first
# re-running the loading cells permutes the already-shuffled list again.
random.Random(0).shuffle(data)

# Carve three consecutive, non-overlapping slices out of the shuffled list.
trainset = data[:200]
devset = data[200:500]
testset = data[500:1000]

# Display the split sizes as the cell output.
tuple(len(split) for split in (trainset, devset, testset))

(200, 300, 500)

In [8]:
from dspy.evaluate import SemanticF1

# Instantiate the metric.
# NOTE(review): the metric appears to be LM-based (the inspect_history output
# after this cell shows a ground_truth / system_response prompt) -- so
# scoring presumably issues its own LM call; confirm against the DSPy docs.
metric = SemanticF1(decompositional=True)

# Produce a prediction from our `cot` module, using the `example` above as input.
# `example.inputs()` keeps only the fields marked as inputs (here: `question`).
pred = cot(**example.inputs())

# Show the raw gold example and the raw prediction objects.
print(example)
print(pred)

# Compute the metric score for the prediction.
score = metric(example, pred)

# Human-readable side-by-side summary; the score is shown with two decimals.
print(f"Question: \t {example.question}\n")
print(f"Gold Response: \t {example.response}\n")
print(f"Predicted Response: \t {pred.response}\n")
print(f"Semantic F1 Score: {score:.2f}")

Example({'question': 'why are my text messages coming up as maybe?', 'response': 'This is part of the Proactivity features new with iOS 9: It looks at info in emails to see if anyone with this number sent you an email and if it finds the phone number associated with a contact from your email, it will show you "Maybe". \n\nHowever, it has been suggested there is a bug in iOS 11.2 that can result in "Maybe" being displayed even when "Find Contacts in Other Apps" is disabled.', 'gold_doc_ids': [3956, 3957, 8034]}) (input_keys={'question'})
Prediction(
    reasoning='Your text messages are coming up as "maybe" because of the way you\'re formatting your text messages. In many messaging platforms, including SMS and iMessage, the default formatting for text messages is to use a "maybe" or "unknown" sender ID if the phone number is not recognized or if the message is sent to a number that is not in the recipient\'s contact list.\n\nThis is a security feature designed to prevent spam messages f

In [9]:
# Print the most recent LM interaction (n=1). Run right after the metric
# cell, this shows the prompt used for scoring rather than the `cot` prompt.
dspy.inspect_history(n=1)





[34m[2025-06-03T11:10:59.081299][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str)
2. `ground_truth` (str)
3. `system_response` (str)
Your output fields are:
1. `reasoning` (str)
2. `ground_truth_key_ideas` (str): enumeration of key ideas in the ground truth
3. `system_response_key_ideas` (str): enumeration of key ideas in the system response
4. `discussion` (str): discussion of the overlap between ground truth and system response
5. `recall` (float): fraction (out of 1.0) of ground truth covered by the system response
6. `precision` (float): fraction (out of 1.0) of system response covered by the ground truth
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## ground_truth ## ]]
{ground_truth}

[[ ## system_response ## ]]
{system_response}

[[ ## reasoning ## ]]
{reasoning}

[[ ## ground_truth_key_ideas ## ]]
{ground_truth_key_ideas}

[[ ## system_response_key_ideas ## ]]


In [None]:
import mlflow

# Evaluate the `cot` program on the dev split inside a named MLflow run and
# record both the aggregate score and the per-example results.
with mlflow.start_run(run_name="rag_evaluation"):
    evaluate = dspy.Evaluate(
        devset=devset,
        metric=metric,
        num_threads=24,
        display_progress=True,
        # To record the outputs and detailed scores to MLflow
        return_all_scores=True,
        return_outputs=True,
    )

    # Evaluate the program as usual
    aggregated_score, outputs, all_scores = evaluate(cot)

    # Log the aggregated score
    mlflow.log_metric("semantic_f1_score", aggregated_score)

    # Log the detailed evaluation results as a table.
    # The question / gold columns are built from `devset`, the same split that
    # was passed to dspy.Evaluate above, so rows line up with the scores.
    # (This previously iterated over an undefined name `eval_set`, which raised
    # NameError only after the whole evaluation had finished.)
    # NOTE(review): depending on the DSPy version, `outputs` may contain
    # (example, prediction, score) tuples rather than bare predictions --
    # confirm what ends up in the "Predicted Response" column.
    mlflow.log_table(
        {
            "Question": [example.question for example in devset],
            "Gold Response": [example.response for example in devset],
            "Predicted Response": outputs,
            "Semantic F1 Score": all_scores,
        },
        artifact_file="eval_results.json",
    )

  0%|          | 0/300 [00:00<?, ?it/s]

[92m11:11:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:11:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:11:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:11:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:11:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gg

Average Metric: 0.67 / 1 (66.7%):   0%|          | 1/300 [00:53<4:26:15, 53.43s/it]

[92m11:12:10 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:15 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 1.28 / 2 (64.1%):   1%|          | 2/300 [00:58<2:02:46, 24.72s/it]

[92m11:12:15 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:15 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 2.17 / 3 (72.4%):   1%|          | 3/300 [00:58<1:06:48, 13.50s/it]

[92m11:12:15 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 2.84 / 4 (70.9%):   1%|▏         | 4/300 [00:59<42:45,  8.67s/it]  

INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:Lit

Average Metric: 3.73 / 5 (74.5%):   1%|▏         | 4/300 [00:59<42:45,  8.67s/it]

[92m11:12:16 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 4.39 / 6 (73.2%):   2%|▏         | 6/300 [01:01<22:55,  4.68s/it]

[92m11:12:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:20 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 4.71 / 7 (67.3%):   2%|▏         | 7/300 [01:03<18:15,  3.74s/it]

[92m11:12:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 5.60 / 8 (70.0%):   3%|▎         | 8/300 [01:03<14:03,  2.89s/it]

[92m11:12:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 5.94 / 9 (65.9%):   3%|▎         | 9/300 [01:03<10:10,  2.10s/it]

[92m11:12:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:21 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 6.82 / 10 (68.2%):   3%|▎         | 9/300 [01:03<10:10,  2.10s/it]

[92m11:12:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:23 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:23 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:23 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:23 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 7.49 / 11 (68.1%):   4%|▎         | 11/300 [01:06<08:38,  1.79s/it]

[92m11:12:24 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:24 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 8.16 / 12 (68.0%):   4%|▍         | 12/300 [01:06<06:40,  1.39s/it]

[92m11:12:24 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:25 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:25 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:25 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:25 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 8.82 / 13 (67.9%):   4%|▍         | 13/300 [01:07<06:16,  1.31s/it]

[92m11:12:25 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:26 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 9.49 / 14 (67.8%):   5%|▍         | 14/300 [01:09<06:13,  1.31s/it]

[92m11:12:26 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:27 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:27 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 10.67 / 16 (66.7%):   5%|▌         | 15/300 [01:10<06:04,  1.28s/it]

[92m11:12:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:12:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 11.27 / 17 (66.3%):   6%|▌         | 17/300 [01:13<06:49,  1.45s/it]

[92m11:12:31 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:31 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 11.27 / 18 (62.6%):   6%|▌         | 18/300 [01:15<07:25,  1.58s/it]

[92m11:12:33 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:33 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 11.94 / 19 (62.8%):   6%|▋         | 19/300 [01:16<05:54,  1.26s/it]

[92m11:12:33 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:35 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:35 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 12.60 / 20 (63.0%):   7%|▋         | 20/300 [01:26<17:41,  3.79s/it]

[92m11:12:44 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:44 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:44 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:44 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:44 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 13.45 / 21 (64.1%):   7%|▋         | 21/300 [01:30<17:32,  3.77s/it]

[92m11:12:47 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:49 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:49 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:49 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:49 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 14.12 / 22 (64.2%):   7%|▋         | 22/300 [01:32<14:59,  3.24s/it]

[92m11:12:49 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:12:51 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:12:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:12:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 14.12 / 23 (61.4%):   8%|▊         | 23/300 [01:48<32:09,  6.97s/it]

[92m11:13:05 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:06 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 14.12 / 24 (58.8%):   8%|▊         | 24/300 [01:49<23:38,  5.14s/it]

[92m11:13:06 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:06 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:06 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 14.79 / 25 (59.1%):   8%|▊         | 25/300 [01:51<19:56,  4.35s/it]

[92m11:13:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 14.79 / 26 (56.9%):   9%|▊         | 26/300 [01:55<19:12,  4.21s/it]

[92m11:13:12 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:14 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:14 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:14 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:14 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 14.79 / 27 (54.8%):   9%|▉         | 27/300 [01:59<19:12,  4.22s/it]

[92m11:13:17 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:18 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:18 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:18 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:18 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 15.45 / 28 (55.2%):   9%|▉         | 28/300 [02:01<15:23,  3.40s/it]

[92m11:13:18 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 16.34 / 29 (56.3%):  10%|▉         | 29/300 [02:02<12:14,  2.71s/it]

[92m11:13:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 17.01 / 31 (54.9%):  10%|█         | 30/300 [02:04<11:05,  2.47s/it]

[92m11:13:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:13:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:22 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 17.67 / 32 (55.2%):  11%|█         | 32/300 [02:05<07:14,  1.62s/it]

[92m11:13:22 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:24 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:24 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 17.67 / 33 (53.6%):  11%|█         | 33/300 [02:09<09:44,  2.19s/it]

[92m11:13:26 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:27 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 17.67 / 34 (52.0%):  11%|█▏        | 34/300 [02:10<08:04,  1.82s/it]

[92m11:13:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:27 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:27 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 17.67 / 35 (50.5%):  12%|█▏        | 35/300 [02:10<06:13,  1.41s/it]

[92m11:13:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:29 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 18.56 / 36 (51.6%):  12%|█▏        | 36/300 [02:13<08:31,  1.94s/it]

[92m11:13:31 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:31 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 19.23 / 37 (52.0%):  12%|█▏        | 37/300 [02:16<09:30,  2.17s/it]

[92m11:13:33 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:33 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:33 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 20.08 / 38 (52.8%):  13%|█▎        | 38/300 [02:19<09:48,  2.24s/it]

[92m11:13:36 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:38 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 20.74 / 39 (53.2%):  13%|█▎        | 39/300 [02:20<09:16,  2.13s/it]

[92m11:13:38 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:39 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:39 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 20.74 / 40 (51.9%):  13%|█▎        | 40/300 [02:22<08:17,  1.91s/it]

[92m11:13:39 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:13:39 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:42 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 21.41 / 41 (52.2%):  14%|█▎        | 41/300 [02:28<14:14,  3.30s/it]

[92m11:13:46 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:48 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:48 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:48 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:48 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.

Average Metric: 22.08 / 43 (51.3%):  14%|█▍        | 42/300 [02:31<13:24,  3.12s/it]

[92m11:13:48 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:13:48 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:48 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:48 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:48 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 22.92 / 44 (52.1%):  15%|█▍        | 44/300 [02:31<07:18,  1.71s/it]

[92m11:13:49 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:50 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:50 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 23.74 / 45 (52.8%):  15%|█▌        | 45/300 [02:41<15:46,  3.71s/it]

[92m11:13:58 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:13:58 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:13:58 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:58 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:13:58 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 23.74 / 46 (51.6%):  15%|█▌        | 46/300 [02:50<22:01,  5.20s/it]

[92m11:14:08 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:14:08 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:08 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:08 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:08 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 24.36 / 47 (51.8%):  16%|█▌        | 47/300 [02:51<16:07,  3.83s/it]

[92m11:14:08 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:10 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:10 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:10 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:10 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 25.20 / 49 (51.4%):  16%|█▌        | 48/300 [02:55<16:34,  3.95s/it]

[92m11:14:12 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:12 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:12 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 25.87 / 50 (51.7%):  17%|█▋        | 50/300 [02:57<10:56,  2.63s/it]

[92m11:14:14 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:16 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 26.72 / 51 (52.4%):  17%|█▋        | 51/300 [03:01<12:35,  3.04s/it]

[92m11:14:18 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 26.72 / 52 (51.4%):  17%|█▋        | 52/300 [03:01<09:32,  2.31s/it]

[92m11:14:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 27.38 / 53 (51.7%):  18%|█▊        | 53/300 [03:04<09:28,  2.30s/it]

[92m11:14:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:21 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 27.38 / 54 (50.7%):  18%|█▊        | 54/300 [03:04<06:58,  1.70s/it]

[92m11:14:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:23 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:23 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:23 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:23 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 28.07 / 55 (51.0%):  18%|█▊        | 55/300 [03:08<10:06,  2.48s/it]

[92m11:14:25 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:26 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 28.74 / 56 (51.3%):  19%|█▊        | 56/300 [03:09<08:23,  2.06s/it]

[92m11:14:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:28 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:28 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 29.40 / 57 (51.6%):  19%|█▉        | 57/300 [03:11<08:07,  2.00s/it]

[92m11:14:28 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:29 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 30.07 / 58 (51.8%):  19%|█▉        | 58/300 [03:11<06:00,  1.49s/it]

[92m11:14:29 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:31 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 30.07 / 59 (51.0%):  20%|█▉        | 59/300 [03:13<06:46,  1.69s/it]

[92m11:14:31 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:32 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:32 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:32 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 30.74 / 60 (51.2%):  20%|██        | 60/300 [03:18<09:58,  2.50s/it]

[92m11:14:35 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:37 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:37 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:37 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:37 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 31.40 / 61 (51.5%):  20%|██        | 61/300 [03:19<08:52,  2.23s/it]

[92m11:14:37 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:39 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:39 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 32.07 / 62 (51.7%):  21%|██        | 62/300 [03:27<14:39,  3.70s/it]

[92m11:14:44 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:45 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:45 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:45 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:45 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 32.74 / 63 (52.0%):  21%|██        | 63/300 [03:27<11:09,  2.82s/it]

[92m11:14:45 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:46 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:46 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 33.40 / 64 (52.2%):  21%|██▏       | 64/300 [03:29<09:57,  2.53s/it]

[92m11:14:47 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:49 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:49 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:49 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:49 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 33.40 / 65 (51.4%):  22%|██▏       | 65/300 [03:35<14:07,  3.61s/it]

[92m11:14:53 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:14:53 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:14:53 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:53 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:14:53 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 33.74 / 66 (51.1%):  22%|██▏       | 66/300 [03:51<28:20,  7.27s/it]

[92m11:15:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 34.40 / 67 (51.3%):  22%|██▏       | 67/300 [03:53<22:15,  5.73s/it]

[92m11:15:11 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:12 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 35.07 / 68 (51.6%):  23%|██▎       | 68/300 [03:57<19:57,  5.16s/it]

[92m11:15:14 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:17 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:17 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:17 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:17 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 35.74 / 69 (51.8%):  23%|██▎       | 69/300 [04:02<19:10,  4.98s/it]

[92m11:15:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:20 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 36.42 / 70 (52.0%):  23%|██▎       | 70/300 [04:05<17:10,  4.48s/it]

[92m11:15:22 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:27 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:27 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 37.94 / 72 (52.7%):  24%|██▎       | 71/300 [04:10<17:12,  4.51s/it]

[92m11:15:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 37.94 / 73 (52.0%):  24%|██▍       | 73/300 [04:11<10:45,  2.84s/it]

[92m11:15:29 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:29 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 38.60 / 74 (52.2%):  25%|██▍       | 74/300 [04:13<09:53,  2.63s/it]

[92m11:15:31 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:31 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 39.17 / 75 (52.2%):  25%|██▌       | 75/300 [04:21<14:37,  3.90s/it]

[92m11:15:38 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:38 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:38 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 39.84 / 76 (52.4%):  25%|██▌       | 76/300 [04:21<10:57,  2.93s/it]

[92m11:15:39 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:40 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:40 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 39.84 / 77 (51.7%):  26%|██▌       | 77/300 [04:25<12:04,  3.25s/it]

[92m11:15:43 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:43 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:43 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:43 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:43 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 40.83 / 79 (51.7%):  26%|██▌       | 78/300 [04:28<11:05,  3.00s/it]

[92m11:15:45 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:15:45 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:46 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 41.72 / 80 (52.1%):  27%|██▋       | 80/300 [04:33<10:26,  2.85s/it]

[92m11:15:50 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:51 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:51 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 42.61 / 81 (52.6%):  27%|██▋       | 81/300 [04:34<08:44,  2.40s/it]

[92m11:15:51 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:53 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:53 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:53 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:53 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 43.27 / 82 (52.8%):  27%|██▋       | 82/300 [04:35<07:50,  2.16s/it]

[92m11:15:53 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:15:55 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:15:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:15:55 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 43.94 / 83 (52.9%):  28%|██▊       | 83/300 [04:45<14:56,  4.13s/it]

[92m11:16:02 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:02 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:02 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 44.61 / 84 (53.1%):  28%|██▊       | 84/300 [04:45<11:00,  3.06s/it]

[92m11:16:03 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:06 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:06 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 45.27 / 85 (53.3%):  28%|██▊       | 85/300 [04:49<11:24,  3.18s/it]

[92m11:16:06 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:07 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 46.16 / 86 (53.7%):  29%|██▊       | 86/300 [04:50<09:01,  2.53s/it]

[92m11:16:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:07 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:07 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 46.16 / 87 (53.1%):  29%|██▉       | 87/300 [04:54<10:24,  2.93s/it]

[92m11:16:11 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:12 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:12 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 47.05 / 88 (53.5%):  29%|██▉       | 88/300 [04:56<09:57,  2.82s/it]

[92m11:16:13 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:15 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:15 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:15 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 47.05 / 88 (53.5%):  30%|██▉       | 89/300 [05:07<18:20,  5.22s/it]

[92m11:16:24 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:26 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:26 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 47.72 / 89 (53.6%):  30%|███       | 90/300 [05:09<14:54,  4.26s/it]

[92m11:16:26 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:28 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:28 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 48.61 / 90 (54.0%):  30%|███       | 91/300 [05:14<15:54,  4.57s/it]

[92m11:16:32 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:35 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 49.27 / 91 (54.1%):  31%|███       | 92/300 [05:18<14:48,  4.27s/it]

[92m11:16:35 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:36 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:36 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:36 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:36 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 49.87 / 92 (54.2%):  31%|███       | 93/300 [05:19<11:18,  3.28s/it]

[92m11:16:36 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:40 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 50.87 / 94 (54.1%):  31%|███▏      | 94/300 [05:23<11:44,  3.42s/it]

INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:40 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:40 - LiteLLM:INFO[0m: cost_calculat

Average Metric: 51.76 / 95 (54.5%):  32%|███▏      | 96/300 [05:27<09:41,  2.85s/it]

[92m11:16:44 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:45 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:45 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:45 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:45 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 52.61 / 96 (54.8%):  32%|███▏      | 97/300 [05:28<07:52,  2.33s/it]

[92m11:16:45 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:46 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 53.14 / 97 (54.8%):  33%|███▎      | 98/300 [05:29<07:16,  2.16s/it]

[92m11:16:47 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:50 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 53.14 / 98 (54.2%):  33%|███▎      | 99/300 [05:34<09:54,  2.96s/it]

[92m11:16:52 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:54 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:54 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:54 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:54 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 54.03 / 99 (54.6%):  33%|███▎      | 100/300 [05:37<09:26,  2.83s/it]

[92m11:16:54 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:56 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:56 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:56 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:56 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 54.70 / 100 (54.7%):  34%|███▎      | 101/300 [05:39<08:26,  2.55s/it]

[92m11:16:56 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:57 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:57 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 55.02 / 101 (54.5%):  34%|███▍      | 102/300 [05:40<06:56,  2.10s/it]

[92m11:16:57 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:16:59 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:16:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:16:59 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 55.02 / 102 (53.9%):  34%|███▍      | 103/300 [05:47<11:51,  3.61s/it]

[92m11:17:04 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:05 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:05 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:05 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:05 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 55.87 / 103 (54.2%):  35%|███▍      | 104/300 [05:50<11:02,  3.38s/it]

[92m11:17:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:08 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:08 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:08 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:08 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 56.54 / 104 (54.4%):  35%|███▌      | 105/300 [05:51<08:50,  2.72s/it]

[92m11:17:08 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 57.14 / 105 (54.4%):  35%|███▌      | 106/300 [05:52<06:56,  2.15s/it]

[92m11:17:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:13 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 57.99 / 106 (54.7%):  36%|███▌      | 107/300 [05:57<10:08,  3.15s/it]

[92m11:17:15 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:16 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 58.84 / 107 (55.0%):  36%|███▌      | 108/300 [06:03<12:31,  3.91s/it]

[92m11:17:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 59.50 / 108 (55.1%):  36%|███▋      | 109/300 [06:04<09:45,  3.07s/it]

[92m11:17:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:24 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:24 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 59.50 / 109 (54.6%):  37%|███▋      | 110/300 [06:09<11:15,  3.55s/it]

[92m11:17:26 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:27 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:27 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 60.39 / 110 (54.9%):  37%|███▋      | 111/300 [06:10<08:36,  2.73s/it]

[92m11:17:27 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:28 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 60.99 / 111 (54.9%):  37%|███▋      | 112/300 [06:10<06:40,  2.13s/it]

[92m11:17:28 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:29 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 61.66 / 112 (55.1%):  38%|███▊      | 113/300 [06:12<05:58,  1.92s/it]

[92m11:17:29 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:30 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:30 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:30 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:30 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 62.55 / 113 (55.4%):  38%|███▊      | 114/300 [06:13<04:51,  1.57s/it]

[92m11:17:30 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:31 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:31 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:31 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 62.55 / 114 (54.9%):  38%|███▊      | 115/300 [06:33<21:57,  7.12s/it]

[92m11:17:50 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:50 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:50 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 63.21 / 115 (55.0%):  39%|███▊      | 116/300 [06:33<15:25,  5.03s/it]

[92m11:17:50 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:51 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:51 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:51 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 63.88 / 116 (55.1%):  39%|███▉      | 117/300 [06:37<14:20,  4.70s/it]

[92m11:17:54 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:17:57 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:17:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:17:57 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 63.88 / 117 (54.6%):  39%|███▉      | 118/300 [06:40<13:00,  4.29s/it]

[92m11:17:57 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:00 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 64.55 / 118 (54.7%):  40%|███▉      | 119/300 [06:42<11:08,  3.69s/it]

[92m11:18:00 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:00 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 65.30 / 119 (54.9%):  40%|████      | 120/300 [06:45<10:11,  3.40s/it]

[92m11:18:02 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:03 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:03 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:03 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:03 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 65.96 / 120 (55.0%):  40%|████      | 121/300 [06:48<10:00,  3.36s/it]

[92m11:18:06 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:06 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:06 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 66.63 / 121 (55.1%):  41%|████      | 122/300 [06:51<09:03,  3.05s/it]

[92m11:18:08 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 66.63 / 122 (54.6%):  41%|████      | 123/300 [06:51<07:01,  2.38s/it]

[92m11:18:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:09 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 67.30 / 123 (54.7%):  41%|████      | 123/300 [06:52<07:01,  2.38s/it]

[92m11:18:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:10 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:10 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:10 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:10 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 67.30 / 124 (54.3%):  42%|████▏     | 125/300 [06:53<04:33,  1.56s/it]

[92m11:18:10 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:12 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 68.19 / 125 (54.5%):  42%|████▏     | 126/300 [06:54<04:40,  1.61s/it]

[92m11:18:12 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:12 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:12 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 68.51 / 126 (54.4%):  42%|████▏     | 127/300 [07:01<08:37,  2.99s/it]

[92m11:18:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 69.39 / 127 (54.6%):  43%|████▎     | 128/300 [07:03<07:10,  2.50s/it]

[92m11:18:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:20 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 70.25 / 128 (54.9%):  43%|████▎     | 129/300 [07:03<05:18,  1.86s/it]

[92m11:18:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 70.25 / 129 (54.5%):  43%|████▎     | 130/300 [07:04<04:43,  1.67s/it]

[92m11:18:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:22 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:22 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 70.92 / 130 (54.6%):  44%|████▎     | 131/300 [07:06<05:10,  1.84s/it]

[92m11:18:23 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:24 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:24 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:24 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 71.58 / 131 (54.6%):  44%|████▍     | 132/300 [07:17<12:29,  4.46s/it]

[92m11:18:34 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:18:34 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:37 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:37 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:37 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 72.25 / 132 (54.7%):  44%|████▍     | 133/300 [07:38<26:03,  9.36s/it]

[92m11:18:55 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:18:56 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:18:56 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:56 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:18:56 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 73.11 / 133 (55.0%):  45%|████▍     | 134/300 [07:43<22:41,  8.20s/it]

[92m11:19:01 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:01 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:01 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:01 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 73.77 / 134 (55.1%):  45%|████▌     | 135/300 [07:47<18:58,  6.90s/it]

[92m11:19:05 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:07 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:07 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 74.44 / 135 (55.1%):  45%|████▌     | 136/300 [07:50<15:01,  5.50s/it]

[92m11:19:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 75.33 / 136 (55.4%):  46%|████▌     | 137/300 [07:52<12:19,  4.54s/it]

[92m11:19:09 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:09 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:09 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 76.00 / 137 (55.5%):  46%|████▌     | 138/300 [07:54<10:05,  3.74s/it]

[92m11:19:11 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:13 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 76.84 / 138 (55.7%):  46%|████▋     | 139/300 [07:55<08:25,  3.14s/it]

[92m11:19:13 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:13 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:13 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 77.51 / 139 (55.8%):  47%|████▋     | 140/300 [07:56<05:58,  2.24s/it]

[92m11:19:13 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:14 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:14 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:14 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:14 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 78.13 / 140 (55.8%):  47%|████▋     | 141/300 [07:58<06:16,  2.37s/it]

[92m11:19:16 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:17 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:17 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:17 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:17 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 78.97 / 141 (56.0%):  47%|████▋     | 142/300 [08:01<06:35,  2.50s/it]

[92m11:19:18 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 79.82 / 142 (56.2%):  48%|████▊     | 143/300 [08:04<06:37,  2.53s/it]

[92m11:19:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:22 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 80.67 / 143 (56.4%):  48%|████▊     | 144/300 [08:09<08:38,  3.33s/it]

[92m11:19:26 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:28 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:28 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:28 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 81.51 / 144 (56.6%):  48%|████▊     | 145/300 [08:11<07:36,  2.95s/it]

[92m11:19:28 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:30 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:30 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:30 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:30 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 82.18 / 145 (56.7%):  49%|████▊     | 146/300 [08:15<08:28,  3.30s/it]

[92m11:19:32 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:33 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:33 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 82.85 / 146 (56.7%):  49%|████▉     | 147/300 [08:20<09:22,  3.68s/it]

[92m11:19:37 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:19:39 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:19:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:19:39 - LiteLLM:INFO[0m: cost_calculator.py:655 -

In [None]:
# Define an evaluator that we can re-use.
# NOTE(review): both the evaluator construction and its invocation below are
# commented out, so running this cell as saved executes nothing. The progress
# bar and log output stored under this cell appear to come from an earlier run
# made while the code was still active -- clear the output or re-enable the code
# so the two agree.
# `devset`, `metric` and `cot` are not defined in this cell; presumably they
# come from earlier cells -- confirm they exist on a fresh kernel before
# re-enabling.
#evaluate = dspy.Evaluate(devset=devset, metric=metric, num_threads=24,
#                         display_progress=True, display_table=2)

# Evaluate the Chain-of-Thought program.
#evaluate(cot)

  0%|          | 0/300 [00:00<?, ?it/s]

[92m11:07:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:07:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:07:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:07:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:07:07 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gg

Average Metric: 0.00 / 1 (0.0%):   0%|          | 1/300 [00:46<3:51:57, 46.55s/it]

[92m11:07:53 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:07:54 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:07:54 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:54 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:54 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 0.67 / 2 (33.3%):   1%|          | 2/300 [00:47<1:38:20, 19.80s/it]

[92m11:07:54 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:07:55 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:07:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:55 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:55 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM

Average Metric: 0.67 / 3 (22.2%):   1%|          | 3/300 [00:49<56:27, 11.40s/it]  

[92m11:07:56 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:07:57 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:07:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:57 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf


Average Metric: 0.67 / 4 (16.7%):   1%|▏         | 4/300 [00:51<37:52,  7.68s/it]

[92m11:07:58 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:07:59 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:07:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:07:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 0.67 / 5 (13.3%):   2%|▏         | 5/300 [00:52<26:52,  5.47s/it]

[92m11:07:59 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:02 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:02 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 1.33 / 6 (22.2%):   2%|▏         | 6/300 [00:55<22:28,  4.59s/it]

[92m11:08:02 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:02 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:02 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf


Average Metric: 2.18 / 7 (31.1%):   2%|▏         | 7/300 [00:55<15:24,  3.15s/it]

[92m11:08:02 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:04 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:04 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:04 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:04 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 2.85 / 8 (35.6%):   3%|▎         | 8/300 [00:57<13:48,  2.84s/it]

[92m11:08:04 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:05 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:05 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:05 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:05 - LiteLLM:INFO[0m: cost_ca

Average Metric: 3.51 / 9 (39.0%):   3%|▎         | 9/300 [00:58<11:00,  2.27s/it]

[92m11:08:05 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:08:05 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:07 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:07 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 4.18 / 10 (41.8%):   3%|▎         | 10/300 [01:03<14:21,  2.97s/it]

[92m11:08:10 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:11 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:11 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:11 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:11 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM

Average Metric: 4.85 / 11 (44.1%):   4%|▎         | 11/300 [01:07<15:51,  3.29s/it]

[92m11:08:14 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf


Average Metric: 5.51 / 12 (45.9%):   4%|▍         | 12/300 [01:09<14:16,  2.97s/it]

[92m11:08:16 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:16 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:16 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 6.26 / 13 (48.2%):   4%|▍         | 12/300 [01:09<14:16,  2.97s/it]

[92m11:08:16 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:18 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:18 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:18 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf


Average Metric: 6.88 / 14 (49.1%):   5%|▍         | 14/300 [01:11<09:51,  2.07s/it]

[92m11:08:18 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:19 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:19 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:19 - LiteLLM:INFO[0m: cost_calculator.py:655 -

Average Metric: 7.55 / 15 (50.3%):   5%|▌         | 15/300 [01:12<08:47,  1.85s/it]

[92m11:08:19 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:20 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:20 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.

Average Metric: 9.06 / 17 (53.3%):   5%|▌         | 16/300 [01:13<06:45,  1.43s/it]

[92m11:08:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
[92m11:08:20 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:21 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:21 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected m

Average Metric: 9.06 / 18 (50.3%):   6%|▌         | 18/300 [01:14<05:33,  1.18s/it]

[92m11:08:21 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:22 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:22 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 9.73 / 19 (51.2%):   6%|▋         | 19/300 [01:17<06:58,  1.49s/it]

[92m11:08:24 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:25 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:25 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:25 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:25 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM

Average Metric: 10.34 / 20 (51.7%):   7%|▋         | 20/300 [01:22<10:45,  2.30s/it]

[92m11:08:29 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= Llama-3.2-3B-Instruct-Q8_0.gguf; provider = openai
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:29 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:29 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:29 - LiteLLM:INFO[0m: utils.py:2991 - 
LiteLLM

🏃 View run eval at: http://localhost:5500/#/experiments/414799578984116612/runs/6aadb11566024c76b4a1aae148c9766a
🧪 View experiment at: http://localhost:5500/#/experiments/414799578984116612


KeyboardInterrupt: 

INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:43 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:43 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
[92m11:08:43 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:LiteLLM:selected model name for cost calculation: openai/Llama-3.2-3B-Instruct-Q8_0.gguf
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/chat/completions "HTTP/1.1 200 OK"
[92m11:08:44 - LiteLLM:INFO[0m: utils.py:1213 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m11:08:44 - LiteLLM:INFO[