# Read Value Benchmark

In [None]:
# add parent directory to path
import sys
sys.path.append('..')

import t2ebm
from t2ebm import graphs
from t2ebm import prompts

import json
import numpy as np
import random
import pickle

## Create the benchmark

In [None]:
# Load the pre-computed graphs from the local pickle file.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# point this at an all_graphs.pkl that comes from a trusted source.
with open("all_graphs.pkl", mode="rb") as graphs_file:
    all_graphs = pickle.load(graphs_file)

In [None]:
def graph_mean(graph, x_val):
    """Returns the mean of the graph at x_val.

    Args:
        graph: object with ``x_vals`` (a sequence of bins, each indexable as
            ``bin[0]`` = lower bound and ``bin[1]`` = upper bound) and
            ``scores`` (one mean per bin, in the same order).
        x_val: the x position to look up.

    Returns:
        The score of the first bin whose closed interval contains ``x_val``.
        If no bin contains ``x_val`` (it lies outside the covered range or in
        a gap between bins), the score of the nearest bin is returned instead
        of silently falling back to the first bin.
    """
    nearest_index = 0
    nearest_gap = float("inf")
    for idx, x_bin in enumerate(graph.x_vals):
        lower, upper = x_bin[0], x_bin[1]
        # bounds are inclusive; on a shared edge the earlier bin wins
        if lower <= x_val <= upper:
            return graph.scores[idx]
        # distance from x_val to this bin, used only when nothing contains it
        gap = lower - x_val if x_val < lower else x_val - upper
        if gap < nearest_gap:
            nearest_index = idx
            nearest_gap = gap
    # x_val is not inside any bin: use the closest one
    return graph.scores[nearest_index]

In [None]:
# Take the first entry and show its text representation.
example_graph = all_graphs[0]
example_text = example_graph[1]
print(example_text)

# Parse the text back into a graph and read one value from it
# (the bare expression is rendered as the cell output).
graph_mean(graphs.text_to_graph(example_text), x_val=-124.1)

This graph represents a continuous-valued feature. The keys are intervals that represent ranges where the function predicts the same value.

Feature Name: longitude
Feature Type: continuous
Means: {"(-124.35, -124.10499999999999)": -50430.1, "(-124.10499999999999, -124.08500000000001)": -38925.6, "(-124.08500000000001, -124.07499999999999)": -23742.3, "(-124.07499999999999, -123.3)": -12526.0, "(-123.3, -122.955)": -1690.2, "(-122.955, -122.66499999999999)": 19040.8, "(-122.66499999999999, -122.60499999999999)": 29856.3, "(-122.60499999999999, -122.58500000000001)": 44315.6, "(-122.58500000000001, -122.555)": 75515.2, "(-122.555, -122.455)": 86444.1, "(-122.455, -122.445)": 99533.8, "(-122.445, -122.42500000000001)": 112351.5, "(-122.42500000000001, -122.405)": 89733.4, "(-122.405, -122.39500000000001)": 78586.0, "(-122.39500000000001, -122.375)": 46429.6, "(-122.375, -122.36500000000001)": 35622.6, "(-122.36500000000001, -122.305)": 20538.8, "(-122.305, -122.155)": 6386.6, "(-122.155,

-38925.6

In [None]:
# Rounding a sampled point to 2 decimals can move it onto a bin edge (where two
# bins share the value and the answer is ambiguous) or outside the sampled bin.
# We therefore resample until the rounded point is strictly inside its bin,
# giving up on a graph after this many attempts.
max_sampling_attempts = 100

# Task description shared by every question; it does not depend on the graph,
# so it is built once instead of once per loop iteration.
question_intro = """Below is the graph of a generalized additive model (GAM). The graph is presented as a JSON object with keys representing the x-axis and values representing the y-axis. For continuous features, the keys are intervals that represent ranges where the function predicts the same value. For categorical features, each key represents a possible value that the feature can take.
    
The graph is provided in the following format:
    - The name of the feature depicted in the graph
    - The type of the feature (continuous, categorical, or boolean)
    - Mean values
    - Lower bounds of confidence interval (optional)
    - Upper bounds of confidence interval (optional)\n\n"""

questions = []
for graph, graph_as_text in all_graphs:
    # only continuous graphs
    if graph.feature_type != "continuous":
        continue
    # re-parse the text so the ground truth comes from exactly what the LLM sees
    graph = graphs.text_to_graph(graph_as_text)
    num_bins = len(graph.x_vals)
    if num_bins == 0:
        # nothing to sample from
        continue

    # choose a random bin and then sample a random value from within that bin
    x_val = None
    for _ in range(max_sampling_attempts):
        bin_index = np.random.randint(num_bins)
        lower, upper = graph.x_vals[bin_index][0], graph.x_vals[bin_index][1]
        # round to 2 decimal places
        candidate = round(np.random.uniform(lower, upper), 2)
        # accept only points strictly inside the bin, so the answer is unambiguous
        if lower < candidate < upper:
            x_val = candidate
            break
    if x_val is None:
        print("Skipping a graph: no 2-decimal value strictly inside a bin was found.")
        continue

    # the actual value of the graph at x_val
    y_val = graph_mean(graph, x_val)

    question = question_intro
    question += f"Here is the graph:\n\n{graph_as_text}\n\n"
    question += f"Your task is to provide the mean value of the graph at {x_val}. What is the mean value of the graph at {x_val}?"
    # each entry is (prompt, expected answer as a string)
    questions.append((question, str(y_val)))

In [None]:
# subset 100 random questions
# Shuffle the list in place, then keep the first 100 entries; slicing simply
# returns everything when fewer than 100 questions exist.
random.shuffle(questions)
questions = questions[:100]

In [41]:
# print a random question
# random.choice picks one entry without reordering `questions`, so re-running
# this cell has no side effect on the list that is saved afterwards.
sample_question, sample_solution = random.choice(questions)
print(sample_question)
print('SOLUTION: ', sample_solution)

Below is the graph of a generalized additive model (GAM). The graph is presented as a JSON object with keys representing the x-axis and values representing the y-axis. For continuous features, the keys are intervals that represent ranges where the function predicts the same value. For categorical features, each key represents a possible value that the feature can take.
    
The graph is provided in the following format:
    - The name of the feature depicted in the graph
    - The type of the feature (continuous, categorical, or boolean)
    - Mean values
    - Lower bounds of confidence interval (optional)
    - Upper bounds of confidence interval (optional)

Here is the graph:

This graph represents a continuous-valued feature. The keys are intervals that represent ranges where the function predicts the same value.

Feature Name: ejection_fraction
Feature Type: continuous
Means: {"(14.0, 16.0)": 4.55, "(16.0, 22.5)": 3.26, "(22.5, 27.5)": 1.89, "(27.5, 32.5)": -0.42, "(32.5, 36.5)": 

In [42]:
# save the questions to json
# The file name includes the ".json" extension so that it matches the path the
# "Run the benchmark" section reads from ("../benchmark/read-value.json").
# Note that json serializes each (question, solution) tuple as a 2-element list.
with open("../benchmark/read-value.json", "w") as f:
    json.dump(questions, f, indent=2)

## Run the benchmark

In [47]:
# Read the benchmark questions back from the JSON file.
# The cells below index each entry as entry[0] (prompt) and entry[1] (solution).
with open("../benchmark/read-value.json") as benchmark_file:
    questions = json.load(benchmark_file)

In [48]:
# System prompt sent with every question.
system_msg = "You are an expert statistician and data scientist. You interpret global explanations produced by a generalized additive model (GAM). You answer all questions to the best of your ability, combining the data contained in the graph, any data set description you are given, and your knowledge about the real world."

# Ask the model each question and print its answer next to the expected one.
for entry in questions:
    prompt, solution = entry[0], entry[1]
    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": prompt},
    ]
    response = t2ebm.utils.openai_completion_query(
        'gpt-3.5-turbo-0125',
        messages,
        temperature=0.0,
    )
    print('LLM RESPONSE: ', response)
    print('SOLUTION: ', solution)
    print('-' * 80)

LLM RESPONSE:  To find the mean value of the graph at 400.75, we need to identify the interval that includes this value in the "total_rooms" feature.

From the given data, we can see that the intervals are defined as follows:
- "(385.5, 406.5)": -46655.0
- "(406.5, 413.5)": -17805.2

Since 400.75 falls within the interval "(385.5, 406.5)", the mean value of the graph at 400.75 is -46655.0.
SOLUTION:  -46655.0
--------------------------------------------------------------------------------
LLM RESPONSE:  To find the mean value of the graph at 8.93, we need to look at the interval that includes 8.93. 

From the provided data, we can see that the feature "DrainageSystems" is a continuous feature with intervals ranging from 0.0 to 14.0. The mean values for each interval are given.

The interval that includes 8.93 is "(8.5, 10.5)" with a mean value of 0.02253.

Therefore, the mean value of the graph at 8.93 is 0.02253.
SOLUTION:  0.02253
-----------------------------------------------------

### Gemini

In [None]:
# `os` is needed for os.environ below and is not imported by the notebook's
# import cell, so it is imported here to keep this cell runnable on a fresh kernel.
import os

import google.generativeai as genai

# The API key is read from the environment rather than hard-coded;
# a missing GEMINI_API_KEY variable raises KeyError here.
genai.configure(api_key=os.environ['GEMINI_API_KEY'])

In [46]:
import time

model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

# Ask Gemini each question and print its answer next to the expected one.
for question in questions:
    # question[0] is the prompt, question[1] the expected answer
    messages = [{'role':'user', 'parts': [question[0]]}]
    response = model.generate_content(
                messages,
                generation_config=genai.types.GenerationConfig(
                    candidate_count=1,
                    max_output_tokens=500,
                    temperature=0.2),
            )
    try:
        response = response.text
    except Exception:
        # Accessing .text can fail when the reply carries no usable text
        # (presumably e.g. a blocked or empty candidate -- confirm against the
        # SDK docs). Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit can still stop this long-running loop.
        print(f"Gemini: Invalid response with parts {response.parts}.")
        response = ""
    print('LLM RESPONSE: ', response)
    print('SOLUTION: ', question[1])
    print('-'*80)
    # sleep 20 sec to avoid rate limit
    time.sleep(20)

LLM RESPONSE:  The mean value at 400.75 is **-46655.0**. 

You can find this by looking at the "Means" data for the feature "total_rooms". The value 400.75 falls within the interval "(385.5, 406.5)", and the corresponding mean value for this interval is -46655.0. 

SOLUTION:  -46655.0
--------------------------------------------------------------------------------
LLM RESPONSE:  The provided interval for 8.93 is "(8.5, 10.5)" which corresponds to a mean value of **0.02253**. 

SOLUTION:  0.02253
--------------------------------------------------------------------------------
LLM RESPONSE:  The mean value at 14.15 falls within the interval **"(13.5, 15.0)"**.  

Therefore, the mean value of the graph at 14.15 is **0.01734**. 

SOLUTION:  0.01734
--------------------------------------------------------------------------------
LLM RESPONSE:  The mean value at 26.19 is **0.355**. 

Here's why:

* The provided JSON object represents a piecewise function.  
* The value 26.19 falls within the