In [None]:
%pip install pandas --upgrade



In [1]:
import html
import random
import time
from pathlib import Path

import ipywidgets as widgets
import pandas as pd
from IPython.display import display

from src.genAIClient import GenerativeAIClient

In [2]:
# Single shared client used for every model call in this notebook.
# NOTE(review): show_info_log=False presumably silences the client's info-level
# logging — confirm against src.genAIClient.
client = GenerativeAIClient(show_info_log=False)

In [3]:
# Define two variants of the prompt
# Variant A: the task alone (description + themes), with no worked examples.
prompt_A = """Product description: A pair of shoes that can fit any foot size.
Themes: adaptable, fit, omni-fit.
Product names:

# output format:
comma separated list."""

# Variant B: the same task, preceded by two worked examples (milkshake maker,
# space watch) that demonstrate the expected style of product names.
prompt_B = """Product description: A home milkshake maker.
Themes: fast, healthy, compact, flexible.
Product names: HomeShaker, Fit Shaker, QuickShake, Shake Maker

Product description: A watch that can tell accurate time in space.
Themes: astronaut, space-hardened, eliptical orbit, outer space.
Product names: AstroTime, SpaceGuard, Orbit-Accurate, EliptoTime.

Product description: A pair of shoes that can fit any foot size.
Themes: adaptable, fit, omni-fit.
Product names:

# output format:
comma separated list."""

def delay(ll_delay=1, ul_delay=2):
    """Pick a random wait time, in seconds, for use between retries.

    Args:
        ll_delay: Lower limit of the delay range.
        ul_delay: Upper limit of the delay range.

    Returns:
        A float drawn uniformly between the two limits, rounded to two
        decimal places.
    """
    seconds = random.uniform(ll_delay, ul_delay)
    return round(seconds, 2)

In [4]:
# Quick sanity check: send variant A once and print whatever the client returns.
output = client.invoke(prompt_A)
print(output)

AdaptaFoot, OmniFit, FitForAll


In [5]:
# Iterate through the prompts and collect several responses per variant
test_prompts = [prompt_A, prompt_B]
responses = []
num_tests_per_prompt = 6
max_retries = 3

for idx, prompt in enumerate(test_prompts):
    # Label each variant with a letter: index 0 -> "A", 1 -> "B", ...
    var_name = chr(ord('A') + idx)

    for i in range(num_tests_per_prompt):
        # Make up to max_retries attempts; a None return is treated as a
        # failed call and is followed by a short random pause.
        response = None
        for _attempt in range(max_retries):
            response = client.invoke(prompt)
            if response is not None:
                break
            seconds = delay()
            print(f"{var_name}: {i}: delaying for {seconds} seconds...")
            time.sleep(seconds)

        if response is None:
            # Report the dropped sample instead of skipping it silently, so an
            # unbalanced number of rows per variant is visible to the reader.
            print(f"{var_name}: {i}: no response after {max_retries} attempts; sample skipped.")
            continue

        responses.append(
            {
                "variant": var_name,
                "prompt": prompt,
                "response": response,
            }
        )


# Convert responses into a DataFrame; the explicit column list keeps the
# expected schema even when every call failed and `responses` is empty.
df = pd.DataFrame(responses, columns=["variant", "prompt", "response"])


In [6]:
# Inspect the collected responses (one row per successful model call).
df

Unnamed: 0,variant,prompt,response
0,A,Product description: A pair of shoes that can ...,"Omni-Fit Shoes, Omni-Fit Footwear, Adaptable F..."
1,A,Product description: A pair of shoes that can ...,"Omni-Fit, All-Fit, Adapt-A-Foot, Size-Less, Un..."
2,A,Product description: A pair of shoes that can ...,"AdaptaFit, OmniFit, FitForAll"
3,A,Product description: A pair of shoes that can ...,"Omni-Fit Shoes, Adapt-A-Footwear, Fit-All Foot..."
4,A,Product description: A pair of shoes that can ...,"Omni-Fit Shoes, Adaptable Footwear, One-Size-F..."
5,A,Product description: A pair of shoes that can ...,"Omni-Fit, Fit-All, Adapta-Steps"
6,B,Product description: A home milkshake maker.\n...,"Fit-On-Demand, Adaptashoe, OmniFit"
7,B,Product description: A home milkshake maker.\n...,"OmniFit, FitStep, AdaptaSole"
8,B,Product description: A home milkshake maker.\n...,"FitFit, OmniFit, AdaptaShoe"
9,B,Product description: A home milkshake maker.\n...,"OmniFit, AdaptaStep, FlexFit"


------------------------------------------------------

In [7]:
# Shuffle the DataFrame (presumably so the reviewer cannot tell the variant
# from the row order) and renumber rows 0..n-1 so positions match labels.
df = df.sample(frac=1).reset_index(drop=True)

# Initialize variables
response_index = 0  # position of the response currently shown to the reviewer

# Feedback holds 1 (thumbs up) / 0 (thumbs down) votes, so use a nullable
# integer column rather than a string-typed one; <NA> means "not yet reviewed".
df["feedback"] = pd.Series(pd.NA, index=df.index, dtype="Int64")

# Widgets that update_response() writes into
response = widgets.HTML()
count_label = widgets.Label()

def update_response():
    """Refresh the response pane and the progress counter.

    Reads the module-level ``response_index`` and ``df`` and writes to the
    ``response`` and ``count_label`` widgets. While ``response_index`` is
    inside ``df``, the row's "response" value is shown (or "No response" when
    the value is missing); otherwise a completion message is shown.
    """
    total = len(df)
    if response_index < total:
        current = df.iloc[response_index]["response"]
        if pd.notna(current):
            # The text comes from the model, so escape it before placing it
            # in an HTML widget; otherwise markup in a response would be
            # interpreted instead of displayed.
            response.value = f"<p>{html.escape(str(current))}</p>"
        else:
            response.value = "<p>No response</p>"
        count_label.value = f"Response: {response_index + 1} / {total}"
    else:
        response.value = "<p>All responses reviewed.</p>"
        count_label.value = f"Response: {response_index} / {total}"

def on_button_clicked(b):
    """Record a vote for the response currently shown, then advance.

    Args:
        b: The clicked button. A ``description`` of "👍" is stored as 1;
            any other description is stored as 0.

    Side effects:
        Writes the vote into ``df`` at row ``response_index``, increments the
        module-level ``response_index``, and refreshes the display.
    """
    global response_index

    # Ignore further clicks once every response has been rated.
    if response_index >= len(df):
        return

    user_feedback = 1 if b.description == "👍" else 0
    df.at[response_index, "feedback"] = user_feedback
    response_index += 1
    print(f"Clicked! response_index is now {response_index}")

    # Refresh unconditionally: after the final vote this is what switches the
    # display to the "All responses reviewed." message.
    update_response()


In [8]:
# Populate the response pane and counter with the first item
update_response()

# Build the two voting buttons
thumbs_up_button = widgets.Button(description="👍")
thumbs_down_button = widgets.Button(description="👎")

# Both buttons share one handler, which tells them apart by description
thumbs_down_button.on_click(on_button_clicked)
thumbs_up_button.on_click(on_button_clicked)

# Lay the buttons out side by side, thumbs-up first
button_box = widgets.HBox(children=[thumbs_up_button, thumbs_down_button])

# Render the response text, the buttons, and the progress counter
display(response, button_box, count_label)

HTML(value='<p>Fit-On-Demand, Adaptashoe, OmniFit</p>')

HBox(children=(Button(description='👍', style=ButtonStyle()), Button(description='👎', style=ButtonStyle())))

Label(value='Response: 1 / 12')

In [13]:
# Inspect the shuffled responses together with the feedback recorded so far.
df

Unnamed: 0,variant,prompt,response,feedback
0,B,Product description: A home milkshake maker.\n...,"FitSteps, OmniShoez, AdaptaFit",0
1,A,Product description: A pair of shoes that can ...,Omni-Fit Shoes,1
2,B,Product description: A home milkshake maker.\n...,"OneFit, OmniShoe, FitForAll",0
3,B,Product description: A home milkshake maker.\n...,"FitFlex, OmniFoot, AdaptiveSole",1
4,A,Product description: A pair of shoes that can ...,"Omni-Fit Shoes, Adapta-Fit Sneakers",1
5,A,Product description: A pair of shoes that can ...,"Omni-Fit, Adapt-A-Foot, Size-Less, Universal Fit",0
6,A,Product description: A pair of shoes that can ...,"Omni-Fit, Fit-All",0
7,B,Product description: A home milkshake maker.\n...,"OmniFit, FitAny, FitForAll",1
8,B,Product description: A home milkshake maker.\n...,"OmniFit, FitForAll, AdaptaShoe",0
9,B,Product description: A home milkshake maker.\n...,"FitShoe, OmniFit, AdaptFit",1


In [80]:
# Save the DataFrame as a CSV file
csv_file = "../data/responses.csv"
# to_csv does not create missing folders, so make sure the target directory exists.
Path(csv_file).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_file, index=False)

In [14]:
print("A/B testing completed. Here are the results:")

if "variant" not in df.columns:
    # Without the variant label there is nothing to group by
    print("No 'variant' column found. Summary cannot be generated.")
else:
    # Per variant: number of non-null feedback values and their mean
    # (with 1/0 votes the mean is the share of thumbs-up).
    grouped = df.groupby("variant")
    summary_df = grouped.agg(
        count=("feedback", "count"),
        score=("feedback", "mean"),
    ).reset_index()

    display(summary_df)
    print("Summary displayed!")  # DEBUG

A/B testing completed. Here are the results:


Unnamed: 0,variant,count,score
0,A,6,0.5
1,B,6,0.5


Summary displayed!
