In [None]:
# Import libraries
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from openai import OpenAI

# Set the API key
# Read the key from the OPENAI_API_KEY environment variable so the secret is
# never stored in the notebook itself. The placeholder is only a fallback for
# when the variable is unset or empty; do not paste a real key here.
open_ai_api_key = os.environ.get('OPENAI_API_KEY') or 'YOUR_API_KEY'

In [None]:
# Load data
# Fixed seed so the shuffle below yields the same row order on every
# Restart & Run All; change the value to get a different (but still
# reproducible) order.
RANDOM_SEED = 42

scores_data = (
    pd.read_json('../../data/headphones-data.json')
    # Treat empty or whitespace-only strings as missing values ...
    .replace(r'^\s*$', np.nan, regex=True)
    # ... and drop every row that has at least one missing value.
    .dropna()
    # Shuffle all rows, then renumber the index from 0.
    .sample(frac=1, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)

In [None]:
# Functions
def generate_prompt_for_aspect(headphone_data, dataset, aspect, n_neighbors=10):
    """Build a few-shot prompt asking for a description of one accuracy score.

    The headphones in ``dataset`` whose ``{aspect}AccuracyScore`` is closest to
    the target's score are listed, with their scores and descriptions, in a
    ``[CONTEXT]`` section. A ``[TASK]`` section then names the target headphone
    and its score.

    Args:
        headphone_data: Row (e.g. a ``pd.Series``) for the target headphone.
            Must provide ``id``, ``fullname`` and ``{aspect}AccuracyScore``.
        dataset: DataFrame with the columns ``id``, ``fullname``,
            ``{aspect}AccuracyScore`` and ``{aspect}AccuracyDescription``.
        aspect: Column prefix of the score to describe, e.g. ``'bass'``.
        n_neighbors: Maximum number of reference headphones placed in the
            context section. Defaults to 10.

    Returns:
        The prompt as a single string.
    """
    score_column = f'{aspect}AccuracyScore'
    description_column = f'{aspect}AccuracyDescription'
    label = aspect.capitalize()

    # The target must not show up among its own reference examples.
    candidates = dataset[dataset['id'] != headphone_data['id']]

    # Only a single score is compared, so the distance between two headphones
    # is the absolute difference of their scores; computed column-wise instead
    # of row by row.
    target_score = float(headphone_data[score_column])
    distances = (candidates[score_column].astype(float) - target_score).abs()
    neighbors = candidates.loc[distances.nsmallest(n_neighbors).index]

    parts = ["[CONTEXT]\n"]
    for _, headphone in neighbors.iterrows():
        parts.append(
            f"- Headphone: {headphone['fullname']}\n"
            f"  - {label} accuracy: {headphone[score_column]}\n"
            f"  - {label} accuracy description: {headphone[description_column]}\n"
            "\n"
        )

    parts.append(
        f"[TASK]\nGenerate a description of {aspect} performance score for the following headphone based on this data:\n\n"
    )
    parts.append(
        f"- Headphone: {headphone_data['fullname']}\n"
        f"  - {label} accuracy: {headphone_data[score_column]}\n"
    )

    return "".join(parts)

def call_gpt4(prompt, client, system_content, model="gpt-4o"):
    """Send one system + user message pair to a chat model and return its reply.

    Args:
        prompt: Text sent as the user message.
        client: Object exposing ``chat.completions.create``; the notebook
            passes an ``OpenAI`` client.
        system_content: Text sent as the system message.
        model: Model identifier forwarded to the API. Defaults to ``"gpt-4o"``.

    Returns:
        The text of the first returned choice, with leading and trailing
        whitespace removed.

    Raises:
        ValueError: If the first choice carries no text content.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": prompt},
        ],
    )
    content = completion.choices[0].message.content
    # The API may return a message without text; fail with a clear message
    # rather than an AttributeError on None.strip().
    if content is None:
        raise ValueError(f"Model '{model}' returned a message with no text content.")
    return content.strip()

In [None]:
# Generate descriptions
client = OpenAI(api_key=open_ai_api_key)

# Headphone used as the worked example; its real descriptions are compared
# with the generated ones in the next cell.
EXAMPLE_HEADPHONE_ID = 325
ASPECTS = ('bass', 'mid', 'treble')

example_matches = scores_data[scores_data['id'] == EXAMPLE_HEADPHONE_ID]
if example_matches.empty:
    # Fail with a readable message rather than an IndexError from .iloc[0].
    raise ValueError(
        f"No headphone with id {EXAMPLE_HEADPHONE_ID} in scores_data "
        "(rows with missing values are dropped when the data is loaded)."
    )
example_headphone_data = example_matches.iloc[0]

system_content = "You are a headphone reviewer. Your descriptions of sound performance scores are short, concise and colorful. You write like your reviewer colleagues, whose writing is provided as reference in the [CONTEXT] section of the prompts that you receive. You do not mention the exact value of the accuracy score that you are describing"

# Build every prompt first (cheap, local), then make one API call per aspect.
prompts = {
    aspect: generate_prompt_for_aspect(example_headphone_data, scores_data, aspect)
    for aspect in ASPECTS
}
print(prompts['bass'])

descriptions = {
    aspect: call_gpt4(prompts[aspect], client, system_content)
    for aspect in ASPECTS
}

# Keep the per-aspect names that the comparison cell reads.
bass_prompt, mid_prompt, treble_prompt = (prompts[aspect] for aspect in ASPECTS)
bass_description, mid_description, treble_description = (
    descriptions[aspect] for aspect in ASPECTS
)

In [None]:
# Compare generated and real descriptions
# Reference texts written by the reviewers for the example headphone.
real_bass_description = example_headphone_data['bassAccuracyDescription']
real_mid_description = example_headphone_data['midAccuracyDescription']
real_treble_description = example_headphone_data['trebleAccuracyDescription']

# One row per aspect: the real text next to the generated one.
data = {
    'Aspect': ['Bass', 'Mid', 'Treble'],
    'Real Description': [
        real_bass_description,
        real_mid_description,
        real_treble_description,
    ],
    'Generated Description': [
        bass_description,
        mid_description,
        treble_description,
    ],
}
df = pd.DataFrame(data)

# Render the frame as a plotly table, one table column per frame column.
comparison_table = go.Table(
    header={
        'values': list(df.columns),
        'fill_color': 'paleturquoise',
        'align': 'left',
    },
    cells={
        'values': [df[column] for column in df.columns],
        'fill_color': 'lavender',
        'align': 'left',
    },
)

fig = go.Figure(data=[comparison_table])
fig.update_layout(title="Comparison of Real and Generated Descriptions", height=800)
fig.show()