In [None]:
!pip install wheel setuptools pip --upgrade
!pip install --upgrade openai

In [None]:
!pip install nltk
!pip install optuna

import os
import random
import re
import time
from itertools import combinations

import nltk
import numpy as np
import optuna
import pandas as pd
from nltk.translate.meteor_score import meteor_score
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

# Ensure the necessary NLTK resources are downloaded
nltk.download('wordnet')

# Take the key from the OPENAI_API_KEY environment variable so the secret never
# has to be pasted into (and saved with) the notebook. When the variable is
# unset this falls back to the empty placeholder.
API_KEY = os.environ.get('OPENAI_API_KEY', '')
client = OpenAI(api_key = API_KEY)

# Chat model used for every completion request in this notebook.
model_id = 'gpt-3.5-turbo-0125'

In [None]:
import pandas as pd
import random
from openai import OpenAI
import time
import numpy as np
import gzip
import re


def parse(path):
  """Yield one evaluated record per line of a gzip-compressed file.

  Every line of the file is passed to ``eval`` and the resulting object is
  yielded. The file handle is closed once the generator is exhausted or closed.

  :param path: Path of the gzip file to read.
  :return: Generator over the evaluated lines, in file order.
  """
  # The context manager guarantees the handle is released when iteration ends.
  with gzip.open(path, 'rb') as g:
    for l in g:
      # SECURITY: eval() executes whatever expression the line contains. Only
      # run this on trusted dumps; ast.literal_eval is a safer choice when
      # every line is a plain Python literal.
      yield eval(l)

def getDF(path):
  """Collect every record produced by ``parse(path)`` into a DataFrame.

  Records are keyed by their 0-based position in the file; that position
  becomes the row index of the returned frame.
  """
  indexed_records = dict(enumerate(parse(path)))
  return pd.DataFrame.from_dict(indexed_records, orient='index')

# Function to truncate the string to 20 words or less
def truncate_to_20_words(s):
    """Return ``s`` reduced to its first 20 whitespace-separated words.

    The kept words are re-joined with single spaces. Anything that is not a
    ``str`` is handed back untouched.
    """
    if not isinstance(s, str):
        return s
    return ' '.join(s.split()[:20])

# Load the review records and the product metadata from their gzip dumps.
Beautydf = getDF('reviews_Beauty_5.json.gz')
Beautymetadf = getDF('meta_Beauty.json.gz')

# Shorten every product title to at most 20 words (non-string titles are left unchanged).
Beautymetadf['title'] = Beautymetadf['title'].apply(truncate_to_20_words)

# Left-join metadata onto the reviews by product id, then drop rows lacking any
# of the required metadata fields.
merged_df = pd.merge(Beautydf, Beautymetadf, on='asin', how='left')
merged_df = merged_df.dropna(subset=['title','description','categories','brand'])
# Keep only titles that belong to exactly one asin, so a title identifies a single product.
merged_df = merged_df.groupby('title').filter(lambda x: x['asin'].nunique() == 1)

# Drop reviewers that have fewer than 6 remaining rows.
merged_df6 = (merged_df.groupby('reviewerID').filter(lambda x: len(x) >= 6))  # keep only users with >= 6 rows of history

beauty_df = merged_df6.reset_index(drop = True)
# Pool of all distinct product titles; candidate items are sampled from it later.
all_cand_items = list( beauty_df['title'].unique() )

# use a subset for tuning: the first `tuning_size` distinct reviewers (in order of appearance)
tuning_size = 20 # change to reset tuning set
tuning_users = beauty_df['reviewerID'].dropna().unique()[:tuning_size]
tuning_beauty_df = beauty_df[beauty_df['reviewerID'].isin(tuning_users)]

# every remaining reviewer goes to the test set, so tuning and test users do not overlap
test_users = beauty_df['reviewerID'].dropna().unique()[tuning_size:]
test_beauty_df = beauty_df[beauty_df['reviewerID'].isin(test_users)]


In [None]:
def mean_pairwise_meteor(prompts):
    """
    Average the METEOR score over every unordered pair of prompts.

    Each prompt is split on whitespace; for a pair, the earlier prompt in the
    list is passed as the single reference and the later one as the hypothesis.

    :param prompts: List of generated text outputs
    :return: Mean METEOR score across all prompt pairs, or 0 when there are no pairs
    """
    token_lists = [text.split() for text in prompts]

    running_total = 0
    pair_count = 0
    for reference, hypothesis in combinations(token_lists, 2):
        running_total += meteor_score([reference], hypothesis)
        pair_count += 1

    if pair_count == 0:
        return 0
    return running_total / pair_count

# # Example usage
# prompts = ["this is a test sentence", "this is an example test", "another test case"]
# mean_score = mean_pairwise_meteor(prompts)
# print("Mean Pairwise METEOR Score:", mean_score)



def mean_pairwise_cosine_similarity(arrays):
    """
    Compute the mean pairwise cosine similarity among a list of vectors.

    :param arrays: List of vectors that can be stacked row-wise into one matrix
    :return: Mean of the off-diagonal entries of the cosine-similarity matrix;
             0.0 when fewer than two vectors are given (no pairs exist), which
             matches what mean_pairwise_meteor returns in that situation
    """
    n = len(arrays)

    # With no pairs there is nothing to average: np.vstack rejects an empty
    # list, and a single vector leaves an empty off-diagonal selection whose
    # mean is NaN (plus a RuntimeWarning).
    if n < 2:
        return 0.0

    # Stack arrays into a single matrix, one vector per row
    matrix = np.vstack(arrays)

    # Compute cosine similarity matrix
    sim_matrix = cosine_similarity(matrix)

    # Exclude diagonal elements (self-similarity)
    mask = np.ones((n, n), dtype=bool)
    np.fill_diagonal(mask, 0)

    # Average over the remaining (off-diagonal) entries
    return sim_matrix[mask].mean()

# # Example usage
# arrays = [np.random.rand(10) for _ in range(5)]  # List of 5 random 10-dimensional vectors
# avg_sim = mean_pairwise_cosine_similarity(arrays)
# print("Mean Pairwise Cosine Similarity:", avg_sim)



def ADO(num_prompts, cos_sim, lexical_sim):
    """
    Search for a history-reformulation algorithm ("prompt") that improves
    GPT-based recommendation accuracy on the tuning users.

    For each of 3 search rounds the function:
      1. asks the chat model for ``num_prompts`` candidate algorithms and
         regenerates the batch until it is diverse enough (mean pairwise cosine
         similarity <= ``cos_sim`` AND mean pairwise METEOR <= ``lexical_sim``);
         after 6 attempts it falls back to the batch with the lowest combined
         normalised similarity seen in that round;
      2. scores every candidate on ``tuning_beauty_df``: each user's history
         (all but the last item) is reformulated with the candidate and the
         model is asked to pick 10 of 100 candidates; a hit is counted when the
         held-out last item appears in the answer;
      3. appends all algorithm/score pairs collected so far to the generation
         prompt used by the next round.

    Relies on the module-level ``client``, ``model_id``, ``tuning_beauty_df``
    and ``all_cand_items``.

    :param num_prompts: Number of candidate algorithms generated per batch (>= 1)
    :param cos_sim: Upper bound on the mean pairwise cosine similarity of a batch
    :param lexical_sim: Upper bound on the mean pairwise METEOR score of a batch
    :return: ``[best_prompt, best_score]``; the score is the hit rate obtained
             with the reformulated history on the tuning users evaluated
    :raises ValueError: if ``num_prompts`` is smaller than 1
    """
    if num_prompts < 1:
        raise ValueError("num_prompts must be at least 1")

    # Take the first tuning user's chronological history (last 15 items) as the
    # example that is shown to the model inside the generation prompt.
    for user_id in tuning_beauty_df['reviewerID'].unique():

        user_df = tuning_beauty_df[ tuning_beauty_df['reviewerID'] == user_id ]
        user_df = user_df.sort_values(by='unixReviewTime', ascending = True)

        user_items = list( user_df['title'].unique() )

        # keep last 15 items
        user_items_applied = user_items[-15:]
        break

    ADO_prompt = f"""
    We will employ GPT-3.5 to perform personalized recommendation for Beauty Products, in which we will feed GPT-3.5 with a user's prior purchase history as well as a set of candidate items to select.
    The user's prior history consists a list of beauty products, each represented by its product title. The following can be an example for the user's prior purchase history: {user_items_applied}
    The candidate items are a list of beauty products, each also represented by its product title.

    Now, please propose a novel, detailed, and step-by-step algorithm to reformulate the user purchase history into a format that is most suitable for GPT-3.5.
    """

    search_rounds = 3
    prompt_performances = {}

    # Ranges used to min-max normalise the two similarity measures (adjustable)
    meteor_min, meteor_max = 0.0, 1.0  # METEOR is usually in [0,1]
    cos_min, cos_max = 0.5, 1.0  # Cosine similarity often ranges [0.5,1] in text similarity tasks

    for _ in range(search_rounds):

        total_try = 0
        prompt_cos_dict = {}

        # Regenerate the batch until BOTH diversity thresholds hold or the
        # attempt budget is spent. Looping on the cosine threshold alone would
        # leave the round with an empty batch whenever the cosine check passes
        # while the METEOR check fails.
        while True:

            prompts = []
            prompt_embeddings = []

            for _ in range(num_prompts):
                completion = client.chat.completions.create(
                    model = model_id, temperature = 1, max_tokens = 1000,

                    messages=[{"role": "system", "content": "Please enrich and then reformulate the user purchase history to be much more informative and detailed based on the narrative provided."},
                                {"role": "user", "content": ADO_prompt}],
                    timeout = 1200)

                candidate_prompt = completion.choices[0].message.content
                prompts.append(candidate_prompt)

                response = client.embeddings.create(
                    input=candidate_prompt,
                    model="text-embedding-3-small",
                )

                candidate_prompt_embedding = np.array(response.data[0].embedding)
                prompt_embeddings.append(candidate_prompt_embedding)

                # ADO_prompt += '\n\n\n Please generate each step to be completely different in wording and semantics from this one: \n\n' + candidate_prompt + '\n\n\n'

            total_try += 1
            mean_meteor = mean_pairwise_meteor(prompts)
            mean_cos_sim = mean_pairwise_cosine_similarity(prompt_embeddings)

            # Min-max normalization
            meteor_norm = (mean_meteor - meteor_min) / (meteor_max - meteor_min)
            cos_norm = (mean_cos_sim - cos_min) / (cos_max - cos_min)

            # Remember this batch with its combined similarity (lower = more diverse)
            prompt_cos_dict[tuple(prompts)] = meteor_norm + cos_norm

            if mean_cos_sim <= cos_sim and mean_meteor <= lexical_sim:
                print('Qualifying prompts generated!')
                break

            if total_try > 5:
                # Budget exhausted: fall back to the most diverse batch seen so far.
                prompts = list(min(prompt_cos_dict, key=prompt_cos_dict.get))
                break

        # Now we have the candidate prompts, start tuning set evaluation:
        system_msg = "Please serve as a Recommender System on Beauty Products, based on user's prior purchase information provided."

        for candidate_prompt in prompts:

            right_count = 0
            compressed_right_count = 0
            total = 0
            for user_id in tuning_beauty_df['reviewerID'].unique():

                user_df = tuning_beauty_df[ tuning_beauty_df['reviewerID'] == user_id ]
                user_df = user_df.sort_values(by='unixReviewTime', ascending = True)

                user_items = list( user_df['title'].unique() )

                # keep last 15 items
                user_items_applied = user_items[-15:]

                # 99 random negatives drawn from items the user never purchased,
                # plus the held-out last item as the single positive
                filtered_list = [x for x in all_cand_items if x not in user_items]
                sampled_items = list( random.sample(filtered_list, 99) )

                sampled_items.append( user_items_applied[-1] )
                random.shuffle(sampled_items)

                target = user_items_applied[-1]

                # Baseline: recommend from the raw title list
                augmented_prompt = (
                        f"Given the user has purchased the following items in chronological order: "
                        f"{user_items_applied[:-1]}; output a list of 10 items to recommend out of the following candidate items ONLY; do NOT explain anything, just output the items:"
                        f"\n{sampled_items}"
                    )

                completion = client.chat.completions.create(
                        model = model_id, temperature = 0,
                        messages=[{"role": "system", "content": system_msg},
                                    {"role": "user", "content": augmented_prompt}],
                        timeout = 1200)

                pred = completion.choices[0].message.content

                total += 1
                # A hit is a plain substring match of the target title in the answer
                if target in pred:
                    right_count += 1

                # Perform ADO: reformulate the history with the candidate algorithm
                reformulation_prompt = f'Please thoroughly reformulate the user purchase history based on the following algorithm:\n\n{candidate_prompt}\n\nUser purchase history to reformulate: {user_items_applied[:-1]}\n\nReturn the reformulation of the user purchase history ONLY.'

                completion = client.chat.completions.create(
                        model = model_id, temperature = 1.0,

                        messages=[{"role": "system", "content": 'Please reformulate the user purchase history to be much more informative and detailed based on the narrative provided.'},
                                    {"role": "user", "content": reformulation_prompt}],
                        timeout = 1200)

                reformulated_history = completion.choices[0].message.content

                # Recommend again, this time from the reformulated history
                compressed_prompt = (
                        f"Given the user has purchased the following items in chronological order:\n\n"
                        f"{reformulated_history}\n\nOutput a list of 10 items to recommend out of the following 100 candidate items ONLY; do NOT explain anything, just output the items:"
                        f"\n{sampled_items}"
                    )

                completion = client.chat.completions.create(
                        model = model_id, temperature = 0,
                        messages=[{"role": "system", "content": system_msg},
                                    {"role": "user", "content": compressed_prompt}],
                        timeout = 1200)

                compressed_pred = completion.choices[0].message.content

                if target in compressed_pred:
                    compressed_right_count += 1

                print(right_count, compressed_right_count)
                print()

                # Every 20 users: report progress and stop evaluating this
                # candidate early when the reformulated history is not beating
                # the raw-history baseline.
                if total % 20 == 0:
                    print(f"Accuracy: {right_count/total}")
                    print(f"Compressed Accuracy: {compressed_right_count/total}")
                    print()

                    if compressed_right_count <= right_count:
                        break

            prompt_performances[candidate_prompt] = compressed_right_count/total
            print( prompt_performances[candidate_prompt] )

        # Feed every algorithm/score pair gathered so far into the next round's prompt
        prior_exp = ""
        for k,v in prompt_performances.items():
            prior_exp += '\n\n' + 'Algorithm: ' + k + '\n\n' + 'Score: ' + str(v)

        # NOTE: user_items_applied now holds the history of the last user
        # evaluated above, not the first tuning user used for the initial prompt.
        ADO_prompt = f"""
        We will employ GPT-3.5 to perform personalized recommendation for Beauty Products, in which we will feed GPT-3.5 with a user's prior purchase history as well as a set of candidate items to select.
        The user's prior history consists a list of beauty products, each represented by its product title. The following can be an example for the user's prior purchase history: {user_items_applied}
        The candidate items are a list of beauty products, each also represented by its product title.

        Now, please propose a novel, detailed, and step-by-step algorithm to reformulate the user purchase history into a format that is most suitable for GPT-3.5.
        """

        ADO_prompt += '\n\nBelow are some algorithm-score pairs for you to refer to prior to generation:' + '\n' + prior_exp
        print(ADO_prompt)
        print()

    prompt = max(prompt_performances, key=prompt_performances.get)
    return [prompt, prompt_performances[prompt]]


# # example run
# output = ADO(2, 0.7, 0.3)

# print(output[0])
# print()
# print(output[1])


In [None]:
# perform Bayes Opt.
best_prompt_per_round = {}

def objective(trial):
    """Optuna objective: run one ADO search with trial-suggested settings.

    The best prompt returned by ADO is recorded, together with its score, in
    the module-level ``best_prompt_per_round`` dict.

    :param trial: Optuna trial used to draw the three hyper-parameters
    :return: Validation accuracy of the best prompt found by this ADO run
    """
    n_candidates = trial.suggest_int('num of prompts', 2, 4)
    max_cosine = trial.suggest_float('Mean Cosine Sim among prompts', 0.6, 0.95, step = 0.05)
    max_meteor = trial.suggest_float('Mean Meteor among prompts', 0.2, 0.5, step = 0.05)

    best_prompt, val_accuracy = ADO(n_candidates, max_cosine, max_meteor)
    best_prompt_per_round[best_prompt] = val_accuracy
    print(val_accuracy)

    # validation accuracy is the quantity the study maximises
    return val_accuracy


# Run the hyper-parameter search: 8 trials, maximising the value returned by objective.
study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = 8)

# The prompt with the highest recorded score across all trials is the one evaluated on the test set.
final_prompt = max(best_prompt_per_round, key=lambda candidate: best_prompt_per_round[candidate])

# Hyper-parameters of the best trial.
best_params = study.best_params
num_prompts = best_params['num of prompts']
cos_sim = best_params['Mean Cosine Sim among prompts']
lexical_sim = best_params['Mean Meteor among prompts']

## Performance Evaluation

In [None]:
# Test-set evaluation: for every test user, compare the hit rate obtained with
# the raw purchase history against the hit rate obtained after reformulating the
# history with `final_prompt`.
system_msg = "Please serve as a Recommender System on Beauty Products, based on user's prior purchase information provided."

right_count = 0
compressed_right_count = 0
total = 0
for user_id in test_beauty_df['reviewerID'].unique():

    user_df = test_beauty_df[ test_beauty_df['reviewerID'] == user_id ]
    user_df = user_df.sort_values(by='unixReviewTime', ascending = True)

    user_items = list( user_df['title'].unique() )

    # keep last 15 items
    user_items_applied = user_items[-15:]

    # 99 random negatives drawn from items the user never purchased,
    # plus the held-out last item as the single positive
    filtered_list = [x for x in all_cand_items if x not in user_items]
    sampled_items = list( random.sample(filtered_list, 99) )

    sampled_items.append( user_items_applied[-1] )
    random.shuffle(sampled_items)

    target = user_items_applied[-1]

    # Baseline: recommend from the raw title list
    augmented_prompt = (
            f"Given the user has purchased the following items in chronological order: "
            f"{user_items_applied[:-1]}; output a list of 10 items to recommend out of the following candidate items ONLY; do NOT explain anything, just output the items:"
            f"\n{sampled_items}"
        )

    completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": system_msg},
                        {"role": "user", "content": augmented_prompt}],
            timeout = 1200)

    pred = completion.choices[0].message.content

    total += 1
    # A hit is a plain substring match of the target title in the answer
    if target in pred:
        right_count += 1

    # Perform ADO: reformulate the history with the selected algorithm
    reformulation_prompt = f'Please thoroughly reformulate the user purchase history based on the following algorithm:\n\n{final_prompt}\n\nUser purchase history to reformulate: {user_items_applied[:-1]}\n\nReturn the reformulation of the user purchase history ONLY.'

    completion = client.chat.completions.create(
            model = model_id, temperature = 1.0,

            messages=[{"role": "system", "content": 'Please reformulate the user purchase history to be much more informative and detailed based on the narrative provided.'},
                        {"role": "user", "content": reformulation_prompt}],
            timeout = 1200)

    reformulated_history = completion.choices[0].message.content

    # Recommend again, this time from the reformulated history
    compressed_prompt = (
            f"Given the user has purchased the following items in chronological order:\n\n"
            f"{reformulated_history}\n\nOutput a list of 10 items to recommend out of the following 100 candidate items ONLY; do NOT explain anything, just output the items:"
            f"\n{sampled_items}"
        )

    completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": system_msg},
                        {"role": "user", "content": compressed_prompt}],
            timeout = 1200)

    compressed_pred = completion.choices[0].message.content

    if target in compressed_pred:
        compressed_right_count += 1

    print(right_count, compressed_right_count)
    print()

    # Periodic progress report
    if total % 20 == 0:
        print(f"Accuracy: {right_count/total}")
        print(f"Compressed Accuracy: {compressed_right_count/total}")
        print()

# Report the metrics over the whole test set. The periodic report inside the
# loop only fires on multiples of 20 users, so without this the final numbers
# would usually never be shown.
if total:
    print(f"Final Accuracy ({total} users): {right_count/total}")
    print(f"Final Compressed Accuracy ({total} users): {compressed_right_count/total}")
else:
    print("No test users to evaluate.")