In [35]:
import os, sys
import pandas as pd
import numpy as np
import json
from typing import Tuple, List, Union, Optional
from enum import Enum, IntEnum

from langchain_google_vertexai import VertexAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [49]:
def evaluate_sentiments(trues, preds):
    """Compare true sentiment class ids with predictions, element by element.

    Args:
        trues: True class ids, compared with ``==`` against the predicted ids.
        preds: Predictions, given either as a NumPy-style array or as an
            array-like (e.g. nested lists). A 3-dimensional input is reduced
            with ``argmax`` over its last axis to obtain class ids; a
            2-dimensional input is taken to already contain class ids.

    Returns:
        The result of ``trues == predicted_ids``.

    Raises:
        ValueError: If ``preds`` is neither 2- nor 3-dimensional.
    """
    # Plain sequences have no `.shape`; coerce them so nested lists work too.
    # Objects that already expose a shape are left untouched.
    if not hasattr(preds, 'shape'):
        preds = np.asarray(preds)

    n_dims = len(preds.shape)
    if n_dims == 3:
        preds_argmax = preds.argmax(axis=-1)
    elif n_dims == 2:
        preds_argmax = preds
    else:
        # ValueError is a subclass of Exception, so existing handlers still catch it.
        raise ValueError('The shape of `preds` needs to be either 2 dimensions or 3 dimensions.')
    return trues == preds_argmax

# Allowed sentiment polarities. Mixing in `str` makes every member a string
# that compares equal to its plain value (e.g. SentimentEnum.neutral == "neutral").
class SentimentEnum(str, Enum):
    positive = "positive"
    neutral = "neutral"
    negative = "negative"

# Structured form of one LLM answer: free-text reasoning followed by the
# sentiment label chosen from SentimentEnum.
class SentimentExtraction(BaseModel):
    # Reasoning the model writes out before committing to a label.
    reason: str = Field(description="A string field for step by step reasoning before arriving at the final answer")
    # Final polarity; must be one of the SentimentEnum values.
    sentiment: SentimentEnum = Field(description="An enumerated field for sentiment")

In [61]:
# Model name handed to the `VertexAI` client.
MODEL_TO_USE = 'gemini-1.5-flash-001'

# Background on Woolworths and its rewards program. It is concatenated in front of
# INSTRUCTION to build the prompt template. Note that the text ends without a
# trailing space or newline.
PREAMBLE = (
    "Woolworths is one of the largest supermarket chains in Australia and New Zealand. "
    "Woolworths primarily sells common household grocery-related products ranging from fruit, vegetables, meat, dairy products, condiments, pet food and more.\n" 
    "Woolworths has a loyalty (or rewards) program that gives its members extra benefits including targeted personalised offers. "
    "Customers need to sign up to join the program, at which point they'll be given a rewards card. "
    "In order to enjoy more benefits from this program, members need to continue to scan their rewards / loyalty cards when making purchases. "
    "Typical scan of rewards card would register transaction details such as the item purchased, purchase date time and purchase price against their unique personal identifier. "
    "Customer would also get 1 reward point for each dollar they spend in the transactions that are scanned. "
    "The main value proposition comes in the form of points where every 2000 points is equivalent to $10 off the customer's next shop. "
    "From time to time, members can also enjoy other benefits such as more points for redeeming targeted offers on a certain range of products and free samples of products."
)

# Task instructions for the LLM. `{aspect}` and `{customer_comment}` are template
# placeholders; the doubled braces (`{{` / `}}`) render as literal braces.
# The JSON example must itself be valid JSON, since the model is told to imitate it.
INSTRUCTION = """\
You have expertise in the domain of Responsible AI and how best practices should be adopted in large businesses. \
Below is a Responsible AI-related aspect or area that Woolworths can look into to address for its loyalty program with respect to the customer comments. \

### Aspect
{aspect}

Now for the customer comment below, please give the polarity of the customer's sentiment as either positive, negative or neutral against the aspect above. \
Neutral can either mean the customer's comment on the aspect is neither positive nor negative or the customer's comment does not mention anything about the aspect. \
Think step-by-step through the reasoning before arriving at an answer. You must output in JSON format like the example below: 
{{
    "reason": "The customer does not mention anything about it.",
    "sentiment": "neutral"
}}


### Customer Comment
{customer_comment}
"""

In [60]:
# Vertex AI client that serves as the LLM step of the chain.
llm = VertexAI(
    model=MODEL_TO_USE,
    max_tokens=8192,  # same value as 2**13
    max_retries=1,
    temperature=0.05,
    top_k=40,
    top_p=0.95,
    stop=None,
)

In [52]:
# Sentiment labels; a label's position in this list is its integer class id.
classes = ['positive', 'neutral', 'negative']
n_classes = len(classes)
# label -> class id lookup
classes_map = dict(zip(classes, range(n_classes)))

In [53]:
# Responsible-AI aspects, one per prompt; each string is substituted into the
# `{aspect}` slot of the instruction template, so the wording is read by the LLM.
aspects = [
    "Fairness and Bias: How customers think about the fairness of offers they receive and whether they think there are unjust biases in the targeting of offers.",
    "Transparency and Explainability: How customers perceive the process of being targeted, boosting and redeeming offers, and whether the entire personalisation process is clear and easily understandable.",
    "Data Privacy and Security: How customers think about the handling of their personal data with the program as well as data breach and fraud concerns."
]

In [56]:
# Earlier single-string form of the aspect descriptions, kept commented out for
# reference; the `aspects` list holds the same three descriptions as separate items.
# ASPECTS = """\
# * Fairness and Bias: How customers think about the fairness of offers they receive and whether they think there are unjust biases in the targeting of offers.
# * Transparency and Explainability: How customers perceive the process of being targeted, boosting and redeeming offers, and whether the entire personalisation process is clear and easily understandable.
# * Data Privacy and Security: How customers think about the handling of their personal data with the program as well as data breach and fraud concerns.
# """

# First customer comment of the test set, handy for one-off prompt experiments.
# NOTE(review): in the visible notebook `test_inputs` is only assigned in a cell
# further down, so this line raises NameError on Restart & Run All unless that
# cell is moved above this one.
COMMENT = test_inputs[0]

In [62]:
# Join background and instructions with a blank line. PREAMBLE ends without any
# trailing whitespace, so plain concatenation would fuse its last sentence with
# the first sentence of INSTRUCTION ("...products.You have expertise...").
prompt_template = PREAMBLE + "\n\n" + INSTRUCTION
prompt = PromptTemplate.from_template(prompt_template)

# Turns the model's JSON reply into a SentimentExtraction object.
parser = PydanticOutputParser(pydantic_object=SentimentExtraction)

# Pipeline: fill the prompt -> call the LLM -> parse the structured answer.
chain = prompt | llm | parser

In [66]:
# Run the chain once per customer comment and collect the parsed answers.
# Only the first aspect is evaluated (hence the `[:1]` slice).
# NOTE(review): confirm whether the remaining aspects should be run as well.
results = []
for aspect in aspects[:1]:
    for comment in test_inputs:
        result = chain.invoke({'customer_comment': comment, 'aspect': aspect})
        results.append(result)

In [67]:
# Show the collected SentimentExtraction objects (one per processed comment).
results

[SentimentExtraction(reason="The customer comment focuses on the program's simplicity and personalized marketing, without mentioning fairness or bias in offer targeting.", sentiment=<SentimentEnum.neutral: 'neutral'>),
 SentimentExtraction(reason="The customer is expressing frustration about not being able to benefit from the 'boost' feature due to forgetting to activate it before shopping. This suggests they feel the system is unfair in its requirement for timely activation, potentially leading to missed rewards.", sentiment=<SentimentEnum.negative: 'negative'>),
 SentimentExtraction(reason='The customer is complaining about the change in rewards structure, specifically the lack of bulk rewards for ordinary members. This suggests they feel the new system is unfair, as it benefits individual items rather than overall spending.', sentiment=<SentimentEnum.negative: 'negative'>),
 SentimentExtraction(reason='The customer explicitly states that they feel the program is unfair because it is

In [26]:
# Load the labelled evaluation set from CSV.
minimal_test_set = pd.read_csv('input/minimal-test-set.csv')

# Customer comments that are passed to the chain.
test_inputs = minimal_test_set['Input'].values

# Label strings for the three aspect columns, converted to class ids via `classes_map`.
# NOTE(review): the column order here (Transparency, Privacy, Bias) differs from the
# order of the `aspects` list (Bias, Transparency, Privacy) - confirm the alignment
# before comparing predictions with these labels.
raw_classes = minimal_test_set.loc[:, ['Transparency', 'Privacy', 'Bias']].values
test_labels = np.vectorize(classes_map.get)(raw_classes)

In [18]:
# Toy fixture for exercising the evaluation code.
# Presumably rows are samples and columns are aspects - TODO confirm.
trues = np.array([
    [0, 1, 2],
    [1, 0, 0],
    [0, 2, 1],
])

# Class scores with one score per class on the last axis; every entry here has
# its highest score at index 2.
preds = np.array([
    [[0.1, 0.2, 0.7], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]],
    [[0.1, 0.2, 0.7], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]],
    [[0.1, 0.2, 0.7], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]],
])

# Labels must be valid class ids. Checking the range (instead of requiring the
# maximum to equal n_classes - 1) also accepts data subsets that do not contain
# every label.
assert trues.min() >= 0 and trues.max() < n_classes
assert preds.shape[-1] == n_classes

In [14]:
# Predicted class id = index of the highest score along the last axis.
preds_argmax = np.argmax(preds, axis=-1)
# Element-wise agreement between true and predicted class ids.
matches = (trues == preds_argmax)

In [21]:
# Same comparison as above, done through the reusable helper.
matches = evaluate_sentiments(trues=trues, preds=preds)

In [32]:
# Overall accuracy: the fraction of entries in `matches` that are True.
accuracy = np.mean(matches)