In [19]:
import os, sys
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
from typing import Tuple, List, Union, Optional
from enum import Enum, IntEnum

from langchain_google_vertexai import VertexAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [2]:
def evaluate_sentiments(trues, preds):
    """Compare true class ids with predictions, element by element.

    Parameters
    ----------
    trues : array-like of int, 2 dimensions
        Ground-truth class ids.
    preds : array-like, 2 or 3 dimensions
        Either predicted class ids (2 dimensions, same shape as `trues`) or
        per-class scores (3 dimensions); scores are reduced to class ids by
        taking the argmax over the last axis.

    Returns
    -------
    numpy.ndarray of bool
        `True` wherever the predicted class id equals the true class id.

    Raises
    ------
    ValueError
        If `preds` is neither 2- nor 3-dimensional, or if the predicted labels
        do not have the same shape as `trues`.
    """
    # Accept plain (nested) lists as well as arrays.
    trues = np.asarray(trues)
    preds = np.asarray(preds)

    if preds.ndim == 3:
        preds_argmax = preds.argmax(axis=-1)
    elif preds.ndim == 2:
        preds_argmax = preds
    else:
        raise ValueError('The shape of `preds` needs to be either 2 dimensions or 3 dimensions.')

    # Fail loudly instead of letting numpy broadcast (or collapse the
    # comparison) when the two label grids do not line up.
    if trues.shape != preds_argmax.shape:
        raise ValueError(
            f'Shape mismatch: `trues` has shape {trues.shape} but the predicted '
            f'labels have shape {preds_argmax.shape}.'
        )

    return trues == preds_argmax

# Closed set of sentiment labels the LLM is allowed to return.
# Mixing in `str` makes each member compare equal to its plain string value.
# NOTE(review): documented with comments rather than a class docstring because
# a docstring may surface as a `description` in the schema that pydantic
# generates for the output parser — confirm before adding one.
class SentimentEnum(str, Enum):
    positive = 'positive'
    neutral = 'neutral'
    negative = 'negative'

# Structured output expected from the LLM for one (aspect, comment) pair.
# Used as the `pydantic_object` of the `PydanticOutputParser` in this notebook.
# NOTE(review): the `description` strings below are presumably included in the
# parser's format instructions, so treat them as prompt text — confirm.
class SentimentExtraction(BaseModel):
    # Free-text reasoning; declared first so it precedes the label in the schema.
    reason: str = Field(
        description="A string field for step by step reasoning before arriving at the final answer"
    )
    # Final label, restricted to the members of `SentimentEnum`.
    sentiment: SentimentEnum = Field(
        description="An enumerated field for sentiment"
    )

In [12]:
# Model identifier passed as `model=` when the VertexAI LLM is constructed.
MODEL_TO_USE = 'gemini-1.5-flash-001'

# Background context prepended to every prompt: describes Woolworths and how
# its rewards / loyalty program works. Built from adjacent string literals, so
# the only line break inside it is the explicit "\n" after the first paragraph.
PREAMBLE = (
    "Woolworths is one of the largest supermarket chains in Australia and New Zealand. "
    "Woolworths primarily sells common household grocery-related products ranging from fruit, vegetables, meat, dairy products, condiments, pet food and more.\n" 
    "Woolworths has a loyalty (or rewards) program that gives its members extra benefits including targeted personalised offers. "
    "Customers need to sign up to join the program, at which point they'll be given a rewards card. "
    "In order to enjoy more benefits from this program, members need to continue to scan their rewards / loyalty cards when making purchases. "
    "Typical scan of rewards card would register transaction details such as the item purchased, purchase date time and purchase price against their unique personal identifier. "
    "Customer would also get 1 reward point for each dollar they spend in the transactions that are scanned. "
    "The main value proposition comes in the form of points where every 2000 points is equivalent to $10 off the customer's next shop. "
    "From time to time, members can also enjoy other benefits such as more points for redeeming targeted offers on a certain range of products and free samples of products."
)

# Task instructions appended after PREAMBLE. Template variables:
#   {aspect}           - description of the single Responsible-AI aspect to score
#   {customer_comment} - the raw customer comment
# Literal braces of the JSON example are doubled ({{ }}) so that template
# formatting leaves them in place. The example must itself be valid JSON
# (note the comma between the two keys), because the model is told to imitate
# it and its answer is later parsed as JSON.
# A trailing backslash joins a line with the next one, so each sentence has to
# carry its own closing punctuation.
INSTRUCTION = """\
You have expertise in the domain of Responsible AI and how best practices should be adopted in large businesses. \
Below is Responsible AI-related aspects or area that Woolworths can look into to address for its loyalty program with respect to the customer comments. \

### Aspect
{aspect}

Now for the customer comment below, please give the polarity of the customer's sentiment as either positive, negative or neutral against the aspect above. \
Neutral can either mean the customer's comment on the aspect is neither positive nor negative or the customer's comment does not mention anything about the aspect. \
Think step-by-step through the reasoning before arriving at an answer. You must output in JSON format like the example below: 
{{
    "reason": "The customer does not mention anything about that aspect.",
    "sentiment": "neutral"
}}


### Customer Comment
{customer_comment}
"""

In [15]:
# Sentiment labels; a label's position in this list is its integer class id.
classes = ['positive', 'neutral', 'negative']
n_classes = len(classes)
# Lookup from label string to class id.
classes_map = dict(zip(classes, range(n_classes)))

In [13]:
# Generation settings for the Vertex AI model, collected in one place and
# unpacked into the constructor below.
_llm_settings = {
    'model': MODEL_TO_USE,
    'temperature': 0.05,   # close to zero
    'top_p': 0.95,
    'top_k': 40,
    'max_tokens': 2**13,   # 8192
    'max_retries': 1,
    'stop': None,
}
llm = VertexAI(**_llm_settings)

In [16]:
# Labelled evaluation set: one customer comment per row plus one label column
# per aspect.
minimal_test_set = pd.read_csv('input/minimal-test-set.csv')

# Raw comment texts.
test_inputs = minimal_test_set['Input'].values

# Label columns, in this fixed order: Transparency, Privacy, Bias.
# Presumably these hold label strings matching `classes`; verify against the CSV.
raw_classes = minimal_test_set[['Transparency', 'Privacy', 'Bias']].values

# Element-wise translation of every label to its class id via `classes_map`.
to_class_id = np.vectorize(classes_map.get)
test_labels = to_class_id(raw_classes)

In [23]:
# (aspect name, description) pairs. Each prompt-ready value in `aspects` is
# "<name>: <description>", keyed by the aspect name.
# NOTE(review): the descriptions contain typos ("targetting", "being targeting",
# "re deeming"); they are prompt text sent to the model and are kept verbatim.
_aspect_descriptions = (
    (
        "Fairness and Bias",
        "How customers think about the fairness of offers they receive and whether they think there are unjust biases in the targetting of offers.",
    ),
    (
        "Transparency and Explainability",
        "How customers perceive the process of being targeting, boosting and re deeming offers for whether the entire personalisation process is clear and easily understandable.",
    ),
    (
        "Data Privacy and Security",
        "How customers think about the handling of their personal data with the program as well as data breach and fraud concerns.",
    ),
)
aspects = {name: f"{name}: {description}" for name, description in _aspect_descriptions}

In [9]:
# Disabled variant: the same three aspect descriptions as the `aspects` dict,
# written as one bulleted string. Presumably superseded by the one-aspect-at-a-
# time prompt; kept commented out for reference.
# ASPECTS = """\
# * Fairness and Bias: How customers think about the fairness of offers they receive and whether they think there are unjust biases in the targetting of offers.
# * Transparency and Explainability: How customers perceive the process of being targeting, boosting and re deeming offers for whether the entire personalisation process is clear and easily understandable.
# * Data Privacy and Security: How customers think about the handling of their personal data with the program as well as data breach and fraud concerns.
# """

# First comment of the test set, kept as a single sample for quick manual runs.
COMMENT = test_inputs[0]

In [33]:
# Prompt text for the repair step: shows the model the format instructions,
# its previous completion and the parser error, then asks it to try again.
_OUTPUT_FIX_TEXT = (
    'Instructions:\n--------------\n{instructions}\n--------------\n'
    'Completion:\n--------------\n{completion}\n--------------\n'
    '\nAbove, the Completion did not satisfy the constraints given in the Instructions.'
    '\nError:\n--------------\n{error}\n--------------\n\n'
    'Please try again. Please only respond with an answer that satisfies the constraints laid out in the Instructions:'
)

output_fix_tmpl = PromptTemplate(
    template=_OUTPUT_FIX_TEXT,
    input_variables=['completion', 'error', 'instructions'],
)

# Repair chain: fill the template above, then send it to the LLM.
outputfix_chain = output_fix_tmpl | llm

In [28]:
# Parser that turns the LLM's JSON answer into a `SentimentExtraction`.
parser = PydanticOutputParser(pydantic_object=SentimentExtraction)

# Full prompt = background context + task instructions. PREAMBLE does not end
# with whitespace, so a blank line is inserted; without it the two parts fuse
# into "...samples of products.You have expertise...".
prompt_template = PREAMBLE + "\n\n" + INSTRUCTION
prompt = PromptTemplate.from_template(prompt_template)

# The parser is deliberately left out of the chain: parsing is done in a
# separate step so a failure can be routed to the output-fix prompt.
chain = prompt | llm # | parser

In [None]:
# Smoke-test the chain on one sample. The prompt template declares the
# `aspect` and `customer_comment` variables, so both must be supplied — an
# empty input dict fails when the template is formatted.
result = chain.invoke(
    {
        'aspect': aspects['Fairness and Bias'],
        'customer_comment': COMMENT,
    }
)

In [None]:
# Parse the raw completion. If that fails, ask the LLM once to repair its own
# answer, then parse the repaired text (a second failure propagates).
try:
    parsed = parser.parse(result)
except Exception as parse_error:
    repair_request = {
        'instructions': parser.get_format_instructions(),
        'completion': result,
        'error' : parse_error,
    }
    outputfix_result = outputfix_chain.invoke(repair_request)
    parsed = parser.parse(outputfix_result)
result = parsed

In [31]:
def chain_w_outputfix_parser(input:dict, prompt_tmpl:"PromptTemplate", parser, llm, output_fix_tmpl:"PromptTemplate", max_fix_attempts:int=1):
    """Run `prompt_tmpl | llm`, parse the completion, and repair it on failure.

    The completion is first parsed as-is. If the parser raises, the completion,
    the parser's format instructions and the error are sent through
    `output_fix_tmpl | llm`, and the repaired completion is parsed again. Up to
    `max_fix_attempts` repair rounds are made, each one fed the most recent
    completion and error.

    Parameters
    ----------
    input : dict
        Variables for `prompt_tmpl`.
    prompt_tmpl : PromptTemplate
        Main prompt; composed with `llm` via `|`.
    parser
        Object exposing `parse(text)` and `get_format_instructions()`.
    llm
        Model (or any runnable) that the templates are piped into.
    output_fix_tmpl : PromptTemplate
        Repair prompt taking `instructions`, `completion` and `error`.
    max_fix_attempts : int, default 1
        Maximum number of repair rounds. The default reproduces the original
        single retry; values below 1 disable repairing.

    Returns
    -------
    Whatever `parser.parse` returns for the first completion that parses.

    Raises
    ------
    Exception
        The parser's own error if the completion still cannot be parsed once
        the repair rounds are exhausted.
    """
    completion = (prompt_tmpl | llm).invoke(input)
    try: # try without outputfix parser
        return parser.parse(completion)
    except Exception as err_msg:
        if max_fix_attempts < 1:
            raise
        # Keep a reference: the `except` target is unbound when the block ends.
        last_error = err_msg

    # using outputfix parser — only built once a repair is actually needed
    outputfix_chain = output_fix_tmpl | llm
    instructions = parser.get_format_instructions()
    for attempt in range(max_fix_attempts):
        completion = outputfix_chain.invoke(
            {
                'instructions': instructions,
                'completion': completion,
                'error' : last_error
            }
        )
        try:
            return parser.parse(completion)
        except Exception as err_msg:
            if attempt == max_fix_attempts - 1:
                raise
            last_error = err_msg

In [25]:
# Show the raw customer comments that will be sent to the model.
test_inputs

array(['a simple straightforward rewards program with clever personalised customer marketing.',
       'automatic boost really frustrates me when i forget to boost prior to shopping on time to be rewardsed',
       'since the paid rewardss started there is no bulk rewardss for ordinary members and it’s showing rewardss for individual items rather a whole money like it used to be.',
       "it's unfair that as a single person i can't get the bonus points for spending $100 plus a week as i don't need that much. it's geared towards families. plus as a single person it takes me ages to get enough point for a measly $10 discount. also the fuel vouchers should last longer, say 12 months. better rewardss for single people and needing less points for discounts if single. ",
       'security control is an issue. i found out an ex partner was able to call in & fraudulently change details (address etc) on my account & use rewardss. this was a breach of my privacy & completely inappropriate. woolw

In [48]:
# For every aspect, run every test comment through the LLM (with the
# parse-and-repair helper) and keep the reasoning plus the sentiment name.
# `results` maps aspect name -> list of dicts, one per comment, in the order
# of `test_inputs`.
results = {}
for aspect_key, aspect_value in aspects.items():
    print(f'ASPECT ====> {aspect_key}')
    aspect_sentiments = []
    for comment in tqdm(test_inputs):
        chain_input = {'aspect': aspect_value, 'customer_comment': comment}
        result = chain_w_outputfix_parser(chain_input, prompt, parser, llm, output_fix_tmpl)
        aspect_sentiments.append(
            {
                'reason': result.reason,
                'sentiment': result.sentiment.name,
            }
        )
    results[aspect_key] = aspect_sentiments

ASPECT ====> Fairness and Bias


100%|██████████| 10/10 [00:04<00:00,  2.01it/s]


ASPECT ====> Transparency and Explainability


100%|██████████| 10/10 [00:05<00:00,  1.68it/s]


ASPECT ====> Data Privacy and Security


100%|██████████| 10/10 [00:07<00:00,  1.38it/s]


In [50]:
# Ground-truth class ids: one row per comment, one column per label column
# (Transparency, Privacy, Bias — the order used when `raw_classes` was built).
test_labels

array([[0, 1, 1],
       [2, 1, 1],
       [1, 1, 2],
       [1, 1, 2],
       [1, 2, 1],
       [2, 2, 1],
       [0, 1, 1],
       [1, 1, 2],
       [1, 2, 1],
       [1, 1, 1]])

In [49]:
# Inspect the model's reasoning and sentiment per aspect.
# NOTE(review): `aspects` iterates as Fairness and Bias, Transparency and
# Explainability, Data Privacy and Security, while the `test_labels` columns
# are Transparency, Privacy, Bias — align the order before comparing the two.
results

{'Fairness and Bias': [{'reason': 'The customer comment focuses on the simplicity and personalization of the program, without mentioning fairness or bias in the offers.',
   'sentiment': 'neutral'},
  {'reason': 'The customer is expressing frustration about not being rewarded for forgetting to boost their rewards card before shopping. This suggests they feel the system is unfair, as they are not being rewarded for their usual spending.',
   'sentiment': 'negative'},
  {'reason': 'The customer is complaining about the change in rewards structure, specifically the lack of bulk rewards for ordinary members. This suggests they feel the new system is unfair, as it benefits individual items rather than overall spending.',
   'sentiment': 'negative'},
  {'reason': 'The customer explicitly states that they feel the program is unfair because it is geared towards families and not single people. They also mention that it takes them a long time to earn enough points for a discount, implying that t

In [18]:
# Toy fixture for the evaluation code: a 3x3 grid of true class ids ...
trues = np.array([[0, 1, 2], [1, 0, 0], [0, 2, 1]])

# ... and a 3x3x3 grid of scores in which every cell carries the same
# distribution, so the argmax is class 2 everywhere.
_cell_scores = [0.1, 0.2, 0.7]
preds = np.array([[_cell_scores] * 3] * 3)

# Sanity checks against the configured number of classes.
assert trues.max() == n_classes - 1 # TODO: cases where subsets of data doesn't contain all the labels
assert preds.shape[-1] == n_classes

In [14]:
# Manual version of the evaluation: collapse the score axis to class ids,
# then flag the cells where the prediction equals the truth.
preds_argmax = preds.argmax(-1)
matches = (preds_argmax == trues)

In [21]:
# Same comparison through the helper: boolean array, True where the predicted
# class id equals the true class id.
matches = evaluate_sentiments(trues, preds)

In [32]:
# Overall accuracy: number of matching cells divided by the total number of cells.
n_correct = matches.sum()
accuracy = n_correct / matches.size