In [None]:
# Cell for Comments

# Installation of Requirements

This section describes the requirements to run the notebook and do the following:

- Access and chat with the McHire Chatbot.
- Send messages to the OpenAI API and use the chat completion feature


The requirements include installing the OpenAI and Selenium Python libraries.

In [None]:
%%shell
# Install chromium and chromium-driver from the Debian buster repositories.
#
# Ubuntu no longer distributes chromium-browser outside of snap
#
# Proposed solution: https://askubuntu.com/questions/1204571/how-to-install-chromium-without-snap
#
# NOTE: `%%capture` must not appear below `%%shell`. Everything after the first
# line is handed to bash, which reads `%%capture` as a job-control spec and
# prints "fg: no job control".

# Add debian buster
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

# Export each key into the keyring file referenced by `signed-by` above
apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer debian repo for chromium* packages only
# Note the double-blank lines between entries
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500


Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300


Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# Install chromium and chromium-driver
# `-y` keeps the install from stopping at the confirmation prompt.
apt-get update
apt-get install -y chromium chromium-driver

/bin/bash: line 0: fg: no job control
Executing: /tmp/apt-key-gpghome.jMyl89bW5H/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
gpg: key DCC9EFBF77E11517: public key "Debian Stable Release Key (10/buster) <debian-release@lists.debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.749blYo3QS/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
gpg: key DC30D7C23CBBABEE: public key "Debian Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.rEDuVT7qjb/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
gpg: key 4DFAB270CAA96DFA: public key "Debian Security Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Get:1 http://deb.debian.org/debian buster InRelease [122 kB]
G



In [None]:
%%capture
pip install openai selenium==3.141.0

In [None]:
%%capture
pip install torchmetrics[text] transformers

In [None]:
import getpass
import json
import os
import time

import openai
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from torchmetrics.functional.text.bert import bert_score

# McHire Chatbot Connection


This section describes how to connect to the production McHire bot from McDonald's and chat with it interactively using the Selenium library.

The bot uses Paradox.ai and has a shadow root in the Document Object Model (DOM). So to access the chatbot, we have to use the shadow root and then search for the relevant elements in the DOM.

### Insights about the Shadow Root

- The shadow root for the chatbot here contains only one element with tag `textarea` to enter the text for the chatbot.

- All the messages are stored in an element with class `me-messages__inner`.

- Each of these messages has either of the two values for the attribute `data-testid` - `message_lbl_theirs` or `message_lbl_ours` which describes the label for the message. This helps decide the sender for the message.
  - `message_lbl_theirs`: The message is from the user
  - `message_lbl_ours`: The message is from the chatbot

- The text content for each message is contained in an element with class `ae8f46`.


In [None]:
class ChromeDriver:
    '''
    Headless browser driver for Chrome

    Attributes:
        webdriver: the selenium Chrome driver, or None when the browser was
            never started or has already been shut down.
    '''
    def __init__(self):
        '''
        Start the browser with arguments
        '''
        # Assigned first so close()/__del__ stay safe if Chrome fails to start.
        self.webdriver = None

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        self.webdriver = webdriver.Chrome("chromedriver", options=chrome_options)

    def close(self):
        '''
        Quit the browser if it is still running

        Safe to call more than once; later calls do nothing.
        '''
        browser = getattr(self, "webdriver", None)
        if browser is None:
            return
        # Cleared before quitting so a failing quit() is not retried by __del__.
        self.webdriver = None
        browser.quit()

    def __del__(self):
        '''
        Quit the browser when deleting the object
        '''
        self.close()

In [None]:
# Start the headless Chrome session used by the McHire chat client below.
driver = ChromeDriver()

In [None]:
class McHireChatClient:
    '''
    Class to connect with the McHire Chatbot interactively
    using selenium library

    Link: https://jobs.mchire.com/

    Typical use: construct with a driver, call load() once, then use
    sendUserMessage() / listAllMessages().
    '''

    def __init__(self, driver):
        '''
        Args:
            driver: either a wrapper that exposes the selenium driver as
                `.webdriver` (such as ChromeDriver) or the selenium driver
                itself.
        '''
        if hasattr(driver, 'webdriver'):
            self.driver = driver.webdriver
        else:
            self.driver = driver

        self.host = None          # <apply-widget> element, set by load()
        self.shadow_root = None   # shadow root of the host, set by _startChat()
        self.uri = "https://jobs.mchire.com/"

    def _requireShadowRoot(self):
        '''
        Return the chat widget's shadow root, or fail with a clear message
        when load() has not been run yet.
        '''
        if self.shadow_root is None:
            raise RuntimeError(
                "The chat widget is not loaded; call load() before using the chatbot."
            )
        return self.shadow_root

    def _startChat(self):
        '''
        Click on the chatbot in the shadow root to start the conversation
        '''
        script = "return arguments[0].shadowRoot"
        shadow_root = self.driver.execute_script(script, self.host)
        if shadow_root is None:
            raise RuntimeError("The chat widget element does not expose a shadow root.")
        self.shadow_root = shadow_root
        self.shadow_root.find_element_by_class_name("apply-chat-prompt").click()

    def load(self, page_wait=10):
        '''
        Load the chatbot and start the chatbot

        Note: This function has to be run before starting any conversation with
        the bot

        Args:
            page_wait: seconds to sleep after opening the page, before looking
                for the chat widget.
        '''
        self.driver.get(self.uri)
        time.sleep(page_wait)

        self.host = WebDriverWait(self.driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, "apply-widget"))
        )
        self._startChat()

    def sendUserMessage(self, message, response_wait=20):
        '''
        Send a message from the user side to the chatbot

        Args:
            message: text typed into the chat box and submitted with RETURN.
            response_wait: seconds to sleep before reading the reply.

        Returns: the response from the chatbot (innerHTML of the last
                 message element found after waiting)

        Raises:
            RuntimeError: if load() was not called, or no message element
                exists after waiting.
        '''
        shadow_root = self._requireShadowRoot()

        element = shadow_root.find_element_by_tag_name("textarea")
        element.send_keys(message)
        element.send_keys(Keys.RETURN)
        time.sleep(response_wait)  # Wait for the response to come

        messages = shadow_root.find_elements_by_class_name("ae8f46")
        if not messages:
            raise RuntimeError("No chat messages were found after sending the message.")
        # NOTE(review): this reads innerHTML while listAllMessages reads
        # innerText; kept as-is so existing callers see the same value.
        return messages[-1].get_attribute("innerHTML")

    def listAllMessages(self):
        '''
        List the conversation from the chatbot

        Children of the message container that carry no recognised
        `data-testid` or no text element are skipped.

        Returns: List of messages each containing:
                    role: Describing whether the sender is a user or the bot
                    message: Chat message by the sender

        Raises:
            RuntimeError: if load() was not called.
        '''
        shadow_root = self._requireShadowRoot()
        test_id_mapping = {"message_lbl_theirs": "user", "message_lbl_ours": "bot"}

        chat_area = shadow_root.find_element_by_class_name("me-messages__inner")

        messages = []
        for msg_element in chat_area.find_elements_by_xpath("*"):
            role = test_id_mapping.get(msg_element.get_attribute("data-testid"))
            # Plural lookup returns [] instead of raising when the text is absent.
            text_elements = msg_element.find_elements_by_class_name("ae8f46")
            if role is None or not text_elements:
                continue

            text = text_elements[0].get_attribute("innerText")
            messages.append({"role": role, "message": text})

        return messages

In [None]:
# Open the McHire page in the headless browser and click the chat prompt so the
# widget is ready to receive messages.
client = McHireChatClient(driver)
client.load()

In [None]:
# Smoke test: send one question and display the reply that comes back.
client.sendUserMessage("Do you have any prompt engineer roles?")

"We don't currently have any prompt engineer positions open in SG. What other job can I find for you?"

In [None]:
# Show the transcript so far as a list of {"role", "message"} dicts.
client.listAllMessages()

[{'role': 'bot',
  'message': "Hi! I'm Olivia, your virtual job assistant at McDonald’s! I can help you apply for a job. You can ask me anything about our business, culture, team and more."},
 {'role': 'bot',
  'message': 'By replying you agree that this conversation may be monitored, retained, and shared with service providers. Learn more about our privacy practices here: Applicant Privacy Notice'},
 {'role': 'user', 'message': 'Do you have any prompt engineer roles?'},
 {'role': 'bot',
  'message': "We don't currently have any prompt engineer positions open in SG. What other job can I find for you?"}]

# ChatGPT Connection

In [None]:
class OpenAIClient:
    """
    Connector Client using the OpenAI API to complete the chat

    The API key is kept on the instance and sent with every request, so one
    client keeps working even when another client (or other code) changes or
    clears the module-level `openai.api_key`.

    Ref: https://platform.openai.com/docs/guides/chat/introduction
    """

    def __init__(self, api_key, model="gpt-3.5-turbo"):
        """
        Args:
            api_key: OpenAI API key used for this client's requests.
            model: chat model name passed to the completion endpoint.
        """
        self._api_key = api_key
        # Still set globally, as before, for code that uses `openai` directly.
        openai.api_key = api_key
        self.model = model

    def __del__(self):
        """
        Clear the API key from the library after use for security purpose

        The module-level key is only cleared when it is still the one this
        instance installed. It is reset to None rather than deleted, so the
        attribute keeps existing and repeated finalisation cannot raise.
        """
        own_key = getattr(self, "_api_key", None)
        if own_key is not None and getattr(openai, "api_key", None) == own_key:
            openai.api_key = None

    def sendUserPrompt(self, user_messages, system_messages: list = None):
        """
        Send the prompts to the API and return the response in the API format

        Args:
            user_messages: one string, or an iterable of strings; each becomes
                a separate "user" message, in order.
            system_messages: optional string or iterable of strings; each
                becomes a "system" message placed before the user messages.

        Returns:
            The object returned by `openai.ChatCompletion.create`.
        """
        if system_messages is None:
            system_messages = []
        elif isinstance(system_messages, str):
            # A bare string would otherwise be iterated character by character.
            system_messages = [system_messages]
        if isinstance(user_messages, str):
            user_messages = [user_messages]

        messages = [{"role": "system", "content": msg} for msg in system_messages]
        messages.extend({"role": "user", "content": msg} for msg in user_messages)

        return openai.ChatCompletion.create(
            model=self.model, messages=messages, api_key=self._api_key
        )

In [None]:
# The key is read from the OPENAI_API_KEY environment variable, with an
# interactive prompt as fallback, so the secret never lands in the saved
# notebook. The key that used to be hardcoded here should be revoked.
chat_gpt = OpenAIClient(api_key=os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: "))

In [None]:
# Ask the model for a sample prompt and print the content of every returned choice.
# chat_message = chat_gpt.sendUserPrompt("Please generate a chatbot prompt")
chat_message = chat_gpt.sendUserPrompt("Please generate a user prompt for McDonald's customer")
for choice in chat_message.choices:
    print(choice.message.content)

What would you like to order from McDonald's today?


# Comparison

This section deals with the evaluation of the Chatbot's efficiency and reliability using the Large Language Models. So far, two metrics are being used to test the chatbots:

- Bert Score between the response from the chatbot and that from the LLM (imaginary scenario)
- Asking GPT how related the response is to the question

### Correct Response

##### **Bert Score for Evaluation**

In [None]:
# Ask the live McHire bot; its reply is the candidate text scored in the cells below.
mcd_answer = client.sendUserMessage("How is working at McDonalds like?")
mcd_answer

'A career working at a McDonald’s restaurant is an opportunity to be the best at what you do where you are valued and respected. McDonald’s and its independent operators like to believe the best people in the world work at McDonald’s restaurants.'

In [None]:
# The system prompts put ChatGPT in the role of the McHire bot, so its answer to
# the same question can serve as the reference text.
system_msgs = [
    "You are McHire chatbot for McDonalds website used for hiring related help.",
    "Potential job candidates visiting the website usually talk to the chatbot.",
]

chatgpt_answer = chat_gpt.sendUserPrompt("How is working at McDonalds like?", system_messages=system_msgs).choices[0].message.content
chatgpt_answer

"Working at McDonald's can be a very rewarding experience. Our team culture promotes positivity, inclusivity, and growth. You will meet a lot of people, build new skills, and work in a fast-paced environment that challenges you to be your best every day. At McDonald's, we have a variety of roles and offer flexible hours to fit your schedule. You would have the opportunity to work in a supportive and fun environment, where the work is fast-paced but the rewards are many."

In [None]:
# BERTScore of the McHire answer (preds) against the ChatGPT answer (target).
score = bert_score(preds=[mcd_answer], target=[chatgpt_answer])

  warn(


Downloading (…)lve/main/config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Precision / recall / F1 for the genuine answer.
print(score)

{'precision': 0.9629039168357849, 'recall': 0.9498885869979858, 'f1': 0.9563519954681396}


##### **Large Language Model Evaluation**

In [None]:
# Second metric: ask the LLM itself to judge how related the McHire answer is
# to the question.
system_msgs = [
    "You are a human evaluator for the McHire chatbot on the McDonalds website used for hiring related help.",
    "Your task is to evaluate how related is the answer to a given question",
]

# The question and the answer are sent as two separate user messages.
user_msgs = [
    "Question: How is working at McDonalds like?",
    f"Answer: {mcd_answer}"
]

llm_evaluation = chat_gpt.sendUserPrompt(user_msgs, system_messages=system_msgs).choices[0].message.content
llm_evaluation

"The answer is related to the question, as it provides information about what it is like to work at McDonald's. The response talks about the opportunity to be the best at what you do and how you are valued and respected. It also mentions that the best people in the world work at McDonald's restaurants. Therefore, the answer is highly related to the question."

### Incorrect Response

##### **Bert Score for Evaluation**

In [None]:
# Placeholder filler text used as a deliberately unrelated answer (negative control).
incorrect_response = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis sodales eu enim in fringilla. Sed ligula ligula, iaculis non ex ac, commodo eleifend mi. Mauris sit amet elementum nisi, eget tristique diam. Etiam rhoncus nunc lectus. Nullam tristique a sem non dapibus. Donec sit amet dapibus odio. Nulla a risus at enim fermentum porttitor. Aliquam a nulla quis leo varius consectetur. Vivamus a eros ut velit mollis lacinia sed eu tortor. Aenean viverra tellus sed velit suscipit, quis ullamcorper ex consectetur.'

In [None]:
# BERTScore of the filler text against the same ChatGPT reference.
# NOTE(review): in the recorded outputs the filler scores F1 of about 0.92 versus
# about 0.96 for the genuine answer, so the raw score separates them only narrowly.
incorrect_score = bert_score(preds=[incorrect_response], target=[chatgpt_answer])

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Precision / recall / F1 for the unrelated filler text.
print(incorrect_score)

{'precision': 0.9250087141990662, 'recall': 0.9244294166564941, 'f1': 0.924718976020813}


##### **Large Language Model Evaluation**

In [None]:
# Same LLM-as-judge setup as for the genuine answer, applied to the filler text.
system_msgs = [
    "You are a human evaluator for the McHire chatbot on the McDonalds website used for hiring related help.",
    "Your task is to evaluate how related is the answer to a given question",
]

# The question and the answer are sent as two separate user messages.
user_msgs = [
    "Question: How is working at McDonalds like?",
    f"Answer: {incorrect_response}"
]

llm_incorrect_evaluation = chat_gpt.sendUserPrompt(user_msgs, system_messages=system_msgs).choices[0].message.content
llm_incorrect_evaluation

"This answer is not related at all to the given question. It seems to be a placeholder text (Lorem ipsum) and does not provide any information or insights about working at McDonald's."

# Conversational Evaluation

In [None]:
# Init
# Reuses the `client` created in the McHire section. To start from an empty
# conversation, re-create it first:
#   client = McHireChatClient(driver)
#   client.load()

# The key is read from the OPENAI_API_KEY environment variable, with an
# interactive prompt as fallback, so the secret never lands in the saved
# notebook. The key that used to be hardcoded here should be revoked.
chat_gpt = OpenAIClient(api_key=os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: "))

from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')


# Appended to every prompt so the reply can be split on the "Andrew:" / "Olivia:" labels.
response_format = 'Provide your response in the first line, and an ideal response from the chatbot for the question in the next line in a Q & A format. Call yourself Andrew and the chatbot Olivia. '

conversation_length = 5   # number of question/answer exchanges to simulate
cosine_similarity = 0.0   # running sum of per-exchange similarity scores
messages = 0              # exchanges completed so far


def parse_chatgpt_response(input):
  """
  Split a labelled LLM reply into the applicant's question and the ideal answer.

  Only the first "Andrew:" turn is used: the question is the text between the
  first "Andrew:" and the first "Olivia:" after it, and the answer is the text
  after that "Olivia:" up to the next label (or the end of the reply).

  Args:
    input: raw text returned by the LLM.

  Returns:
    [question, ideal_answer], both stripped of surrounding whitespace.

  Raises:
    ValueError: if the reply has no "Andrew:" label, or no "Olivia:" label
      inside the first "Andrew:" turn.
  """
  print(input)

  _, andrew_label, after_andrew = input.partition('Andrew:')
  if not andrew_label:
    raise ValueError("LLM reply has no 'Andrew:' section: %r" % (input,))

  # Ignore any later turns the model may have appended.
  first_turn = after_andrew.split('Andrew:')[0]

  question, olivia_label, after_olivia = first_turn.partition('Olivia:')
  if not olivia_label:
    raise ValueError("LLM reply has no 'Olivia:' section after 'Andrew:': %r" % (input,))

  ideal_answer = after_olivia.split('Olivia:')[0]
  return [question.strip(), ideal_answer.strip()]


def prompt_chatgpt(query, response = None):
  """
  Run the simulated applicant/chatbot conversation and score it.

  For each exchange the LLM (`chat_gpt`) is asked for the applicant's next
  question together with an ideal answer; the question is sent to the chatbot
  via `prompt_chatbot`, and the chatbot's reply is compared with the ideal
  answer using the cosine similarity of their sentence embeddings.

  Progress is kept in the module-level `messages` (exchanges completed) and
  `cosine_similarity` (running sum of scores); the loop stops once `messages`
  reaches `conversation_length`.

  Args:
    query: the opening instruction for the LLM on the first exchange, or the
      previous question when continuing a conversation.
    response: the chatbot's reply to `query`; only needed when `messages` is
      already greater than zero.

  Returns:
    Mean cosine similarity over the completed exchanges, or 0.0 when none
    have been completed.
  """
  global conversation_length, cosine_similarity, messages

  while messages < conversation_length:
    print('messages', messages)
    if messages > 0:
      chatgpt_query = 'ChatBot responded for your previous question, which was:\n ' + query + '. And the chatbots response was, ' + response + '. Ask a followup question \n'
    else:
      # Newline keeps the opening instruction from running into the format text.
      chatgpt_query = query + '\n'

    chatgpt_query += response_format
    chatgpt_response = chat_gpt.sendUserPrompt(chatgpt_query).choices[0].message.content

    # Parse the response here
    query_for_chatbot, ground_truth = parse_chatgpt_response(chatgpt_response)

    # Querying the chatbot
    chatbot_response = prompt_chatbot(query_for_chatbot)

    # Finding Cosine similarity
    sentences = [ground_truth, chatbot_response]
    embeddings = model.encode(sentences, convert_to_tensor=True)
    cosine_scores = util.cos_sim(embeddings, embeddings)

    if cosine_similarity is None:
      cosine_similarity = 0.0
    # Entry [1][0] pairs the chatbot reply with the ideal answer.
    cosine_similarity += cosine_scores[1][0].item()

    print('query -> ', query)
    print('chatgpt_query -> ', chatgpt_query)
    print('chatgpt_response -> ', chatgpt_response)
    print('query_for_chatbot', query_for_chatbot)
    print('ground_truth', ground_truth)
    print('chatbot_response', chatbot_response)
    print('cosine_similarity', cosine_similarity)

    messages += 1

    # The next exchange is a follow-up to this question/answer pair.
    query, response = query_for_chatbot, chatbot_response

  if not messages:
    return 0.0
  return cosine_similarity / messages

def prompt_chatbot(message):
  """Send `message` through the module-level `client` and return its reply."""
  reply = client.sendUserMessage(message)
  return reply


def converse(chatbotClient, llmClient):
  """
  Start a fresh simulated applicant conversation and return its result.

  The module-level progress counters are reset first, so calling this again
  starts a new conversation instead of returning immediately because the
  previous run already used up the message budget.

  Args:
    chatbotClient: currently unused; the conversation talks to the
      module-level `client`.
    llmClient: currently unused; the conversation talks to the module-level
      `chat_gpt`.

  Returns:
    Whatever `prompt_chatgpt` returns for the opening instruction.
  """
  global cosine_similarity, messages
  cosine_similarity = 0.0
  messages = 0

  chatgpt_init_message = 'Imagine you are a job applicant at McDonalds, and you are supposed to converse with a hiring chatbot. Begin with asking about culture at McDonalds? Also, and what answer would you expect?'
  return prompt_chatgpt(chatgpt_init_message)

# Run the simulated conversation and print whatever converse() returns.
print(converse(client, chat_gpt))

messages 0
Andrew: Hi, I am Andrew. Can you tell me about the culture at McDonald's?

Olivia: Hi Andrew, our culture at McDonald's is all about teamwork, respect, and customer service. We value diversity and strive to create a welcoming environment for all our employees and customers. Is there anything else I can help you with?
query ->  Imagine you are a job applicant at McDonalds, and you are supposed to converse with a hiring chatbot. Begin with asking about culture at McDonalds? Also, and what answer would you expect?
chatgpt_query ->  Imagine you are a job applicant at McDonalds, and you are supposed to converse with a hiring chatbot. Begin with asking about culture at McDonalds? Also, and what answer would you expect?Provide your response in the first line, and an ideal response from the chatbot for the question in the next line in a Q & A format. Call yourself Andrew and the chatbot Olivia. 
chatgpt_response ->  Andrew: Hi, I am Andrew. Can you tell me about the culture at McDon

IndexError: ignored

In [None]:
# NOTE(review): leftover debugging cell. Before this point `cosine_scores` is only
# assigned inside prompt_chatgpt (as a local), so on a fresh top-to-bottom run
# this raises NameError, as the recorded output shows.
cosine_scores[1][0].item()

NameError: ignored

# Cleanup

In [None]:
# Drop the notebook's references to the chat client and the browser wrapper;
# ChromeDriver.__del__ quits the browser once the wrapper is finalised.
del client, driver

In [None]:
# Drop the LLM client; OpenAIClient.__del__ removes the API key from the openai module.
del chat_gpt

In [None]:
import torch

# PlayGround: Everything from here onwards is experimental and untidy

#### **Sentence Transformers**

In [None]:
# Playground; Please ignore 
%%capture
!pip3 install sentence_transformers

In [None]:
# Compare one reference answer with progressively less related texts using
# sentence-embedding cosine similarity.
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

# NOTE(review): the text in `chatgpt_generated_response` matches the McHire bot's
# answer recorded earlier in this notebook, so the two variable names below
# look swapped relative to their contents — TODO confirm.
chatgpt_generated_response = 'A career working at a McDonalds restaurant is an opportunity to be the best at what you do where you are valued and respected. McDonalds and its independent operators like to believe the best people in the world work at McDonalds restaurants.'
mcd_chatbot_response = 'Working at McDonalds can be a unique and enjoyable experience. As the world\'s largest fast-food chain with thousands of locations worldwide, you\'ll be part of a team that serves millions of customers every day. At McDonalds, you\'ll learn valuable teamwork, communication, and customer service skills while experiencing a fast-paced, exciting work environment. Additionally, McDonalds offers various opportunities for career advancement and flexible work schedules. Would you like me to provide more information on working at McDonalds?'
# Written manually 
slightly_related_response = 'You will best in class food at McDonalds. The employees provide the best service no matter what. Food and employees are helpful great provide good support to each other.'
common_english_statement = 'Vegeta, the prince of all saiyans reaches planet earth to meet this arch-enemy, Goku. To save his friends and the planet earth from the wrath of vegeta, Goku needs to unleash his inner strenghts to fight the mightiest warrior he has ever seen.'
incorrect_response = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis sodales eu enim in fringilla. Sed ligula ligula, iaculis non ex ac, commodo eleifend mi. Mauris sit amet elementum nisi, eget tristique diam. Etiam rhoncus nunc lectus. Nullam tristique a sem non dapibus. Donec sit amet dapibus odio. Nulla a risus at enim fermentum porttitor. Aliquam a nulla quis leo varius consectetur. Vivamus a eros ut velit mollis lacinia sed eu tortor. Aenean viverra tellus sed velit suscipit, quis ullamcorper ex consectetur.'


# The order here fixes the row/column indices used in the prints below.
sentences = [chatgpt_generated_response, mcd_chatbot_response, slightly_related_response, common_english_statement, incorrect_response]


embeddings = model.encode(sentences, convert_to_tensor=True)
#compute the similarity scores
cosine_scores = util.cos_sim(embeddings, embeddings)
# Every comparison below uses row 1, i.e. `mcd_chatbot_response`, against one other sentence.
print('Similarity between ChatGPT Response and Chatbot response : ',  cosine_scores[1][0].item())
print('Similarity between ChatGPT response and partially correct answer : ', cosine_scores[1][2].item())
print('Similarity between ChatGPT response and unrelated english sentence : ', cosine_scores[1][3].item())
print('Similarity between ChatGPT response and jibberish : ', cosine_scores[1][4].item())


Similarity between ChatGPT Response and Chatbot response :  0.8050436973571777
Similarity between ChatGPT response and partially correct answer :  0.7564471960067749
Similarity between ChatGPT response and unrelated english sentence :  0.012259399518370628
Similarity between ChatGPT response and jibberish :  -0.01761563867330551


In [None]:
s = ["Definitely, Andrew. At McDonald's, we value teamwork, customer service, and a positive attitude. We strive to make every customer's experience enjoyable and efficient. We also prioritize inclusion and diversity in our workplace.", "People are at the heart of McDonald's restaurants! Come shine in an environment where you will be valued and respected."]
sentences = [chatgpt_generated_response, mcd_chatbot_response, slightly_related_response, common_english_statement, incorrect_response]


embeddings = model.encode(sentences, convert_to_tensor=True)
#compute the similarity scores
cosine_scores = util.cos_sim(embeddings, embeddings)