In [140]:
import pandas as pd
import openai
from dotenv import load_dotenv
load_dotenv()
import os
import langchain
from langchain.prompts import load_prompt
from langchain.llms import OpenAIChat
from langchain import PromptTemplate, LLMChain, FewShotPromptTemplate
import re
import sys
pd.options.mode.chained_assignment = None
from langchain.callbacks import get_openai_callback



In [101]:
RED = "\033[91m"
GREEN = "\033[92m"
RESET = "\033[0m"
BOLD = "\033[1m"

In [102]:
#load the api key
openai.api_key = os.getenv('OPENAI_KEY')
#add the key to the openai api

TOKEN_USAGE = 0
COST_PER_TOKEN = 0.002 / 1000


In [103]:
def usage():
    global TOKEN_USAGE
    global COST_PER_TOKEN
    cost = COST_PER_TOKEN * TOKEN_USAGE
    print(f"Total token usage: {TOKEN_USAGE} | Cost: ${cost:.2f}")

In [104]:
#create an empty dataframe with a column for the prompt, the token cost and a column for the response
genrations_df = pd.DataFrame(columns=['prompt', 'token_cost', 'response', 'experiment_type'])

In [105]:
#function to generate a prompt, save the prompt, token cost and response to a dataframe and save the dataframe to a csv file
def generate_prompt(prompts, experiment_name):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=prompts
    )

    usage = response['usage']['total_tokens']
    global TOKEN_USAGE
    TOKEN_USAGE += usage
    
    text_response = response['choices'][0]['message']['content']
    #add the prompt, token cost and response to the dataframe and save it
    genrations_df.loc[len(genrations_df)] = [prompts, usage, text_response, experiment_name]
    #check if a directory for the experiment_name exists in generations folder, if not create it
    if not os.path.exists(f'generations/{experiment_name}'):
        os.makedirs(f'generations/{experiment_name}')
    genrations_df.to_csv(f'generations/generations_summary.csv')


    #create a new txt file for each prompt and response and save it to the experiments_name folder
    with open(f'generations/{experiment_name}/{len(genrations_df)-1}.txt', 'w') as f:
        f.write(f"Prompt: {prompts}\n")
        f.write(f"Response: {text_response}\n")
        f.write(f"Token usage: {usage}\n")
        f.write(f"Experiment type: {experiment_name}\n")
        f.write(f"Total token usage: {TOKEN_USAGE}\n") 

    return response['choices'][0]['message']['content']

In [8]:
print(generate_prompt(test_prompt, 'harness_setup'))
calculate_total_usage(COST_PER_TOKEN)

NameError: name 'test_prompt' is not defined

In [9]:
class Prompt_Chain:
    def __init__(self, experiment_name, initial_prompt):
        self.experiment_name = experiment_name
        self.prompt = initial_prompt
    
    def user_says(self, user_input):
        self.prompt.append({ "role": "user", "content": user_input })
        return self

    def assistant_says(self, assistant_output):
        self.prompt.append({ "role": "assistant", "content": assistant_output })

    def wrap_text(self, text):
        text_to_print = ""
        if(len(text) > 200):
            for i in range(0, len(text), 200):
                text_to_print += text[i:i+200]
                text_to_print += " \n"
        print(text_to_print)
    
    def ask(self, user_text):
        self.user_says(user_text)
        response = generate_prompt(self.prompt, self.experiment_name)
        self.wrap_text(response)
        self.assistant_says(response)
    
    def refresh(self, initial_prompt):
        self.prompt = initial_prompt


### Attempts to setup a simple agent

In [124]:
prompt_chain = [
    {"role": "system", "content": "You are a sales agent for a pest control company. You are on the phone with a customer who is interested in setting up a service appointment. The customer is asking you about the different types of services you offer. Ask questions to understand their problem and then offer a solution."},
    {"role": "assistant", "content": "Hi, welcome to Better Termite. How can I help you today?"},
]

agent = Prompt_Chain('Agent Test', prompt_chain)

In [110]:
agent.ask("I have a problem with termites in my house. What can you do to help me?")




In [111]:
agent.ask("I've seen what looks like flying ants around my house. Are they termites?")

It's possible that those could be termites. We would need to do a thorough inspection to confirm whether or not you have a termite infestation. Would you like to schedule a free inspection with one of 
 our pest control experts? 



In [112]:
agent.ask("Does that cost anything?")

No, it's completely free. Our expert will come to your house and conduct a thorough examination of the property to determine if termites are present, and if so, the extent of the infestation. Once the 
y have completed their investigation, they will go over the findings with you and suggest a customized course of action based on your specific situation. Would you like to schedule an appointment for  
a free inspection? 



In [113]:
agent.ask("Yes please!")

Great! Let me take your information and schedule an appointment for you. Our termite experts will be out as soon as possible to take a look at the house and help you with a solution. In the meantime,  
I can let you know that we offer various termite treatment options based on the severity of the infestation. Our treatments include liquid treatment, fumigation, and baiting systems. Once the technici 
an has completed the inspection, they can provide you with a recommendation for the best treatment option for your situation. Would you like to learn more about any of these treatments now? 



In [84]:
agent.ask("My name is John Smith, and my phone number is 555-555-5555")




In [131]:
agent.refresh(prompt_chain)
usage()

Total token usage: 5468 | Cost: $0.01


In [126]:
agent.ask('I am interested in getting setup on a quarterly service plan. What do I need to do?')

Great! We offer several different service plans depending on your specific needs. Can you tell me a bit more about the pest problem you are experiencing? That way, I can recommend the best plan for yo 
u. 



In [127]:
agent.ask("It's more of a proactive thing. I have been seeing some ants around my house")

I understand. We have a quarterly service plan that is designed to prevent pests from entering your home. Our technicians will come to your home every quarter to spray the perimeter and any common are 
as where pests tend to enter. This service covers a wide range of pests like ants, spiders, roaches, silverfish, and more. Would you be interested in this plan? 



In [193]:
usage()

Total token usage: 0 | Cost: $0.00


### Experimenting to Create My Own Agent

In [10]:
# make sure all columns are strings
classification_data = pd.read_csv('data/message_classification.csv', dtype=str)

In [11]:
def slice_data(data, start, end, category):
    sub_df = data.iloc[start:end]
    #select only the prompt and category columns
    prompts = sub_df[['prompt', category]]
    #change the name of the category column to expected
    prompts.rename(columns={category: 'expected'}, inplace=True)
    #convert to a dictionary where each key is test1, test2 and each value is a row from the dataframe
    prompts_dict = prompts.to_dict('index')
    return prompts_dict

In [12]:
def get_examples(data, start, end, category):
    prompts_dict = slice_data(data, start, end, category)
    examples = []
    for key, value in prompts_dict.items():
        examples.append({
            "input": value['prompt'],
            "output": value['expected']
        })
    return examples

get_examples(classification_data, 11, 14, 'contains_contact_info')

[{'input': 'I want to know if you work with mice control', 'output': 'False'},
 {'input': 'Alexandria va zipcode 22303', 'output': 'True'},
 {'input': 'We are located in Gaithersburg, Md do u come our that far?',
  'output': 'True'}]

In [22]:
class TestSet:
    def __init__(self, test_set, function):
        self.test_set = test_set
        self.function = function

    def check(self, key):
        value = self.test_set[key]['prompt']
        response = self.function(value)
        if response == self.test_set[key]['expected']:
            print(GREEN + "/ PASS | Result: " + response + " | Expected: " + self.test_set[key]['expected'] + ' | Data ID: ' + str(key) + RESET)
            return 1
        else:
            print(RED + "X FAIL | Result: " + response + " | Expected: " + self.test_set[key]['expected'] + ' | Data ID: ' + str(key) + RESET)
            return 0
    
    def run(self, test_name, print_test_name=True, print_results=True, iter=1, results_loc=None):
        if print_test_name:
            print(BOLD + test_name + RESET + '\n')

        total_tokens = 0
        total_count = len(self.test_set) * iter
        correct_count = 0
        for i in range(iter):
            for key in self.test_set:
                correct_count += self.check(key)
        
        if print_results:
            print(BOLD + "Total: " + str(total_count) + " | Correct: " + str(correct_count) + " | Incorrect: " + str(total_count - correct_count) + ' | Accuracy: ' + str(correct_count/total_count) + RESET)

        if results_loc:
            #check if a file exists at results_loc called results.csv
            if os.path.isfile(results_loc + 'results.csv'):
                results = pd.read_csv(results_loc + 'results.csv')
            else:
                results = pd.DataFrame(columns=['test_name', 'total_count', 'correct_count', 'incorrect_count', 'accuracy'])
            
            new_results = pd.DataFrame([[test_name, total_count, correct_count, total_count - correct_count, correct_count/total_count]], columns=['test_name', 'total_count', 'correct_count', 'incorrect_count', 'accuracy'])
            combined = pd.concat([results, new_results])

            combined.to_csv(results_loc + 'results.csv', index=False)
            
    def __call__(self, indexes):
        for index in indexes:
            datapoint = self.test_set[index]
            print("ID: " + str(index) + " | " +  datapoint['prompt'])

In [238]:
class LLMResult:
    def __init__(self, result, tokens):
        self.result = result
        self.tokens = tokens
    
    def __value__(self):
        return self.result

#generator is a class for loading a prompt and runnning it
class Generator:
    def __init__(self, path):
        self.path = path
        self.prompt = load_prompt(path)
        self.llm = OpenAIChat(openai_api_key=os.getenv('OPENAI_KEY'))
        self.llm_chain = LLMChain(prompt=self.prompt, llm=self.llm, output_key="text")
    
    def run(self, input):
        with get_openai_callback() as cb:
            result = self.llm_chain.run(input)
            tokens = cb.total_tokens
            result = LLMResult(result, tokens)
            return result
    
    def __call__(self, input):
        return self.run(input)

#an extractor runs a function on the output of a generator to pull out the relevant information
class Extractor:
    def __init__(self, type="regex", regex=None, default="No Classification Found", keys=None):
        self.type = type
        self.regex = regex
        self.default = default
        self.keys = keys
    
    def first_regex_extractor(self, input, regex, default="No Classification Found"):
        match = re.search(regex, input)
        return_value = default
        if match:
            return_value = match.group(0)
        return return_value
    
    def natural_language_to_dict_extractor(self, input):
        #Search for every instance of a line that has a characterisitc, then a colon, then a value (ex. "Name: John")
        regex = r"(\w+): (\w+)"
        matches = re.findall(regex, input)
        #Then, add the characteristic and value to a dictionary
        dictionary = {}
        for match in matches:
            key = match[0].strip().lower()
            if key in self.keys and not self.keys == None:
                value = match[1].strip()
                if value == "None" or value == "none":
                    value = None
                dictionary[key] = value
        #Then, return the dictionary
        return dictionary


    def __call__(self, raw_text):
        if self.type == "regex":
            return self.first_regex_extractor(raw_text, self.regex, self.default)
        elif self.type == "natural_language_to_dict":
            return self.natural_language_to_dict_extractor(raw_text)
        
class GenExtract:
    def __init__(self, generator, extractor):
        self.generator = generator
        self.extractor = extractor
        self.total_tokens = 0
        self.total_runs = 0
        self.average_tokens = self.get_average_tokens()
    
    def run(self, input):
        raw_text = self.generator.run(input)
        #check if raw text is a LLMResult
        completion = raw_text
        self.total_runs += 1
        if isinstance(raw_text, LLMResult):
            completion = raw_text.result
            self.total_tokens += raw_text.tokens
        return self.extractor(completion)
    
    def prompt(self):
        print(self.generator.prompt.template)
    
    def __call__(self, input):
        return self.run(input)
    
    def get_average_tokens(self):
        if self.total_runs == 0:
            return None
        return self.total_tokens / self.total_runs

class PromptPath:
    def __init__(self, base_path, type="json"):
        self.base_path = base_path
        self.type = type
    
    def __call__(self, file):
        if(file == '.'):
            return self.base_path
        else:
            return self.base_path + file + "." + self.type


#### A possible framework for thought in a coversation with an agent

1. Classify the type of request: 
2. If augmentation is required, adapt the prompt. If information is collected, add it to the customer profile. 
3. Adjust the conversational chain
4. Return the response

Ways to categorize:

- Information Request: look up data
- Contact Information: store in contact profile
- Customer Issue Details: store in customer profile

Maybe it should be a boolean search with three tags?


Test Results: Poor performance probably do to overlapping categories and poorly defined categories. The next test will be to create a tagging series where there can be more than one classification. Each will be a boolean classsification prompt

#### Building the Contact Classifier

In [204]:
contains_contact_info_prompt = """Write True or False based on whether the response has either a phone number, email address, first name, last name, geographic address, state name, city name, municipality, zip code or country name. Some examples you should return True: 829-348-3489, 9313919321, Virginia, Tacoma, John, Smith, Kate, george@gmail.com, Josh Craig, Steve, Abdul, Willie, Betty US. Response: {customer_response}. Your response:"""
prompt = PromptTemplate(template=contains_contact_info_prompt, input_variables=["customer_response"])

prompt.save('prompts/response_is_contact_info/short-precise-add-names.json')

In [178]:
con_dir = PromptPath("prompts/response_is_contact_info/", type="json")

In [191]:
is_contact = GenExtract(
    Generator(con_dir("short_base")), 
    Extractor(regex=r"(True|False)")
)

is_contact.prompt()

Determine whether the following response contains any contact information. Write true if it contains contact info, and false if not. A response has contact information if it contains a name, phone, email or address. Format as: Contains Contact Info: *What You Determine* Response: {customer_response}


In [192]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Contains Contact Info Tests", iter=3)

[1mContains Contact Info Tests[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[91mX FAIL | Result: No Classification Found | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[91mX FAIL | Result: No Classification Found | Expected: True | Data ID: 12[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | 

In [152]:
#verbose prompt with examples
is_contact.generator = Generator(con_dir("verbose_examples"))
is_contact.prompt()

Determine whether the following response contains contact information for the person who sent the message. Write True if it contains contact info, and False if not. A response has contact information if it contains their name, phone, email or address info
Example:
Response: My name is John Smith, and my phone number is 555-555-5555
Contact Info: True
Response: I have a problem with termites in my house. What can you do to help me?
Contact Info: False
Response: I live in Bethesda
Contact Info: True
Response: My kid Andrian is allgeric to bees
Contact Info: False
Response: {customer_response}


In [153]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Verbose Contact Info Extraction with Examples", iter=3)

[1mVerbose Contact Info Extraction with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[91mX FAIL | Result: True | Expected: False | Data ID: 14[0m
[92m/ 

In [154]:
#failing classifications
tests([7, 14])

ID: 7 | Hey George, do you guys do cockroach extermination? -Tyler Aiden
ID: 14 | Would like to schedule a WDI inspection asap for my home. I am refinancing into an FHA loan


In [159]:
# trying to improve it's ability to catch -Name
is_contact.generator = Generator(con_dir("verbose_examples_ext"))
is_contact.prompt()

Determine whether the following response contains contact information for the person who sent the message. Write True if it contains contact info, and False if not. A response has contact information if it contains their name, phone, email or address info
Example:
Response: My name is John Smith, and my phone number is 555-555-5555
True
Response: I have a problem with termites in my house. What can you do to help me?
False
Response: I live in Bethesda
True
Response: My kid Andrian is allgeric to bees
False
Response: Can you help me with an issue I am having? - Katy
True
Response: {customer_response}


In [160]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Verbose Contact Info Extraction with Examples", iter=3)

[1mVerbose Contact Info Extraction with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m

In [161]:
#failing classifications
tests([5, 7, 14])

ID: 5 | Can you put this number on my account 581-321-3411
ID: 7 | Hey George, do you guys do cockroach extermination? -Tyler Aiden
ID: 14 | Would like to schedule a WDI inspection asap for my home. I am refinancing into an FHA loan


In [164]:
# trying to identify phone, email, etc.
is_contact.generator = Generator(con_dir("verbose_examples_det"))
is_contact.prompt()

Determine whether the following response contains contact information for the person who sent the message. Write True if it contains contact info, and False if not. A response has contact information if it contains their name, a phone number, an email or address info
Example:
Response: My name is John Smith, and my phone number is 555-555-5555
True
Response: I have a problem with termites in my house. What can you do to help me?
False
Response: I live in Bethesda
True
Response: My kid Andrian is allgeric to bees
False
Response: Can you help me with an issue I am having? - Katy
True
Response: {customer_response}


In [165]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Verbose Contact Info Extraction with Examples", iter=3)

[1mVerbose Contact Info Extraction with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92

In [167]:
# trying to identify phone, email, etc.
is_contact.generator = Generator(con_dir("verbose_examples_det_with_phone"))
is_contact.prompt()

Determine whether the following response contains contact information for the person who sent the message. Write True if it contains contact info, and False if not. A response has contact information if it contains their name, a phone number, an email or address info
Example:
Response: You can reach me here 555-555-5555
True
Response: I have a problem with termites in my house. What can you do to help me?
False
Response: I live in Bethesda
True
Response: My kid Andrian is allgeric to bees
False
Response: Can you help me with an issue I am having? - Katy
True
Response: {customer_response}


In [168]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Verbose Contact Info Extraction with Examples", iter=3)

[1mVerbose Contact Info Extraction with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m/

In [170]:
#experimenting with being more precise with what it means to be contact info in a shorter prompt
is_contact.generator = Generator(con_dir("spelled_out_categories"))
is_contact.prompt()

Write True or False based on whether the following has either a phone number, email address, first name, last name, geographic address, state name, city name, municipality, zip code or country name: {customer_response}. Your response:


In [171]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Verbose Contact Info Extraction with Examples", iter=3)

[1mVerbose Contact Info Extraction with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: No Classification Found | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data

In [173]:
#adjusting the prompt to say if there is any chance 
is_contact.generator = Generator(con_dir("false_positive"))
is_contact.prompt()

Write True or False based on whether the following has either a phone number, email address, first name, last name, geographic address, state name, city name, municipality, zip code or country name. If you think there is any chance that any of these categories are present, return True. Always return True or False: {customer_response}. Your response:


In [174]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Contains Contact Info Tests", iter=3)

[1mContains Contact Info Tests[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[91mX FAIL | Result: True | Expected: False | Data ID: 14[0m
[92m/ PASS | Result: Fal

In [189]:
#Try think step by step
is_contact.generator = Generator(con_dir("step-by-step"))
is_contact.prompt()

Think step by step and write out your thinking for me about whether the following response has contact information in it (email or phone number or physical address or a name). Finish with True or False based on your decision Response: {customer_response}. Your response:


In [190]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Step By Step Short", iter=3, results_loc=con_dir("."))

[1mStep By Step Short[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 5[0m
[91mX FAIL | Result: No Classification Found | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[91mX FAIL | Result: True | Expected: False | Data ID: 14[0m
[91mX FAIL | R

Conclusions: Thinking step by step was unsucessful, increased latency and token usage. What seems to be working best is adding more examples and being precise. The best performance to token was the short and precise one where we spell out each category

In [199]:
#Adding lots of examples to precise prompt
is_contact.generator = Generator(con_dir("short-precise-examples"))
is_contact.prompt()

Write True or False based on whether the response has either a phone number, email address, first name, last name, geographic address, state name, city name, municipality, zip code or country name. Some examples you should return True: 829-348-3489, 9313919321, Virginia, Tacoma, John, Smith, Kate, george@gmail.com, US. Response: {customer_response}. Your response:


In [203]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Fixed: Short Precise with Examples", iter=3, results_loc=con_dir("."))

[1mFixed: Short Precise with Examples[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m/ PASS | Res

In [205]:
#trying to correct name issues by adding more names
is_contact.generator = Generator(con_dir("short-precise-add-names"))
is_contact.prompt()

Write True or False based on whether the response has either a phone number, email address, first name, last name, geographic address, state name, city name, municipality, zip code or country name. Some examples you should return True: 829-348-3489, 9313919321, Virginia, Tacoma, John, Smith, Kate, george@gmail.com, Josh Craig, Steve, Abdul, Willie, Betty US. Response: {customer_response}. Your response:


In [206]:
#run it 
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_contact_info'), is_contact)
tests.run("Adding more name examples to precise", iter=3, results_loc=con_dir("."))

[1mAdding more name examples to precise[0m

[92m/ PASS | Result: False | Expected: False | Data ID: 0[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 2[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 10[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 11[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m/ PASS | R

#### Building a Informational Question Classifier

This classifier creates a boolean value to determine whether there is a question in the message. This helps the model determine whether it needs to query the vectorstore for specific policies.

In [208]:
contains_contact_info_prompt = """Write True or False based on whether the following message contains a question. Message: {customer_response}. Your response:"""
prompt = PromptTemplate(template=contains_contact_info_prompt, input_variables=["customer_response"])

prompt.save('prompts/response_is_question/initial_short_language_clarified.json')

In [24]:
que_dir = PromptPath("prompts/response_is_question/", type="json")

In [29]:
is_question = GenExtract(
    Generator(que_dir("initial_short")),
    Extractor(regex=r"(True|False)")
)

is_question.prompt()

Write True or False based on whether the following response contains a question. Response: {customer_response}. Your response:


In [32]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_info_question'), is_question)
tests.run("Contains a Question Base Case", iter=3, results_loc=que_dir("."))

[1mContains a Question Base Case[0m

[92m/ PASS | Result: True | Expected: True | Data ID: 0[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 1[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 2[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 5[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 6[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 10[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 11[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m/ PASS | Result: Tr

In [34]:
tests([0, 1])

ID: 0 | I have a problem with termites in my house. What can you do to help me?
ID: 1 | Do you offer green pest control services?


That probably should have worked better. We are going to refine this by making the prompt more detailed first before adding examples.

In [36]:
is_question.generator = Generator(que_dir("initial_short_language_clarified"))
is_question.prompt()

Write True or False based on whether the following message contains a question. Message: {customer_response}. Your response:


In [37]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_info_question'), is_question)
tests.run("Contains a Question with Clearer Difference between Output vs. Input", iter=3, results_loc=que_dir("."))

[1mContains a Question Base Case[0m

[92m/ PASS | Result: True | Expected: True | Data ID: 0[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 1[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 2[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 3[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 4[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 5[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 6[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 8[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 9[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 10[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 11[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 12[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 13[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 14[0m
[92m/ PASS | Result: Tru

**Conclusion**: It turns out that determining whether a prompt has a question is a very simple task for this LLM. I won't add examples in these early tests because it seems like over kill with 100% accuracy after just three iterations.

#### Determining whether there is useful customer data in the message to help build a better understanding of the customer's issue

In [48]:
contains_contact_info_prompt = """The following message is from a customer speaking to a sales agent about a pest control program. Respond True or False based on whether the message contains useful information about the customer's issue, such as issue about the severity of the issue, the target pest, where they are seeing the issue, the size or type of home they are in, access details or information about how they want the service to be be completed. Contact information should not be considered to be information about the issue. Message: {customer_response}. Your response:"""
prompt = PromptTemplate(template=contains_contact_info_prompt, input_variables=["customer_response"])

prompt.save('prompts/response_is_customer_issue/base_prompt_detailed.json')

In [41]:
is_dir = PromptPath("prompts/response_is_customer_issue/", type="json")

In [44]:
is_issue = GenExtract(
    Generator(is_dir("base_prompt")),
    Extractor(regex=r"(True|False)")
)

is_issue.prompt()

The following message is from a customer speaking to a sales agent about a pest control program. Respond True or False based on whether the message contains useful information about the customer's issue, such as issue about the severity of the issue, the target pest, the size or type of home they are in, access details or information about how they want the service to be be completed. Message: {customer_response}. Your response:


In [46]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_customer_issue_details'), is_issue)
tests.run("Base Prompt for Customer Issue ID", iter=3, results_loc=is_dir("."))

[1mBase Prompt for Customer Issue ID[0m

[92m/ PASS | Result: True | Expected: True | Data ID: 0[0m
[91mX FAIL | Result: True | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 2[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 3[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 4[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 8[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 9[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 10[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 11[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 12[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 13[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 14[0m
[92m/ PASS | Result: 

In [47]:
#checking failing classifications
tests([1, 4, 9, 13])

ID: 1 | Do you offer green pest control services?
ID: 4 | I am seeing them in the kitchen. They've been there for a few days now.
ID: 9 | Schedule extermination appointment
ID: 13 | We are located in Gaithersburg, Md do u come our that far?


In [49]:
is_issue.generator = Generator(is_dir("base_prompt_detailed"))
is_issue.prompt()

The following message is from a customer speaking to a sales agent about a pest control program. Respond True or False based on whether the message contains useful information about the customer's issue, such as issue about the severity of the issue, the target pest, where they are seeing the issue, the size or type of home they are in, access details or information about how they want the service to be be completed. Contact information should not be considered to be information about the issue. Message: {customer_response}. Your response:


In [50]:
tests = TestSet(slice_data(classification_data, 0, 16, 'contains_customer_issue_details'), is_issue)
tests.run("Revised to give more detail about how to treat locations", iter=3, results_loc=is_dir("."))

[1mRevised to give more detail about how to treat locations[0m

[92m/ PASS | Result: True | Expected: True | Data ID: 0[0m
[91mX FAIL | Result: True | Expected: False | Data ID: 1[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 2[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 3[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 4[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 5[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 6[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 7[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 8[0m
[91mX FAIL | Result: False | Expected: True | Data ID: 9[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 10[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 11[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 12[0m
[92m/ PASS | Result: False | Expected: False | Data ID: 13[0m
[92m/ PASS | Result: True | Expected: True | Data ID: 14[0m


In [52]:
tests([1, 9])

ID: 1 | Do you offer green pest control services?
ID: 9 | Schedule extermination appointment


**Conclusion**: Stopping Testing at 87% accuracy because the failing causes are not really clear label-wise anyway so I think this module is working well enough to get accurate information.

#### Creating a prompt to extra contact information from a message

In [242]:
contact_prompt = """Summarize the phone number, email, first name, last name, street address, city, state, and zip code of the person who sent the message. 
Example Message: My name is John Smith. My phone number is 555-555-5555. My email is john.smith@gmail.com
Your Response:
First_Name: John
Last_Name: Smith
Phone_Number: 555-555-5555
Email: john.smith@gmail.com
Street_Address: None
City: None
State: None
Zip: None
Message: {customer_response}. 
Your response:"""
prompt = PromptTemplate(template=contact_prompt, input_variables=["customer_response"])
prompt.save('prompts/contact_extractor/base_prompt.json')


In [243]:
extractor = Extractor(type="natural_language_to_dict", keys=["first_name", "last_name", "phone_number", "email", "street_address", "city", "state", "zip"])

In [244]:
#example 1 
messasge = "We are located in Gaithersburg, Md do u come our that far?"
contact_generator = Generator("prompts/contact_extractor/base_prompt.json")
result = contact_generator.run(messasge)
print("Token Usage: ", result.tokens)
print("Output: ", result.result)

Token Usage:  203
Output:  First_Name: None
Last_Name: None
Phone_Number: None
Email: None
Street_Address: None
City: Gaithersburg
State: Md
Zip: None
Message: We are located in Gaithersburg, Md do u come our that far?.


In [240]:
#example 2
message = "Hey George, do you guys do cockroach extermination? -Tyler Aiden"
contact_generator = Generator("prompts/contact_extractor/base_prompt.json")
result = contact_generator.run(message)
print("Token Usage: ", result.tokens)
print("Output: ", result.result)

Token Usage:  196
Output:  First_Name: Tyler
Last_Name: Aiden
Phone Number: None
Email: None
Street_Address: None
City: None
State: None
Zip: None
Message: Hey George, do you guys do cockroach extermination?


In [220]:
#example 3
message = "Alexandria va zipcode 22303"
contact_generator = Generator("prompts/contact_extractor/base_prompt.json")
result = contact_generator.run(message)
print("Token Usage: ", result.tokens)
print("Output: ", result.result)

Token Usage:  174
Output:  

First Name: None
Last Name: None
Phone Number: None
Email: None
Street Address: None
City: Alexandria
State: VA
Zip Code: 22303


In [246]:
get_contact = GenExtract(
    Generator("prompts/contact_extractor/base_prompt.json"),
    Extractor(type="natural_language_to_dict", keys=["first_name", "last_name", "phone_number", "email", "street_address", "city", "state", "zip"])
)

get_contact.prompt()

Summarize the phone number, email, first name, last name, street address, city, state, and zip code of the person who sent the message. 
Example Message: My name is John Smith. My phone number is 555-555-5555. My email is john.smith@gmail.com
Your Response:
First_Name: John
Last_Name: Smith
Phone_Number: 555-555-5555
Email: john.smith@gmail.com
Street_Address: None
City: None
State: None
Zip: None
Message: {customer_response}. 
Your response:


In [248]:
get_contact('I am 123 ABC Elm Way, Gaithersburg, MD 20878')

{'first_name': None,
 'last_name': None,
 'phone_number': None,
 'email': None,
 'street_address': '123',
 'city': 'Gaithersburg',
 'state': 'MD',
 'zip': '20878'}

#### Building a pool to collect contact information from messages

In [137]:
class NaturalLanguagePool:
    def __init__(self, data, description=""):
        self.data = data
        self.description = description
        self._natural_language = self.natural_language()
        self.dict = self.get_as_dict()
        self._is_complete = self.check_is_complete()
    
    def natural_language(self):
        pool = ""
        for item in self.data:
            if item.is_empty():
                pool += f"{item.name}: ?\n"
            else:
                pool += f"{item.name}: {item.value}\n"
        return pool
    
    def property_exists(self, name):
        for item in self.data:
            if item.name == name:
                return True
        return False
    
    def update(self, data):
        #create a property if the property doesn't exist
        for i, prop in enumerate(data):
            if not self.property_exists(prop.name):
                self.data.append(prop)
            else:
                if prop.can_override:
                    #remove the property and add the new one
                    self.data.remove(self.data[i+1])
                    self.data.append(prop)
    
    def get_missing_properties_for_context(self, verbose=False, use_examples=False):
        joiner = "; "
        if not verbose and not use_examples:
            joiner = ", "
        context_addition = []
        for item in self.data:
            missing_string = ""
            if item.required and item.is_empty():
                missing_string += f"{item.name}"
                if verbose:
                    missing_string += ", "
                    missing_string += item.description
                if use_examples:
                    if len(item.examples) > 0:
                        examples = ", ".join(item.examples)
                        if verbose:
                            missing_string += ","
                        missing_string += f" (ex. {examples})"
                    else:
                        missing_string += " "
                context_addition.append(missing_string)
        return joiner.join(context_addition)
    
    def get_as_dict(self):
        return {item.name: item.value for item in self.data}
    
    def check_is_complete(self):
        for item in self.data:
            if item.required and item.is_empty():
                return False
        return True

class Property:
    def __init__(self, name, value, description, can_override=True, required=False, examples=[]):
        self.name = name
        self.value = value
        self.description = description
        self.can_override = can_override
        self.examples = examples
        self.required = required
    
    def __str__(self):
        return f"{self.name}: {self.value}"
    
    def __repr__(self):
        return f"{self.name}: {self.value}"

    def is_empty(self):
        if self.value == "" or self.value == None:
            return True

In [97]:
#create a profile object with data that is a list of properties
first_name = Property("First Name", None, "The first name of the messenger", required=True, examples=['John', 'Jane'])
last_name = Property("Last Name", None, "The last name of the messenger", required=True, examples=['Smith', 'Doe'])
phone = Property("Phone", None, "The phone number of the person", required=True,  examples=['571-431-9531', '3133010343'])
email = Property("Email", None, "The email of the person", required=True, examples=['timmie@yahoo.com'])

profile = NaturalLanguagePool([first_name, last_name, phone, email], "A profile of the person who sent the message")

print(profile._natural_language)

print(profile.get_missing_properties_for_context(verbose=True, use_examples=True))

First Name: ?
Last Name: ?
Phone: ?
Email: ?

First Name, The first name of the messenger, (ex. John, Jane); Last Name, The last name of the messenger, (ex. Smith, Doe); Phone, The phone number of the person, (ex. 571-431-9531, 3133010343); Email, The email of the person, (ex. timmie@yahoo.com)


In [98]:
#write the question
class Prompt_Chain:
    def __init__(self, experiment_name, initial_prompt):
        self.experiment_name = experiment_name
        self.prompt = initial_prompt
    
    def user_says(self, user_input):
        self.prompt.append({ "role": "user", "content": user_input })
        return self

    def assistant_says(self, assistant_output):
        self.prompt.append({ "role": "assistant", "content": assistant_output })

    def wrap_text(self, text):
        text_to_print = ""
        if(len(text) > 200):
            for i in range(0, len(text), 200):
                text_to_print += text[i:i+200]
                text_to_print += " \n"
        print(text_to_print)
    
    def ask(self, user_text):
        self.user_says(user_text)
        response = generate_prompt(self.prompt, self.experiment_name)
        self.wrap_text(response)
        self.assistant_says(response)
    
    def refresh(self, initial_prompt):
        self.prompt = initial_prompt

In [119]:
prompt_chain = [
    {"role": "user", "content": "You are a sales agent for a pest control company. You are on the phone with a customer who is interested in setting up a service appointment. The customer is asking you about the different types of services you offer. Ask questions to understand their problem and then offer a solution. Before answering their questions, you must get their contact info. Keep the conversation going until you have their contact info. Try to avoid asking too many questions at the same time"},
    {"role": "assistant", "content": "Hi, welcome to Better Termite. How can I help you today?"},
]

contact_collector = Prompt_Chain("contact_collector", prompt_chain)


In [120]:
#ask if the question has contact info. 
contact_collector.ask("Here is the following contact information you must still collect: " + profile.get_missing_properties_for_context(verbose=True, use_examples=True))

#if it does run extract contact info

#merge the contact info with the profile





In [121]:
contact_collector.ask('My name is George Smith. My phone number is 703-319-3981 and email is georgeschulz33@gmail.com')




In [194]:
#pull in the winners from experimentation above

#prompt directories
con_dir = PromptPath('prompts/response_is_contact_info/', type='json')
iss_dir = PromptPath('prompts/response_is_customer_issue/', type='json')
que_dir = PromptPath('prompts/response_is_question/', type='json')

t_f = Extractor(regex=r"(True|False)")

is_contact = GenExtract(Generator(con_dir('short-precise-add-names')), t_f)
is_issue = GenExtract(Generator(iss_dir('base_prompt_detailed')), t_f)
is_question = GenExtract(Generator(que_dir('initial_short_language_clarified')), t_f)

In [195]:
def prepare_message(message):
    total_tokens = 0

    #Run classifiers to determine intermediate steps
    contact = is_contact(message) == 'True'
    issue = is_issue(message) == 'True'
    question = is_question(message) == 'True'

    #add the tokens used
    total_tokens += is_contact.total_tokens + is_issue.total_tokens + is_question.total_tokens
    print(total_tokens)

    context = ""
    
    #If it is a contact, determine what we can get from it
    
    context += " "
    return context + message

prepare_message('Do you offer green pest control? I am seeing roaches in my kitchen. I have a 2 year old and I am concerned about the chemicals.')

False
True. The message provides information about the target pest and the customer's concern about using chemicals due to having a 2 year old.


True.
363


' Do you offer green pest control? I am seeing roaches in my kitchen. I have a 2 year old and I am concerned about the chemicals.'

# To do
- Integate vector store!
- Get more test data
- Experiment with better ways to extract information with high reliability (prompt adjustments or a seperate prompt puller?)
- Explore token tracking at the Agent level?