In [24]:
import pandas as pd
import openai
from dotenv import load_dotenv
load_dotenv()
import os
from langchain.prompts import load_prompt
from langchain import PromptTemplate
from langchain.llms import OpenAIChat
import sys
pd.options.mode.chained_assignment = None
from langchain.callbacks import get_openai_callback
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain import PromptTemplate, LLMChain, VectorDBQA
from langchain.chains import TransformChain, SequentialChain
from langchain.document_loaders.csv_loader import CSVLoader
import re
import json

from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    SystemMessage
)
import asyncio


import sys
sys.path.append('./modules/')
openai.api_key = os.getenv('OPENAI_KEY')
key = os.getenv('OPENAI_KEY')

from Generator import Generator
from Extractor import Extractor
from PromptPath import PromptPath
from GenExtract import GenExtract
from Property import Property
from NaturalLanguagePool import NaturalLanguagePool
from DynamicChatHistory import DynamicChatHistory
from Conversation import Conversation
from ConversationScript import ConversationScript
from ConversationScriptStage import ConversationScriptStage
from show_chat_log import show_chat_log
from is_question import is_question
from customer_profile import customer_profile

# Counter read by the situational-experience embeddings cell: that cell only
# builds its vectorstore while this value is below 20 and increments it after
# each build. Re-running this cell resets the counter to 10.
times_embedding_run = 10

# ANSI escape sequences wrapped around the progress messages printed by the
# chain steps (colour / bold on, then RESET to restore the default style).
RED = "\033[91m"
GREEN = "\033[92m"
RESET = "\033[0m"
BOLD = "\033[1m"

In [25]:
# Shared chat model instance; the LLMChain objects defined further down all pass it as `llm`.
chat = ChatOpenAI(openai_api_key=key)

### Create the embeddings

In [26]:
# Build the `situational_experience` vectorstore from the .txt files in
# data/situational-experience (one TextLoader per file).
situation_loaders = []
situation_docs = []

# Collect a loader for every .txt file found in the data/situational-experience folder.
for file in os.listdir('data/situational-experience'):
    if file.endswith('.txt'):
        loader = TextLoader('data/situational-experience/' + file)
        situation_loaders.append(loader)

# Read every file into memory as documents.
for loader in situation_loaders:
    situation_docs.extend(loader.load())

# Only embed while the run counter is below 20; each build bumps the counter.
# NOTE(review): `situational_experience` is assigned only inside this branch, so
# once the counter reaches 20 a fresh kernel would leave it undefined — confirm
# this guard is still wanted.
if times_embedding_run < 20:
    # Split the documents into chunks (chunk_size=300, no overlap) before embedding.
    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
    documents = text_splitter.split_documents(situation_docs)

    siutation_embeddings = OpenAIEmbeddings(openai_api_key=key)
    situational_experience = Chroma.from_documents(documents, siutation_embeddings)
    times_embedding_run += 1


# Bare string shown as the cell's output.
# NOTE(review): it mentions a "policies" variable, but the store built above is `situational_experience`.
"Embeddings generated into policies variable"

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
Exiting: Cleaning up .chroma directory


'Embeddings generated into policies variable'

In [38]:
# Build the `service_description_search` vectorstore from the .json files in data/services.
service_loaders = []
service_docs = []

# Collect a loader for every .json file found in the data/services folder.
for file in os.listdir('data/services'):
    if file.endswith('.json'):
        loader = TextLoader('data/services/' + file)
        service_loaders.append(loader)

# Each file is loaded as raw text (no splitting), so a document's page_content
# is the file's JSON text; get_relevant_services later parses it with json.loads.
for loader in service_loaders:
    service_docs.extend(loader.load())

service_embeddings = OpenAIEmbeddings(openai_api_key=key)
service_description_search = Chroma.from_documents(service_docs, service_embeddings)
    

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


In [27]:
# Load the detailed services sheet; each row is turned into a service dict further down in this cell.
all_services_df = pd.read_csv('data/services_search - Sheet1.csv')

class Programs:
    """In-memory collection of service records that can be looked up by name."""

    def __init__(self, services):
        # `services` is iterated as records that are indexed with the 'Name' key.
        self.services = services

    def get_service(self, name):
        """Return the first record whose 'Name' equals `name`, or None when there is no match."""
        return next(
            (record for record in self.services if record['Name'] == name),
            None,
        )
    
class Program:
    """Thin wrapper that exposes a single service record through its `dict` attribute."""
    def __init__(self, dict):
        # NOTE(review): the parameter name shadows the built-in `dict` inside this
        # method; it is kept because it is part of the constructor's signature.
        self.dict = dict

# Turn every row of all_services_df into a plain dict, then wrap the list in
# the Programs lookup helper so services can be fetched by name.
all_services = [row.to_dict() for _, row in all_services_df.iterrows()]

all_services_list = Programs(all_services)

#### **CHAIN**: Retrieve relevant experiences from the vectorstore

In [39]:
def get_situational_awareness(inputs: dict) -> dict:
    """Fetch the stored technician experiences most similar to the customer's issue.

    Reads inputs["customer_issue"], runs a similarity search against the
    `situational_experience` vectorstore (passing 5 as the result count) and
    returns the matched texts under the "situational_experience" key.
    Progress messages are printed along the way.
    """
    print(GREEN + 'STEP: Getting situational experiences' + RESET)
    matches = situational_experience.similarity_search(inputs["customer_issue"], 5)
    # Keep only the text of each matched document.
    experiences = []
    for match in matches:
        experiences.append(match.page_content)
    print(RED + 'Situational experiences found: ', experiences, RESET)
    print(GREEN + 'STEP: Analyzing situational experiences' + RESET)
    return { "situational_experience": experiences }

# Chain wrapper so the lookup can be used as a step of a SequentialChain.
retrieve_situational_awareness = TransformChain(
    transform=get_situational_awareness,
    input_variables=["customer_issue"],
    output_variables=["situational_experience"],
)

#### **CHAIN**: Analyze the experiences we've had

In [40]:
# Prompt asking the model to reason about the issue and finish with a severity
# rating of Low, Medium or High. It is filled from three variables:
# {customer_issue}, {property_type} and {situational_experience}.
template_message = """Your goal is to identify how severe this issue is. Think step by step about your answer and use the information you provided to make you answer. After your analysis, please rate the severity as Low, Medium or High.
Customer Issue: {customer_issue}
Property Type: {property_type}
Below are some possible practical experiences from our technicians that may be useful. Some of it may not be relevant to your answer.
{situational_experience}
----
Your Analysis:
"""

template = PromptTemplate(template=template_message, input_variables=["customer_issue", "situational_experience", "property_type"])
# The model's analysis is published under the "siutation_analysis" key. The
# spelling is load-bearing: get_relevant_services reads that exact key.
analyze_issue_chain = LLMChain(llm=chat, prompt=template, output_key="siutation_analysis")

In [41]:
def _fill_formula_placeholders(formula, inputs):
    """Replace the {square_footage}, {target}, {severity} and {acres} placeholders in `formula` with the matching input values."""
    for field in ("square_footage", "target", "severity", "acres"):
        formula = formula.replace("{" + field + "}", str(inputs[field]))
    return formula


def get_relevant_services(inputs: dict) -> dict:
    """Find the services that best match the situation analysis and prepare their price formulas.

    Reads:
        inputs["siutation_analysis"]: text used as the similarity-search query.
        inputs["square_footage"], inputs["target"], inputs["severity"],
        inputs["acres"]: values substituted into each service's
        'Recurring Formula' and 'Initial Formula'.

    Returns:
        {"relevant_services_dict": list of Program objects with the formulas
         filled in, "relevant_services_text": plain-text summary of them}.

    Each Program wraps a *copy* of the catalogue record, so the substitutions
    made here (and any prices added by later steps) never leak into the shared
    `all_services_list` catalogue. Without the copy, the placeholders would be
    gone after the first run and later runs would silently reuse its values.
    Search hits whose name is not in the catalogue are reported and skipped.
    """
    analysis = inputs["siutation_analysis"]
    print(GREEN + 'STEP: Getting relevant services' + RESET)
    search_hits = service_description_search.similarity_search(analysis, 3)

    detailed_services = []
    for hit in search_hits:
        # Each hit's page_content is the JSON text of a service description.
        service_name = json.loads(hit.page_content)['Name']
        catalog_entry = all_services_list.get_service(service_name)
        if catalog_entry is None:
            print(RED + f"STEP: No catalog entry found for service '{service_name}', skipping" + RESET)
            continue
        detailed_services.append(Program(dict(catalog_entry)))

    relevant_services_text = ""
    for service in detailed_services:
        details = service.dict
        relevant_services_text += (
            f"{details['Name']}: {details['Description']}\n"
            f"Best for: {details['Best for']}\n"
            f"Covers: {details['Covered Pests']}\n"
            f"Protection: {details['Level of Protection']}\n"
            f"Price: {details['Cost']}\n\n"
        )

    print(RED + 'STEP: Completed getting services: ', detailed_services, RESET)
    print(GREEN + 'STEP: Begin parsing the formulas' + RESET)

    for service in detailed_services:
        for formula_key in ('Recurring Formula', 'Initial Formula'):
            service.dict[formula_key] = _fill_formula_placeholders(service.dict[formula_key], inputs)

    print(GREEN + 'STEP: Completed parsing the formulas' + RESET)

    return { "relevant_services_dict": detailed_services, "relevant_services_text": relevant_services_text }

# NOTE(review): only "siutation_analysis" is declared as an input, yet the
# transform also reads square_footage/target/severity/acres from the dict it
# is handed — confirm the chain is always run with those keys present.
get_relevant_services_chain = TransformChain(transform=get_relevant_services, input_variables=["siutation_analysis"], output_variables=["relevant_services_dict", "relevant_services_text"])

In [50]:
from langchain.chains import APIChain
import nest_asyncio


# Prompt used to have the model evaluate an Excel-style formula. The reply is
# expected to end with a line of the form "Answer: <value>", which
# async_calculate_price extracts with a regex. {formula} is the only variable.
calculator_template = """Solve this excel formula. Briefly show your thinking for each step. Then write the answer as:
Answer: *answer*
Example:
=if(1=2, 1, 2) + 1
Thinking: 1=2 is false so the answer is 2 + 1
Answer: 3
Formula:
{formula}
"""

def majority_vote(results):
    """Pick the most common answer for every (name, type) pair.

    Args:
        results: iterable of dicts shaped like
            {'name': 'Service 1', 'answer': '5', 'type': 'recurring', 'iter_num': 0}.
            `answer` is None when a calculation failed.

    Returns:
        A list of {'name', 'answer', 'type'} dicts, one per (name, type) pair,
        ordered by first appearance of the name and then of the type.

    Voting rules:
        * String answers are stripped first, so '199' and '199 ' count as the same vote.
        * None answers are ignored unless every answer in the group is None,
          in which case the winner is None.
        * Ties go to the answer that was seen first, making the result deterministic.
    """
    # Group the answers by name, then by type, preserving first-seen order.
    grouped_answers = {}
    for result in results:
        answer = result['answer']
        if isinstance(answer, str):
            answer = answer.strip()
        grouped_answers.setdefault(result['name'], {}).setdefault(result['type'], []).append(answer)

    majority_vote_results = []
    for name, answers_by_kind in grouped_answers.items():
        for kind, answers in answers_by_kind.items():
            # Count only real answers; dicts keep insertion order, so max()
            # returns the earliest answer among those tied for first place.
            tally = {}
            for answer in answers:
                if answer is not None:
                    tally[answer] = tally.get(answer, 0) + 1
            winner = max(tally, key=tally.get) if tally else None
            majority_vote_results.append({ 'name': name, 'answer': winner, 'type': kind })

    return majority_vote_results



async def async_calculate_price(name, chain, formula, type, iter=1, timeout=4):
    """Ask `chain` to evaluate `formula` and extract the value after "Answer:".

    Args:
        name: service name, copied into the result.
        chain: object whose `arun(formula=...)` coroutine returns the model's reply text.
        formula: formula text sent to the chain.
        type: label copied into the result (callers pass 'recurring' or 'initial').
        iter: iteration number copied into the result as "iter_num".
        timeout: seconds allowed for each call before it counts as a failed attempt.

    Returns:
        {"name", "answer", "type", "iter_num"}; `answer` is the stripped text
        following "Answer: " in the reply, or None when no attempt produced one.

    At most `max_retries` calls are made. Both a timeout and a reply without
    an "Answer:" line use up one attempt, so an unparseable reply can no
    longer keep the loop (and the LLM calls) running forever.
    """
    max_retries = 3
    answer = None

    for attempt in range(1, max_retries + 1):
        try:
            resp = await asyncio.wait_for(chain.arun(formula=formula), timeout=timeout)
        except asyncio.TimeoutError:
            print(RED + f"STEP: Timeout reached for {formula} after {timeout} seconds (Retry {attempt})" + RESET)
            continue

        print(BOLD, resp, RESET)
        # Pull the "Answer: <value>" line out of the reply.
        match = re.search(r'Answer: (.*)', resp)
        if match is not None:
            answer = match.group(1).strip()
            break
        print(RED + f"STEP: No 'Answer:' line found in response for {formula} (Retry {attempt})" + RESET)

    if answer is None:
        print(RED + f"STEP: Failed to parse answer from response after {max_retries} retries" + RESET)

    return { "name": name, "answer": answer, "type": type, "iter_num": iter }

async def generate_recurring_concurrently(inputs: dict) -> dict:
    """Evaluate every service's recurring and initial formula concurrently.

    Reads inputs["relevant_services_dict"], a list of objects whose `.dict`
    holds 'Name', 'Recurring Formula' and 'Initial Formula'. Each formula is
    sent to the model several times and the answers are reduced with
    majority_vote. The winning values are written back onto each service's
    dict as 'Recurring Price' and 'Initial Price'.

    Returns:
        {"complete_services": the same service objects, now carrying prices}.
    """
    print(GREEN + 'STEP: Generating recurring price' + RESET)
    services = inputs["relevant_services_dict"]
    votes_per_formula = 5  # number of independent evaluations fed into the majority vote

    # The prompt and chain do not depend on the service, so build them once.
    calculator_prompt = PromptTemplate(
        template=calculator_template,
        input_variables=["formula"]
    )
    chain = LLMChain(llm=chat, prompt=calculator_prompt, output_key="recurring_logic")

    tasks = []
    for service in services:
        name = service.dict['Name']
        formulas = (
            ('recurring', service.dict['Recurring Formula']),
            ('initial', service.dict['Initial Formula']),
        )
        for n in range(1, votes_per_formula + 1):
            for kind, formula in formulas:
                tasks.append(asyncio.create_task(async_calculate_price(name, chain, formula, kind, iter=n)))

    print(GREEN + 'All Tasks Added to Queue' + RESET)
    results = await asyncio.gather(*tasks)
    print(GREEN, 'All Tasks Completed' + RESET)
    print(GREEN, 'Holding majority vote' + RESET)
    results = majority_vote(results)
    print(GREEN, 'Majority vote complete' + RESET)

    # Index the winners by (name, type) so each service is a direct lookup.
    winners = {(result['name'], result['type']): result['answer'] for result in results}
    price_keys = {'recurring': 'Recurring Price', 'initial': 'Initial Price'}
    for service in services:
        for kind, price_key in price_keys.items():
            lookup = (service.dict['Name'], kind)
            if lookup in winners:
                service.dict[price_key] = winners[lookup]
    return { "complete_services": services }

def run(inputs):
    """Synchronous wrapper that drives generate_recurring_concurrently to completion.

    Returns a dict holding the coroutine's "complete_services" value.
    """
    event_loop = asyncio.get_event_loop()
    # Patch the loop with nest_asyncio before blocking on it — presumably needed
    # because the notebook kernel is already running this loop; confirm.
    nest_asyncio.apply(event_loop)
    outcome = event_loop.run_until_complete(generate_recurring_concurrently(inputs))
    complete_services = outcome["complete_services"]
    return { "complete_services": complete_services }


# Chain wrapper around `run`: takes the services selected by the previous step
# and publishes them, with prices attached, as "complete_services".
calculate_prices_chain = TransformChain(
    transform=run, 
    input_variables=["relevant_services_dict"], 
    output_variables=["complete_services"]
)

# Example of running the chain on its own with two hand-written services (left disabled):
# result = calculate_prices_chain.run({ "relevant_services_dict": [
#     Program({ "Name": "Service 1", "Recurring Formula": "=2+3", "Outward Facing": "All in One Program", "Initial Formula": "=100+3" }),
#     Program({ "Name": "Service 2", "Recurring Formula": "=10+4", "Outward Facing": "Castle Program", "Initial Formula": "=100+3" }),
# ]})

In [43]:
# Prompt asking the model to evaluate the already-substituted initial-price
# formula and to finish with an "Answer: <value>" line.
# {initial_formula_parsed} is the only variable.
initial_calculator_template = """Solve this excel formula. Briefly show your thinking for each step. Here are the inputs to the formula. 
Then write the answer as:
Answer: *answer*
{initial_formula_parsed}
"""

initial_calculator_prompt = PromptTemplate(
    template=initial_calculator_template,
    input_variables=["initial_formula_parsed"]
)

# The model's reply is stored under "initial_logic", the key parse_formula_thinking reads.
# NOTE(review): this chain is not part of analyze_situation_sequential — confirm it is still in use.
initial_calculator_chain = LLMChain(llm=chat, prompt=initial_calculator_prompt, output_key="initial_logic")

In [44]:
# "Answer:" followed by an optional "$", digits with optional thousands
# separators, and an optional decimal part of any length.
_ANSWER_PRICE_RE = re.compile(r"Answer:\s*(\$?\d[\d,]*(?:\.\d+)?)")


def _extract_price(text, error_message):
    """Return the numeric text after "Answer:" in `text` (no "$" or commas), or `error_message` if none is found."""
    match = _ANSWER_PRICE_RE.search(text)
    if match is None:
        return error_message
    return match.group(1).replace("$", "").replace(",", "")


def parse_formula_thinking(inputs: dict) -> dict:
    """Extract the recurring and initial prices from the model's worked answers.

    Reads inputs["recurring_logic"] and inputs["initial_logic"], each expected
    to contain a line such as "Answer: $1,660.50".

    Returns:
        {"recurring_price": ..., "initial_price": ...}. Each value is the
        price as a string with "$" and thousands separators removed, or an
        "Error calculating ... price" message when no answer was found.

    Decimals of any length and comma-grouped thousands are accepted, so
    "12.5" is no longer cut down to "12" and "1,660" no longer becomes "1".
    """
    return {
        "recurring_price": _extract_price(inputs["recurring_logic"], "Error calculating recurring price"),
        "initial_price": _extract_price(inputs["initial_logic"], "Error calculating initial price"),
    }

# Chain wrapper that turns the two worked answers ("recurring_logic", "initial_logic") into "recurring_price" and "initial_price".
parse_formula_thinking_chain = TransformChain(transform=parse_formula_thinking, input_variables=["recurring_logic", "initial_logic"], output_variables=["recurring_price", "initial_price"])

In [45]:
def prepare_offer(inputs: dict) -> dict:
    """Combine the first offer with its calculated prices into the fields the sales pitch needs.

    Reads inputs["offers"] (only the first entry is used),
    inputs["recurring_price"] and inputs["initial_price"].

    Returns:
        A dict with the priced "offer" plus "name", "description",
        "frequency", "recurring_value", "initial_value" and "covered".

    The offer is copied before the prices are added, so the caller's record
    is not modified. 'Covered Pests' may be one string or an iterable of
    names: a string is used as-is (joining it would put a separator between
    every character), anything else is joined with ", ".
    """
    offer = dict(inputs["offers"][0])
    offer["recurring_price"] = inputs["recurring_price"]
    offer["initial_price"] = inputs["initial_price"]

    covered_pests = offer['Covered Pests']
    if not isinstance(covered_pests, str):
        covered_pests = ', '.join(covered_pests)

    return {
        "offer": offer,
        "name": offer['Outward Facing'],
        "description": offer['Description'],
        "frequency": offer['Frequency'],
        "recurring_value": offer['recurring_price'],
        "initial_value": offer['initial_price'],
        "covered": covered_pests
    }

# Chain wrapper exposing prepare_offer's fields as individual variables, matching the placeholders used by the sales-pitch prompt.
prepare_offer_chain = TransformChain(transform=prepare_offer, input_variables=["offers", "recurring_price", "initial_price"], output_variables=["offer", "name", "description", "frequency", "covered", "recurring_value", "initial_value"])

In [46]:
# Prompt that turns the selected program and its prices into a short, casual
# chat-style sales pitch.
# NOTE(review): the prompt text contains typos ("explaination", "htem") and
# prints {covered} twice ("Covered:" and "Covered Pests:"); it is left as-is
# here because the text is sent to the model verbatim.
pitch_template = """You are an expert and friendly salesperson for a sales company. You have just finished collecting information from the customer. Provide a helpful and brief explaination of the best service you found for htem based on the following service and issue. Focus on service benefits and avoid being overly technical. Be casual and pretend you are chatting with a friend. This is a chat message so keep it relatively short.
Customer Issue: {customer_issue}
Customer Preferences: {customer_preferences}
Program: {name}
Description: {description}
Frequency: {frequency}
Covered: {covered}
Initial cost (billed for the first service): {initial_value}
Ongoing cost (billed for any followup services that this program involves): {recurring_value}
Covered Pests: {covered}
Your pitch:
"""

# NOTE(review): "covered" appears twice in input_variables.
parser_prompt = PromptTemplate(template=pitch_template, input_variables=["customer_issue", "customer_preferences", "name", "description", "frequency", "covered", "recurring_value", "covered", "initial_value"])
# The generated pitch is published under the "pitch" key.
sales_pitch_chain = LLMChain(llm=chat, prompt=parser_prompt, output_key="pitch")

In [51]:
# End-to-end pipeline, run in this order:
#   1. retrieve_situational_awareness - similar technician experiences
#   2. analyze_issue_chain            - severity analysis by the model
#   3. get_relevant_services_chain    - matching services with formulas filled in
#   4. calculate_prices_chain         - recurring/initial prices per service
# NOTE(review): "customer_preferences" is accepted as an input, but none of the
# four chains above reads it.
analyze_situation_sequential = SequentialChain(
    chains=[
        retrieve_situational_awareness,
        analyze_issue_chain,
        get_relevant_services_chain,
        calculate_prices_chain
    ],
    input_variables=["customer_issue", "customer_preferences", "square_footage", "property_type", "acres", "target", "severity"],
    output_variables=[
        "situational_experience", 
        "siutation_analysis", 
        "relevant_services_dict",
        "relevant_services_text",
        "complete_services"
    ],
    verbose=True
)

In [52]:
# Run the full pipeline on a sample case; `response` holds the inputs together
# with every declared output variable and is inspected by the cells below.
response = analyze_situation_sequential({
    "customer_issue": "Roaches in the apartment",
    "customer_preferences": "none",
    "property_type": "apartment",
    "square_footage": 800,
    "acres": 0,
    "target": "roaches",
    "severity": "high"
})



[1m> Entering new SequentialChain chain...[0m
[92mSTEP: Getting situational experiences[0m
[91mSituational experiences found:  ['You should assume that a roach issue will be harder to control if the customer lives in an apartment, townhouse or duplex.', 'Customers will need a minimum of monthly service frequency for roaches'] [0m
[92mSTEP: Analyzing situational experiences[0m
[92mSTEP: Getting relevant services[0m
[91mSTEP: Completed getting services:  [<__main__.Program object at 0x1370095b0>, <__main__.Program object at 0x137009640>, <__main__.Program object at 0x137009850>] [0m
[92mSTEP: Begin parsing the formulas[0m
[92mSTEP: Completed parsing the formulas[0m
[92mSTEP: Generating recurring price[0m
[92mAll Tasks Added to Queue[0m
[1m Thinking: The first condition "high"="high" is true, so the first result is 199.

Answer: 199 [0m
[1m Thinking: The first condition "high"="high" is true, so the answer is 199.

Answer: 199 [0m
[1m Thinking: The first compari

In [63]:
# Print a short summary (name, prices, description, audience) for each priced service.
for service in response['complete_services']:
    details = service.dict
    print(details['Outward Facing'])
    print("Recurring: $", details['Recurring Price'])
    print("Initial Price $", details['Initial Price'])
    print(details['Description'])
    best_for_line = details['Best for'] + '\n----------'
    print('Best For:', best_for_line)

Monthly Pest and Rodent Control
Recurring: $ 95
Initial Price $ 199
Ongoing rodent and/or pest control program that provides a high level of protection against pests, mice and rats. We come out monthly and complete a perimeter treatment and setup and maintain rodent baiting program.
Best For: Customers with a active roach or rodent issue
----------
Castle with Sentricon
Recurring: $ None
Initial Price $ 1660
Bimonthly pest control covering most pests, and proactive rodent issues. Also includes Sentricon baiting system which can be installed for curative or proactive termite situations to offer a green termite control solution. Includes free callbacks if any covered pests pop up between services.
Best For: Customers who want termite and pest control or customers who have a curative termite need
----------
Castle Program
Recurring: $ None
Initial Price $ 199
Bimonthly pest control program covering pests and proactive rodent issues.
Best For: The best pest control issues like ants, spider

In [54]:
# Reference list of the response keys that observe_steps reads.
# NOTE(review): several of these (e.g. 'service_search_term', 'offers', 'pitch')
# are not among the output variables of analyze_situation_sequential, so they
# appear to describe an earlier version of the pipeline — confirm.
example = ['customer_issue',
 'customer_preferences',
 'property_type',
 'square_footage',
 'acres',
 'target',
 'severity',
 'situational_experience',
 'siutation_analysis',
 'service_search_term',
 'services_plain_text',
 'thinking_through_service_rankings',
 'service_id_list',
 'service_ids',
 'offers',
 'recurring_formula_parsed',
 'initial_formula_parsed',
 'recurring_logic',
 'initial_logic',
 'recurring_price',
 'initial_price',
 'pitch']

def observe_steps(steps, start, num_steps):
    """Print a slice of the pipeline's intermediate values, one step per block.

    Args:
        steps: mapping of step name to value (a chain response).
        start: index of the first step to print; step 0 is the combined
            summary of the customer inputs.
        num_steps: how many consecutive steps to print from `start`.

    Each printed step is followed by a line of dashes. Only the requested
    steps are looked up and formatted, so keys belonging to steps outside the
    slice need not exist. A key that is missing from `steps` is shown as
    '<not available>' instead of raising KeyError.
    """
    def value_of(key):
        try:
            return steps[key]
        except KeyError:
            return '<not available>'

    # Labels and keys that make up the combined "Inputs" step (index 0).
    input_fields = [
        ('Customer Issues', 'customer_issue'),
        ('Customer Preferences', 'customer_preferences'),
        ('Property Type', 'property_type'),
        ('Square Footage', 'square_footage'),
        ('Acres', 'acres'),
        ('Target', 'target'),
        ('Severity', 'severity'),
    ]

    # Display order of the steps; None stands for the combined inputs summary.
    step_keys = [
        None,
        'situational_experience',
        'siutation_analysis',
        'service_search_term',
        'services_plain_text',
        'thinking_through_service_rankings',
        'service_id_list',
        'service_ids',
        'offers',
        'recurring_formula_parsed',
        'recurring_logic',
        'recurring_price',
        'initial_formula_parsed',
        'initial_logic',
        'initial_price',
        'pitch',
    ]

    end = start + num_steps
    for key in step_keys[start:end]:
        if key is None:
            print('Inputs: ' + '\n'.join(f'{label}: {value_of(field)}' for label, field in input_fields))
        else:
            print(f'{key}: {value_of(key)}')
        print('-----------------')



In [91]:
# Print the recorded steps of the last pipeline run, starting from the inputs summary.
observe_steps(response, 0, 18)
# print(response["services_plain_text"])

Inputs: Customer Issues: Roaches in the apartment
Customer Preferences: none
Property Type: apartment
Square Footage: 800
Acres: 0
Target: roaches
Severity: high
-----------------
situational_experience: ['You should assume that a roach issue will be harder to control if the customer lives in an apartment, townhouse or duplex.', 'Customers will need a minimum of monthly service frequency for roaches']
-----------------
siutation_analysis: 

Based on the information provided, the issue of roaches in the apartment is likely to be a high severity issue. The fact that the property type is an apartment, townhouse, or duplex makes it harder to control pest infestations. Furthermore, the technician advised that a minimum of monthly service frequency is required for roach control, which suggests that this is a persistent problem that requires ongoing attention. Roaches can carry diseases, contaminate food, and cause allergic reactions, making it critical to address this issue promptly to prote