In [27]:
import pandas as pd
import openai
from dotenv import load_dotenv
load_dotenv()
import os
from langchain.prompts import load_prompt
from langchain import PromptTemplate
from langchain.llms import OpenAIChat
import sys
pd.options.mode.chained_assignment = None
from langchain.callbacks import get_openai_callback
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain import PromptTemplate, LLMChain, VectorDBQA
from langchain.chains import TransformChain, SequentialChain
from langchain.document_loaders.csv_loader import CSVLoader
import re

from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    AIMessage,
    SystemMessage
)

import sys
sys.path.append('./modules/')
openai.api_key = os.getenv('OPENAI_KEY')
key = os.getenv('OPENAI_KEY')

from Generator import Generator
from Extractor import Extractor
from PromptPath import PromptPath
from GenExtract import GenExtract
from Property import Property
from NaturalLanguagePool import NaturalLanguagePool
from DynamicChatHistory import DynamicChatHistory
from Conversation import Conversation
from ConversationScript import ConversationScript
from ConversationScriptStage import ConversationScriptStage
from show_chat_log import show_chat_log
from is_question import is_question
from customer_profile import customer_profile

# Counter for how often the situational-experience embeddings have been built in this
# kernel session: the embedding cell below rebuilds the vector store only while this
# value is under 20 and increments it after each rebuild.
times_embedding_run = 10

#### Step 1: Create a directory in the prompts folder to store this notebook's prompts, then save its path

In [28]:
# Name of the sub-folder (inside prompts/) that holds this notebook's saved prompts
prompt_folder_name = 'service_selector'
prompt_folder_path = 'prompts/' + prompt_folder_name

# Create the folder on first use. os.makedirs also creates a missing 'prompts' parent,
# which the previous os.listdir('prompts') check would have crashed on, and isdir()
# avoids treating a plain file with the same name as an existing folder.
if os.path.isdir(prompt_folder_path):
    print('prompt folder exists')
else:
    os.makedirs(prompt_folder_path, exist_ok=True)

# PromptPath is a project helper; it is given the folder path with a trailing slash
path = PromptPath(prompt_folder_path + '/', type="json")

def save(prompt, name):
    """Persist `prompt` as `<prompt_folder_path>/<name>.json` using the prompt's own save()."""
    destination = '/'.join([prompt_folder_path, name + '.json'])
    prompt.save(destination)

prompt folder exists


In [29]:
# Shared chat model instance; the LLMChain steps defined below are built with llm=chat.
chat = ChatOpenAI(openai_api_key=key)


In [73]:
# Build one TextLoader per .txt file found in data/situational-experience
situation_loaders = [
    TextLoader('data/situational-experience/' + file)
    for file in os.listdir('data/situational-experience')
    if file.endswith('.txt')
]

# Flatten everything the loaders return into a single list of documents
situation_docs = [doc for loader in situation_loaders for doc in loader.load()]

# Only (re)build the vector store while the per-session counter is below 20;
# each rebuild embeds all chunks again and bumps the counter.
if times_embedding_run < 20:
    text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0)
    documents = text_splitter.split_documents(situation_docs)

    siutation_embeddings = OpenAIEmbeddings(openai_api_key=key)
    situational_experience = Chroma.from_documents(documents, siutation_embeddings)
    times_embedding_run += 1


# Cell output marker. The store is actually bound to `situational_experience`,
# not to a variable called "policies".
"Embeddings generated into policies variable"

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
Exiting: Cleaning up .chroma directory


'Embeddings generated into policies variable'

In [150]:
# NOTE: this cell rebinds situation_loaders / situation_docs, which the
# situational-experience cell also uses; after it runs they hold the service JSON files.
all_services_df = pd.read_csv('data/services_search - Sheet1.csv')
service_descriptions = []

# One TextLoader per .json file in data/services
situation_loaders = [
    TextLoader('data/services/' + file)
    for file in os.listdir('data/services')
    if file.endswith('.json')
]

# Read every loader into one flat document list
situation_docs = [doc for loader in situation_loaders for doc in loader.load()]

# Embed the service documents (unsplit) into their own vector store
service_description_embeddings = OpenAIEmbeddings(openai_api_key=key)
service_description_search = Chroma.from_documents(situation_docs, service_description_embeddings)

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
Exiting: Cleaning up .chroma directory


In [180]:
import json

# Query text handed to service_description_search.similarity_search further down in this cell
search_term = "Ant control program"

class Programs:
    """Lookup wrapper around a list of service records (dicts that carry a 'Name' key)."""

    def __init__(self, services):
        # Kept as given; get_service scans it in order on every call.
        self.services = services

    def get_service(self, name):
        """Return the first record whose 'Name' equals `name`, or None when nothing matches."""
        return next(
            (record for record in self.services if record['Name'] == name),
            None,
        )
    
class Program:
    """Thin wrapper around one service record whose repr is its 'Outward Facing' value.

    The constructor parameter keeps its original name `dict` (shadowing the builtin
    inside __init__ only) so existing keyword callers keep working.
    """

    def __init__(self, dict):
        self.dict = dict

    def __repr__(self):
        """Return the 'Outward Facing' value as text.

        __repr__ must return a str, so non-string values are converted with str(),
        and a missing record or missing key yields a placeholder instead of raising
        while a list of programs is being printed.
        """
        if not self.dict:
            return 'Program(<no data>)'
        return str(self.dict.get('Outward Facing', 'Program(<unnamed>)'))

# Top-3 nearest service documents for the query; each page_content is a JSON string
service_search_results = service_description_search.similarity_search(search_term, 3)
service_search_results = [json.loads(result.page_content) for result in service_search_results]

# One plain dict per row of the services sheet
all_services = [row.to_dict() for _, row in all_services_df.iterrows()]
all_services_list = Programs(all_services)

# Match each search hit to its full row by 'Name'. A hit with no matching row is
# reported and skipped; previously None was wrapped in Program and crashed on print.
detailed_services = []
for service in service_search_results:
    service_name = service['Name']
    detailed_service = all_services_list.get_service(service_name)
    if detailed_service is None:
        print(f'No row named {service_name!r} in all_services_df; skipping')
        continue
    detailed_services.append(Program(detailed_service))

In [181]:
# Each Program in the list is rendered through Program.__repr__ (its 'Outward Facing' value)
print(detailed_services)

[Castle Program, Quarterly Pest Control, One Time Pest Control]


In [79]:
def get_situational_awareness(inputs: dict) -> dict:
    """Look up the 5 situational-experience chunks closest to the customer's issue.

    Reads inputs["customer_issue"], queries the module-level `situational_experience`
    vector store, and returns the chunk texts under "situational_experience".
    """
    matches = situational_experience.similarity_search(inputs["customer_issue"], 5)
    snippets = []
    for match in matches:
        snippets.append(match.page_content)
    print('STEP: Getting situational experiences')
    return {"situational_experience": snippets}


# Chain step wrapper so the lookup can be used inside a SequentialChain
retrieve_situational_awareness = TransformChain(
    transform=get_situational_awareness,
    input_variables=["customer_issue"],
    output_variables=["situational_experience"],
)

In [32]:
# Prompt asking the model to reason about the issue and rate its severity (Low/Medium/High).
# Placeholders: {customer_issue}, {property_type}, {situational_experience}.
template_message = """Your goal is to identify how severe this issue is. Think step by step about your answer and use the information you provided to make you answer. After your analysis, please rate the severity as Low, Medium or High.
Customer Issue: {customer_issue}
Property Type: {property_type}
Below are some possible practical experiences from our technicians that may be useful. Some of it may not be relevant to your answer.
{situational_experience}
----
Your Analysis:
"""

template = PromptTemplate(template=template_message, input_variables=["customer_issue", "situational_experience", "property_type"])
# The output key is spelled "siutation_analysis" (sic); later prompts reference the
# same spelling as {siutation_analysis}, so it must stay in sync with them.
analyze_issue_chain = LLMChain(llm=chat, prompt=template, output_key="siutation_analysis")

In [86]:
# Load ./data/service_embeddings.csv with CSVLoader and embed the resulting documents
# into a separate vector store; get_best_services queries it via service_search.
loader = CSVLoader(file_path='./data/service_embeddings.csv')
services = loader.load()

service_embeddings = OpenAIEmbeddings(openai_api_key=key)
service_search = Chroma.from_documents(services, service_embeddings)

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.
Exiting: Cleaning up .chroma directory


In [34]:
# Prompt that condenses the severity analysis into a one-sentence description of the
# program being sought; the result is stored under "service_search_term".
situation_summarize_text = """Summarize what kind of program you are looking for based on this analysis in one sentence.
Example Summary: Program for a townhome with a severe german roach infestation.
Analysis:
{siutation_analysis}
Your Summary:
"""

# {siutation_analysis} matches the (misspelled) output key of analyze_issue_chain
situation_template = PromptTemplate(template=situation_summarize_text, input_variables=["siutation_analysis"])
situation_summarize_chain = LLMChain(llm=chat, prompt=situation_template, output_key="service_search_term")

In [87]:
def get_best_services(inputs: dict) -> dict:
    """Fetch the 3 service documents closest to the summarized search term.

    Reads inputs["service_search_term"], queries the module-level `service_search`
    vector store, and returns the documents' text joined by newlines under
    "services_plain_text".
    """
    print('Getting best services')
    query = inputs["service_search_term"]
    hits = service_search.similarity_search(query, 3)
    plain_text = '\n'.join(hit.page_content for hit in hits)
    return {"services_plain_text": plain_text}


# Chain step wrapper so the service lookup can be used inside a SequentialChain
retrieve_services = TransformChain(
    transform=get_best_services,
    input_variables=["service_search_term"],
    output_variables=["services_plain_text"],
)

#### Step 2: Create your base prompt & Test It

In [36]:
# Base prompt: given the issue, the severity analysis, the customer's preferences and the
# retrieved service texts, the model explains its reasoning and ends with a numbered
# "Ratings" list of programs.
# Placeholders: {customer_issue}, {siutation_analysis}, {customer_preferences}, {services_plain_text}.
text = """About the customer you are helping:
Current Issue: {customer_issue}
Severity: {siutation_analysis}
Stated Preferences: {customer_preferences}
Your goal is to help them pick whatever service would work best for them. Put then in order of best to worst. Your should prefer services that are recurring, rather than one time services. If the customer's preferences indicate they are interested in one time, services you should also add in one time servies.

{services_plain_text}

Think step by step and walk through your logic for selecting the program for the customer before writing your answer. Explain your thinking before answering. Then, write an ordered list with at least one response like this at the end of your response. Do not include programs that will not solve the problem. You do not have to include all programs in the list. Do your thinking, then write the list:
Example:
Given that the customer has termites, we can eliminate all programs that don't cover termites, like the monthly pest and rodent control, quarterly pest control, VIP, Lawnshield, One Time Services, IN2Care and Castle.
Since the customer has not stated they have a pest problem, Program 2 is the best program for them. Next best is the Program 4.
Ratings
1. Program 2
2. Program 4
Your answer:"""
prompt = PromptTemplate(template=text, input_variables=["customer_issue", "siutation_analysis", "services_plain_text", "customer_preferences"])

# Written to <prompt_folder_path>/base_prompt.json by the save() helper defined earlier
save(prompt, 'base_prompt')

In [37]:
# Runs the base prompt; the model's reasoning plus ranked list is stored under
# "thinking_through_service_rankings".
think_through_offerings_chain = LLMChain(llm=chat, prompt=prompt, output_key="thinking_through_service_rankings")

In [38]:
# Prompt that asks the model to reduce the ranked "Program N" list to a comma-separated
# list of the program numbers; the result is stored under "service_id_list".
parser_text = """Convert The Text into a comma seperated list of numbers. For example:
Example Text
1. Program 8
2. Program 4

Example Answer: 8, 4

Text:
{thinking_through_service_rankings}
Your answer:
"""

# NOTE(review): the name parser_prompt is rebound later by the sales-pitch cell;
# map_to_ids_chain keeps the object created here because it is passed in immediately.
parser_prompt = PromptTemplate(template=parser_text, input_variables=["thinking_through_service_rankings"])
map_to_ids_chain = LLMChain(llm=chat, prompt=parser_prompt, output_key="service_id_list")

In [39]:
import re 

def transform_func(inputs: dict) -> dict:
    """Extract the service IDs from the model's comma-separated answer.

    Args:
        inputs: mapping with "service_id_list", the raw text produced by the ID-mapping step.

    Returns:
        {"service_ids": [...]} with each ID as a whitespace-stripped string, in the
        order they appear in the text.

    Raises:
        ValueError: if the text contains no number at all.
    """
    print('Finding service IDS in analysis')
    text = inputs["service_id_list"]

    # Prefer a real comma-separated run of numbers (the format the prompt asks for).
    # When only one program was ranked there is no comma, so fall back to the first
    # standalone number instead of failing with an IndexError on an empty match list.
    match = re.search(r'\d+(?:,\s*\d+)+', text) or re.search(r'\d+', text)
    if match is None:
        raise ValueError(f'No service IDs found in: {text!r}')

    service_ids = [part.strip() for part in match.group(0).split(',')]
    return {"service_ids": service_ids}

# Chain step: turns the "service_id_list" text into the "service_ids" list via transform_func
parser_chain = TransformChain(transform=transform_func, input_variables=["service_id_list"], output_variables=["service_ids"])

In [40]:
import pandas as pd

# Read the services sheet (the same CSV that all_services_df is loaded from) and give
# every row a 1-based ID stored as a string, added as the last column.
services_df = pd.read_csv('./data/services_search - Sheet1.csv')
services_df = services_df.assign(ID=(services_df.index + 1).astype(str))

# Lookup table keyed by ID: {ID: {column name: value, ...}}
offers = services_df.set_index('ID').T.to_dict('dict')

def get_offers(inputs: dict) -> dict:
    """Resolve ranked service IDs to their rows in the module-level `offers` table.

    Returns every matching row under "offers" (same order as the IDs) together with
    the "Recurring Formula" and "Initial Formula" of the first-ranked ID.
    """
    print('Getting JSON offers')
    ids = inputs["service_ids"]
    top_choice = offers[ids[0]]
    recurring_raw = top_choice["Recurring Formula"]
    initial_raw = top_choice["Initial Formula"]
    ranked_offers = [offers[service_id] for service_id in ids]
    return {
        "offers": ranked_offers,
        "recurring_formula_raw": recurring_raw,
        "initial_formula_raw": initial_raw,
    }

# Chain step: maps "service_ids" to the offer rows and the top offer's two raw formulas
get_offers_chain = TransformChain(transform=get_offers, input_variables=["service_ids"], output_variables=["offers", "recurring_formula_raw", "initial_formula_raw"])

In [41]:
def _fill_formula(formula, inputs):
    """Replace each placeholder token in `formula` with str() of the matching input value."""
    substitutions = (
        ("{square_footage}", "square_footage"),
        ("{target}", "target"),
        ("{severity}", "severity"),
        ("{acres}", "acres"),
    )
    for token, key in substitutions:
        formula = formula.replace(token, str(inputs[key]))
    return formula


def prepare_formulas(inputs: dict) -> dict:
    """Interpolate the customer's values into the recurring and initial price formulas.

    Reads "recurring_formula_raw" and "initial_formula_raw" plus the four placeholder
    values from `inputs`, and returns the filled-in formulas under
    "recurring_formula_parsed" and "initial_formula_parsed".
    """
    print('start prepare formulas')
    filled = {
        "recurring_formula_parsed": _fill_formula(inputs["recurring_formula_raw"], inputs),
        "initial_formula_parsed": _fill_formula(inputs["initial_formula_raw"], inputs),
    }
    print('end prepare formulas')

    return filled

# Chain step: fills the raw formulas with the customer's values via prepare_formulas
parse_formula_chain = TransformChain(transform=prepare_formulas, input_variables=['square_footage',  'target', 'severity', 'acres', 'recurring_formula_raw', 'initial_formula_raw'], output_variables=["recurring_formula_parsed", "initial_formula_parsed"])

In [42]:
# Prompt that hands the filled-in recurring formula to the model and asks for the result
# on a final "Answer: ..." line; parse_formula_thinking later extracts that line.
recurring_calculator_template = """Solve this excel formula. Briefly show your thinking for each step. Here are the inputs to the formula. 
Then write the answer as:
Answer: *answer*
{recurring_formula_parsed}
"""

recurring_calculator_prompt = PromptTemplate(
    template=recurring_calculator_template,
    input_variables=["recurring_formula_parsed"]
)

# Model's worked solution is stored under "recurring_logic"
recurring_calculator_chain = LLMChain(llm=chat, prompt=recurring_calculator_prompt, output_key="recurring_logic")

# Same prompt wording as above, applied to the initial (first-service) formula
initial_calculator_template = """Solve this excel formula. Briefly show your thinking for each step. Here are the inputs to the formula. 
Then write the answer as:
Answer: *answer*
{initial_formula_parsed}
"""

initial_calculator_prompt = PromptTemplate(
    template=initial_calculator_template,
    input_variables=["initial_formula_parsed"]
)

# Model's worked solution is stored under "initial_logic"
initial_calculator_chain = LLMChain(llm=chat, prompt=initial_calculator_prompt, output_key="initial_logic")
    

In [50]:
# "Answer:" followed by an optional "*" wrapper and "$" sign, then a number that may
# use thousands separators and any number of decimals.
_ANSWER_PRICE_PATTERN = re.compile(r"Answer:\s*\**\s*\$?\s*(\d[\d,]*(?:\.\d+)?)")


def _extract_price(logic_text, error_message):
    """Return the first "Answer:" amount in `logic_text` as a bare number string, else `error_message`."""
    match = _ANSWER_PRICE_PATTERN.search(logic_text)
    if match is None:
        return error_message
    # Drop thousands separators (and any trailing comma) so "1,200.00" becomes "1200.00"
    return match.group(1).replace(",", "")


def parse_formula_thinking(inputs: dict) -> dict:
    """Pull the final prices out of the model's worked formula solutions.

    Args:
        inputs: mapping with "recurring_logic" and "initial_logic", the model's text
            for the recurring and initial formulas.

    Returns:
        {"recurring_price": ..., "initial_price": ...}. Each value is the number from
        the first "Answer:" line, without "$" or thousands separators, or a fixed
        error string when no such line is found.

    The earlier pattern cut "1,200.00" down to "1" and "45.5" to "45", and missed
    answers written as "Answer: *45*" (the form shown in the prompt) or "Answer:$45".
    """
    return {
        "recurring_price": _extract_price(inputs["recurring_logic"], "Error calculating recurring price"),
        "initial_price": _extract_price(inputs["initial_logic"], "Error calculating initial price"),
    }

# Chain step: extracts "recurring_price" and "initial_price" from the two worked solutions
parse_formula_thinking_chain = TransformChain(transform=parse_formula_thinking, input_variables=["recurring_logic", "initial_logic"], output_variables=["recurring_price", "initial_price"])

In [45]:
def prepare_offer(inputs: dict) -> dict:
    """Attach the computed prices to the top-ranked offer and flatten it for the pitch prompt.

    Args:
        inputs: mapping with "offers" (ranked offer dicts; only the first is used),
            "recurring_price" and "initial_price".

    Returns:
        A dict with the offer itself plus its name, description, frequency, both
        prices and the covered pests as a single display string.
    """
    # Same dict object as inputs["offers"][0]: the price keys added here are visible
    # through "offers" as well. That behavior is unchanged.
    offer = inputs["offers"][0]
    offer["recurring_price"] = inputs["recurring_price"]
    offer["initial_price"] = inputs["initial_price"]

    # Offers are read from a CSV, so 'Covered Pests' arrives as one string. Calling
    # ', '.join() on a string puts ", " between every character, so only join real
    # sequences and pass a string through untouched.
    covered_pests = offer['Covered Pests']
    if isinstance(covered_pests, (list, tuple)):
        covered_text = ', '.join(str(pest) for pest in covered_pests)
    else:
        covered_text = str(covered_pests)

    return {
        "offer": offer,
        "name": offer['Outward Facing'],
        "description": offer['Description'],
        "frequency": offer['Frequency'],
        "recurring_value": offer['recurring_price'],
        "initial_value": offer['initial_price'],
        "covered": covered_text
    }

# Chain step: exposes the top offer's fields as separate variables for the pitch prompt
prepare_offer_chain = TransformChain(transform=prepare_offer, input_variables=["offers", "recurring_price", "initial_price"], output_variables=["offer", "name", "description", "frequency", "covered", "recurring_value", "initial_value"])

In [88]:
# Prompt for the final customer-facing chat message. It is filled from the original
# inputs plus the fields produced by prepare_offer; {covered} appears twice in the text.
pitch_template = """You are an expert and friendly salesperson for a sales company. You have just finished collecting information from the customer. Provide a helpful and brief explaination of the best service you found for htem based on the following service and issue. Focus on service benefits and avoid being overly technical. Be casual and pretend you are chatting with a friend. This is a chat message so keep it relatively short.
Customer Issue: {customer_issue}
Customer Preferences: {customer_preferences}
Program: {name}
Description: {description}
Frequency: {frequency}
Covered: {covered}
Initial cost (billed for the first service): {initial_value}
Ongoing cost (billed for any followup services that this program involves): {recurring_value}
Covered Pests: {covered}
Your pitch:
"""

# NOTE(review): this rebinds parser_prompt, the name used earlier for the ID-parsing
# prompt, and lists "covered" twice in input_variables. A distinct name would be clearer.
parser_prompt = PromptTemplate(template=pitch_template, input_variables=["customer_issue", "customer_preferences", "name", "description", "frequency", "covered", "recurring_value", "covered", "initial_value"])
sales_pitch_chain = LLMChain(llm=chat, prompt=parser_prompt, output_key="pitch")

In [89]:
# End-to-end pipeline: each chain's output keys become inputs for the chains after it.
analyze_situation_sequential = SequentialChain(
    chains=[
        retrieve_situational_awareness,   # customer_issue -> situational_experience
        analyze_issue_chain,              # -> siutation_analysis
        situation_summarize_chain,        # -> service_search_term
        retrieve_services,                # -> services_plain_text
        think_through_offerings_chain,    # -> thinking_through_service_rankings
        map_to_ids_chain,                 # -> service_id_list
        parser_chain,                     # -> service_ids
        get_offers_chain,                 # -> offers, recurring_formula_raw, initial_formula_raw
        parse_formula_chain,              # -> recurring_formula_parsed, initial_formula_parsed
        recurring_calculator_chain,       # -> recurring_logic
        initial_calculator_chain,         # -> initial_logic
        parse_formula_thinking_chain,     # -> recurring_price, initial_price
        prepare_offer_chain,              # -> offer, name, description, frequency, covered, prices
        sales_pitch_chain                 # -> pitch
    ],
    input_variables=["customer_issue", "customer_preferences", "square_footage", "property_type", "acres", "target", "severity"],
    # Intermediate values to return alongside the final pitch. The raw formulas and the
    # flattened offer fields are produced along the way but not listed here.
    output_variables=[
        "situational_experience", 
        "siutation_analysis", 
        "service_search_term", 
        "services_plain_text", 
        "thinking_through_service_rankings", 
        "service_id_list", 
        "service_ids", 
        "offers", 
        "recurring_formula_parsed",
        "initial_formula_parsed",
        "recurring_logic",
        "initial_logic",
        "recurring_price",
        "initial_price",
        "pitch"
    ],
    verbose=True
)

In [90]:
# Run the whole pipeline once for a sample customer. The keys match the chain's
# input_variables; `response` is later passed to observe_steps for inspection.
response = analyze_situation_sequential({
    "customer_issue": "Roaches in the apartment",
    "customer_preferences": "none",
    "property_type": "apartment",
    "square_footage": 800,
    "acres": 0,
    "target": "roaches",
    "severity": "high"
})



[1m> Entering new SequentialChain chain...[0m
STEP: Getting situational experiences
Getting best services
Finding service IDS in analysis
Getting JSON offers
start prepare formulas
end prepare formulas

[1m> Finished chain.[0m


In [54]:
# Key names: the seven chain inputs followed by the chain's output variables.
# NOTE(review): looks like a pasted copy of the response dict's keys; it is not
# referenced anywhere else in the visible part of the notebook. Confirm before removing.
example = ['customer_issue',
 'customer_preferences',
 'property_type',
 'square_footage',
 'acres',
 'target',
 'severity',
 'situational_experience',
 'siutation_analysis',
 'service_search_term',
 'services_plain_text',
 'thinking_through_service_rankings',
 'service_id_list',
 'service_ids',
 'offers',
 'recurring_formula_parsed',
 'initial_formula_parsed',
 'recurring_logic',
 'initial_logic',
 'recurring_price',
 'initial_price',
 'pitch']

def observe_steps(steps, start, num_steps):
    """Print a window of the pipeline's recorded steps, each followed by a dashed line.

    Args:
        steps: mapping holding the chain inputs and every output key listed below
            (all of them are read, whichever window is requested).
        start: index of the first entry to print (0 is the inputs summary).
        num_steps: how many entries to print; running past the end is harmless.
    """
    step_list = [
        f'Inputs: Customer Issues: {steps["customer_issue"]}\nCustomer Preferences: {steps["customer_preferences"]}\nProperty Type: {steps["property_type"]}\nSquare Footage: {steps["square_footage"]}\nAcres: {steps["acres"]}\nTarget: {steps["target"]}\nSeverity: {steps["severity"]}',
        f'situational_experience: {steps["situational_experience"]}',
        f'siutation_analysis: {steps["siutation_analysis"]}',
        f'service_search_term: {steps["service_search_term"]}',
        f'services_plain_text: {steps["services_plain_text"]}',
        f'thinking_through_service_rankings: {steps["thinking_through_service_rankings"]}',
        f'service_id_list: {steps["service_id_list"]}',
        f'service_ids: {steps["service_ids"]}',
        f'offers: {steps["offers"]}',
        f'recurring_formula_parsed: {steps["recurring_formula_parsed"]}',
        f'recurring_logic: {steps["recurring_logic"]}',
        f'recurring_price: {steps["recurring_price"]}',
        f'initial_formula_parsed: {steps["initial_formula_parsed"]}',
        f'initial_logic: {steps["initial_logic"]}',
        f'initial_price: {steps["initial_price"]}',
        f'pitch: {steps["pitch"]}'
    ]

    # Emit the requested window, one entry and one separator line at a time
    for entry in step_list[start:start + num_steps]:
        print(entry, '-----------------', sep='\n')



In [91]:
# Print every recorded step of the run. observe_steps holds 16 entries, so asking
# for 18 simply prints all of them.
observe_steps(response, 0, 18)
# print(response["services_plain_text"])

Inputs: Customer Issues: Roaches in the apartment
Customer Preferences: none
Property Type: apartment
Square Footage: 800
Acres: 0
Target: roaches
Severity: high
-----------------
situational_experience: ['You should assume that a roach issue will be harder to control if the customer lives in an apartment, townhouse or duplex.', 'Customers will need a minimum of monthly service frequency for roaches']
-----------------
siutation_analysis: 

Based on the information provided, the issue of roaches in the apartment is likely to be a high severity issue. The fact that the property type is an apartment, townhouse, or duplex makes it harder to control pest infestations. Furthermore, the technician advised that a minimum of monthly service frequency is required for roach control, which suggests that this is a persistent problem that requires ongoing attention. Roaches can carry diseases, contaminate food, and cause allergic reactions, making it critical to address this issue promptly to prote