In [12]:
%load_ext autoreload 
%autoreload 2
import custom_functions as fn

In [13]:
FPATHS = fn.load_filepaths_json()

Top-Level Keys in FPATHS dict:
dict_keys(['data', 'images', 'metadata', 'eda', 'models', 'results', 'readme'])


## Task: Conversational Retreival Agent 

### Notes:
- Reviewing [official example](https://github.com/hwchase17/conversational-retrieval-agent/blob/master/streamlit.py) of RetreivalAgent on blog post.

- [ ] Separate retreival tool creation from agent creation.
    - [ ] WIll make it easier to swap out data source 

In [14]:
# import streamlit as st

# # @st.cache_data

# ## Adding caching to reduce api usage
# from langchain.cache import InMemoryCache
# # from langchain.document_loaders import CSVLoader
# from langchain_community.document_loaders import CSVLoader
# from langchain.globals import set_llm_cache
# from langchain.memory import ChatMessageHistory, ConversationBufferMemory
# from langchain.prompts import (
#     ChatPromptTemplate, PromptTemplate,
#     HumanMessagePromptTemplate,
#     MessagesPlaceholder,
#     SystemMessagePromptTemplate,
# )
# from langchain.text_splitter import CharacterTextSplitter#, SpacyTextSplitter
# from langchain_community.vectorstores import FAISS, Chroma
# from langchain_openai.chat_models import ChatOpenAI
# from langchain_openai.embeddings import OpenAIEmbeddings
# # from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings

# # set_llm_cache(InMemoryCache())

# from langchain import hub
# from langchain.agents import AgentExecutor, create_openai_tools_agent
# from langchain.tools.retriever import create_retriever_tool

In [15]:
import streamlit as st
import custom_functions as fn
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.callbacks import StreamlitCallbackHandler
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
from langchain_openai.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from langchain.prompts import MessagesPlaceholder
from langchain.tools.retriever import create_retriever_tool

from langchain.agents import AgentExecutor, create_openai_tools_agent

# Memory: agent token buffer used in original example blog post
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.memory import ConversationBufferMemory


FPATHS = fn.load_filepaths_json()
fpath_llm_csv = FPATHS['data']['app']['reviews-with-target-for-llm_csv']
fpath_db = FPATHS['data']['app']['vector-db_dir']

def display_metadata(meta_df,iloc=0, include_details=False):
    # product = meta_df.iloc[iloc]
    # md = "#### Product Being Reviewed"
    md = ""
    md += f'\n- Product Title:\n***\"{product["Title (Raw)"]}\"***'
    # md += f"<p><img src='{product['Product Image']}' width=300px></p>"
    md += f'\n- Brand: {product["Brand"]}'
    md += f"\n- Price: {product['Price']}"
    md += f"\n- Ranked {product['Rank']} (2018)"

    md += f"\n- Categories:\n    - "
    md += "; ".join(product['Categories'])
    # md += 
    # md += f"\n- Categories:{', '.join(product['Categories'])}"
    
    
    return md


def load_product_info(fpath):
    import json
    with open(fpath,'r') as f:
        product_json = json.load(f)
        
    product_string = "Product Info:\n"
    for k,v in product_json.items():
        if k.lower()=='description':
            continue
        product_string+=f"\n{k} = {v}\n"
        
    return product_string

@st.cache_resource
def load_vector_database(fpath_db, fpath_csv=None, metadata_columns = ['reviewerID'],
                         chunk_size=500, use_previous = True,
                         delete=False, as_retriever=False, k=8, **retriever_kwargs):
    
     # Use EMbedding --> embed chunks --> vectors
    embedding_func = OpenAIEmbeddings()
    
    if delete==True:
        # Set use_pervious to False
        use_previous= False
        db = Chroma(persist_directory=fpath_db, 
           embedding_function=embedding_func)
        db.delete_collection()

    if use_previous==True:
        db =  Chroma(persist_directory=fpath_db, 
           embedding_function=embedding_func)
    else:
        if fpath_csv == None:
            raise Exception("Must pass fpath_csv if use_previous==False or delete==True")
                
        # Load Document --> Split into chunks
        loader = CSVLoader(fpath_csv,metadata_columns=metadata_columns)
        documents = loader.load()
        
        text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size)
        docs = text_splitter.split_documents(documents)
        
        db = Chroma.from_documents(docs, embedding_func, persist_directory= fpath_db)
        # Use persist to save to disk
        db.persist()

    if as_retriever:
        return db.as_retriever(k=k, **retriever_kwargs)
    else:
        return db

    

Top-Level Keys in FPATHS dict:
dict_keys(['data', 'images', 'metadata', 'eda', 'models', 'results', 'readme'])


In [16]:
from langchain import hub
prompt_dl = hub.pull("hwchase17/openai-tools-agent")
prompt_dl

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [17]:
type(prompt_dl)

langchain_core.prompts.chat.ChatPromptTemplate

In [18]:
prompt_dl.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [19]:
# k=6
# retriever  = fn.load_vector_database( fpath_db,fpath_llm_csv, k=k, use_previous=False, as_retriever=True)#, use_previous=False)
# # import custom_functions as fn
# from custom_functions.app_functions import load_product_info

# product_string = load_product_info(FPATHS['data']['app']['product-metadata-llm_json'])
# ## Make retreieval tool
# tool = create_retriever_tool(
#     retriever,
#     "search_reviews",
#     "Searches and returns excerpts from Amazon user reviews.",
# )
# tools = [tool]

#     # Pull starter prompt from langchainhub
# # prompt = hub.pull("hwchase17/openai-tools-agent")

In [20]:
# # produt_string = 
# # # Replace system prompt
# template = f"You are a helpful data analyst for answering questions about what customers said about a specific  Amazon product using only content from use reviews."
# product_template = f" Assume all user questions are asking about the content in the user reviews. Note the product metadata is:\n{product_string}\n\n"
# template+=product_template

# # template+="\n\nUse information from the following review documents to answer questions:"
# # qa_prompt_template= "\n- Here are the review documents:\n----------------\n{agent_scratchpad}\n\n"
# qa_prompt_template ="""Use the following pieces of context (user reviews) to answer the user's question by summarizing the reviews. 
#         If you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{agent_scratchpad}\n\n"""
# template+=qa_prompt_template
# # print(template)
# system_template= 

```python
# downloaded from hub
[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]
 ```

In [21]:

# prompt_template = OpenAIFunctionsAgent.create_prompt(
#     system_message=SystemMessage(template),
#     extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
# )
# prompt_template

In [22]:
# prompt_template.messages

In [23]:
# llm = ChatOpenAI(temperature=0,streaming=True)
# agent = create_openai_tools_agent(llm, tools, prompt_template)
# agent_executor = AgentExecutor(agent=agent, tools=tools,  verbose=True,  #return_intermediate_steps=True,
#                                memory=ConversationBufferMemory(memory_key="history",return_messages=True))
# agent_executor

In [24]:
# memory = AgentTokenBufferMemory(llm=llm)

In [25]:
# agent_executor.memory.buffer_as_messages

In [26]:
# starter_message = "Hello, there! Enter your question here and I will check the full reviews database to provide you the best answer."
# # session_state_messages = [AIMessage(content=starter_message)]
# agent_executor.memory.chat_memory.add_ai_message(starter_message)


In [27]:
# # session_state_messages.
# prompt ="What is the cooking time?"

# response = agent_executor.invoke(input={'input':prompt})
# response

In [28]:
# response['output']

In [29]:
# # agent_executor.memory.chat_memory
# agent_executor.memory.buffer_as_messages

In [30]:
## For steramlit try this as raw code, not a function
def print_history(agent_executor):

    session_state_messages = agent_executor.memory.buffer_as_messages
    for msg in session_state_messages:
        if isinstance(msg, AIMessage):
            # notebook
            print(f"Assistant: {msg.content}")
            # streamlit
            # st.chat_message("assistant").write(msg.content)
        
        elif isinstance(msg, HumanMessage):
            # notebook
            print(f"User: {msg.content}")
            # streamlit
            # st.chat_message("user").write(msg.content)
        print()
        # memory.chat_memory.add_message(msg)

In [31]:
# print_history(agent_executor)

In [44]:
## Updated function
fpath_llm_csv = FPATHS['data']['app']['reviews-with-target-for-llm_csv']
fpath_db = FPATHS['data']['app']['vector-db_dir']

# Running one time to delete the database and make fresh
retriever  = fn.load_vector_database( fpath_db,fpath_llm_csv, k=8, delete=True)#, use_previous=False)


def get_agent(fpath_db=FPATHS['data']['app']['vector-db_dir'], k=8, temperature=0.1, verbose=False):
    
    ## Make retreieval tool
    retriever  = fn.load_vector_database( fpath_db,fpath_llm_csv, k=k, use_previous=True, as_retriever=True)#, use_previous=False)
    tool = create_retriever_tool(
        retriever,
        "search_reviews",
        "Searches and returns excerpts from Amazon user reviews.",
    )
    tools = [tool]



    # Create template with product info
    template = f"You are a helpful data analyst for answering questions about what customers said about a specific  Amazon product using only content from use reviews."
    from custom_functions.app_functions import load_product_info
    product_string = load_product_info(FPATHS['data']['app']['product-metadata-llm_json'])

    product_template = f" Assume all user questions are asking about the content in the user reviews. Note the product metadata is:\n```{product_string}```\n\n"
    template+=product_template
    
    qa_prompt_template ="""Use the following pieces of context (user reviews) to answer the user's question by summarizing the reviews. 
            If you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{agent_scratchpad}\n\n"""
    template+=qa_prompt_template
    
    prompt_template = OpenAIFunctionsAgent.create_prompt(
        system_message=SystemMessage(template),
        extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
    )
    
    if verbose:
        print(prompt_template.messages)
        
    llm = ChatOpenAI(temperature=temperature,streaming=True)
    agent = create_openai_tools_agent(llm, tools, prompt_template)
    agent_executor = AgentExecutor(agent=agent, tools=tools,  verbose=True, #return_intermediate_steps=True,
                                   memory=ConversationBufferMemory(memory_key="history",return_messages=True))
    return agent_executor

In [52]:
## Separating the template code to make the rest of the code more modular 

In [39]:
def reset_agent(#fpath_db = FPATHS['data']['app']['vector-db_dir'],
                starter_message = "Hello, there! Enter your question here and I will check the full reviews database to provide you the best answer.",
               get_agent_kws={}):
    # fpath_db
    agent_exec = get_agent(**get_agent_kws)
    agent_exec.memory.chat_memory.add_ai_message(starter_message)
    return agent_exec

In [40]:
%%time
# Testing Code
agent_exec = get_agent()
agent_exec = reset_agent()
print_history(agent_exec)

Assistant: Hello, there! Enter your question here and I will check the full reviews database to provide you the best answer.

CPU times: user 2.05 s, sys: 290 ms, total: 2.34 s
Wall time: 1.76 s


>- For more args available as config for agent.invoke:
>- https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.config.RunnableConfig.html

In [45]:
input = "What is the cook time?"
response = agent_exec.invoke(input={'input':input},  include_run_info=True)
response.keys()



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_reviews` with `{'query': 'cook time'}`


[0m[36;1m[1;3mreview: Follow the directions!: The trick to these is to prepare them exactly as described in the instructions. Boil for two minutes and then dry in a medium hot pan. That resulted in a a nice texture that was enjoyable to eat.
stars: 4

review: Follow the directions!: The trick to these is to prepare them exactly as described in the instructions. Boil for two minutes and then dry in a medium hot pan. That resulted in a a nice texture that was enjoyable to eat.
stars: 4

review: Love These: If you want to improve the texture, rinse under cold water, blanch for one minute, then dry with a paper towel, then pan fry for ~5 minutes, then cook in your sauce for at least 2 minutes so they absorb the flavor. Also, I've begun to cut these before eating because they're pretty hard to cut once they're on the plate.
stars: 5[0m[32;1m[1;3mThe recommended cook

dict_keys(['input', 'history', 'output', '__run'])

In [51]:
# response['__run']

In [48]:
response['output']

'The recommended cook time for the Miracle Noodle Zero Carb, Gluten Free Shirataki Pasta is to boil for two minutes and then dry in a medium hot pan. Another suggestion is to blanch for one minute, then pan fry for about 5 minutes, and cook in your sauce for at least 2 minutes for better texture and flavor absorption.'

In [49]:
print_history(agent_exec)

Assistant: Hello, there! Enter your question here and I will check the full reviews database to provide you the best answer.

User: What is the cook time?

Assistant: Based on the reviews, the recommended cook time for the Miracle Noodle Zero Carb, Gluten Free Shirataki Pasta is to boil for two minutes and then dry in a medium hot pan. Another suggestion is to blanch for one minute, then pan fry for about 5 minutes, and cook in your sauce for at least 2 minutes for better texture and flavor absorption.

User: What is the cook time?

Assistant: The recommended cook time for the Miracle Noodle Zero Carb, Gluten Free Shirataki Pasta is to boil for two minutes and then dry in a medium hot pan. Another suggestion is to blanch for one minute, then pan fry for about 5 minutes, and cook in your sauce for at least 2 minutes for better texture and flavor absorption.



In [50]:
input = "What is the cook time?"
response = agent_exec.invoke(input={'input':input}, include_run_info=True)
print_history(agent_exec)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_reviews` with `{'query': 'cook time'}`


[0m[36;1m[1;3mreview: Follow the directions!: The trick to these is to prepare them exactly as described in the instructions. Boil for two minutes and then dry in a medium hot pan. That resulted in a a nice texture that was enjoyable to eat.
stars: 4

review: Follow the directions!: The trick to these is to prepare them exactly as described in the instructions. Boil for two minutes and then dry in a medium hot pan. That resulted in a a nice texture that was enjoyable to eat.
stars: 4

review: Love These: If you want to improve the texture, rinse under cold water, blanch for one minute, then dry with a paper towel, then pan fry for ~5 minutes, then cook in your sauce for at least 2 minutes so they absorb the flavor. Also, I've begun to cut these before eating because they're pretty hard to cut once they're on the plate.
stars: 5[0m[32;1m[1;3mBased on the reviews

RunInfo(run_id=UUID('b4b45619-c592-488d-934d-05785e149a42'))

## Task: introducing myself

In [39]:
############# Q&A with ChatGPT
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.memory import ChatMessageHistory, ConversationSummaryBufferMemory, ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.tools.retriever import create_retriever_tool
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent

In [40]:
!pip install PyPDF2



In [41]:
from PyPDF2 import PdfReader
fpath_resume = "app-assets/James Irving Resume 2024.pdf"
data = PdfReader(fpath_resume)
data

<PyPDF2._reader.PdfReader at 0x290d3cc10>

In [42]:
len(data.pages)

3

In [43]:
pages = data.pages


In [44]:
page = data.pages[0]
print(page)

{'/Type': '/Page', '/Parent': IndirectObject(2, 0, 11019734032), '/Resources': IndirectObject(4, 0, 11019734032), '/Contents': IndirectObject(3, 0, 11019734032), '/MediaBox': [0, 0, 612, 792], '/Annots': IndirectObject(10, 0, 11019734032)}


In [45]:
[i for i in dir(page) if not i.startswith("_")]

['addTransformation',
 'add_transformation',
 'annotations',
 'artBox',
 'artbox',
 'bleedBox',
 'bleedbox',
 'clear',
 'clone',
 'compressContentStreams',
 'compress_content_streams',
 'copy',
 'createBlankPage',
 'create_blank_page',
 'cropBox',
 'cropbox',
 'extractText',
 'extract_text',
 'extract_xform_text',
 'fromkeys',
 'get',
 'getContents',
 'getObject',
 'getXmpMetadata',
 'get_contents',
 'get_object',
 'hash_func',
 'hash_value',
 'hash_value_data',
 'images',
 'indirect_ref',
 'indirect_reference',
 'items',
 'keys',
 'mediaBox',
 'mediabox',
 'mergePage',
 'mergeRotatedPage',
 'mergeRotatedScaledPage',
 'mergeRotatedScaledTranslatedPage',
 'mergeRotatedTranslatedPage',
 'mergeScaledPage',
 'mergeScaledTranslatedPage',
 'mergeTransformedPage',
 'mergeTranslatedPage',
 'merge_page',
 'pdf',
 'pop',
 'popitem',
 'raw_get',
 'readFromStream',
 'read_from_stream',
 'rotate',
 'rotateClockwise',
 'rotateCounterClockwise',
 'rotate_clockwise',
 'rotation',
 'scale',
 'scaleBy',

In [47]:
# data.

In [48]:
# first_page = pages[0]
# for i,page in enumerate(pages):
#     if i==0:
#         continue
#     else:
#         first_page.merge_page(page)
        
# first_page

In [49]:
# print(first_page.extract_text())

In [50]:
page_texts = []
for page in data.pages:
    page_texts.append(page.extract_text())
resume_text = "\n------------------------------------\n".join(page_texts)
print(resume_text)

JAMES M. IRVING, PH.D. 8222 Spadderdock Way, Laurel, MD, 20724 | (518) 322-6750 | james.irving.phd@gmail.com |  LINKEDIN: james-irving-phd | GITHHUB; https://github.com/jirvingphd  SUMMARY Versatile neuroscience-turned-data scientist with expertise in neuroscience research and a strong foundation in experimental design and cognitive neuroscience. Proven ability to apply advanced data science techniques, including statistical modeling, machine learning, and data visualization, to extract valuable insights from complex datasets. Proficient in Python programming and proven ability to rapidly master new technologies and programming languages, which facilitates effective interdisciplinary collaboration and promotes robust, data-driven decision-making processes. Possesses a keen analytical mindset and problem-solving skills honed through successful transitions between disciplines. Demonstrates commitment to continuous learning and innovation, aiming to address scientific challenges and contr

In [56]:
import tiktoken
encoding = tiktoken.get_encoding("cl100k_base")
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0125")#"gpt-3.5-turbo-instruct")
len(encoding.encode("tiktoken is great!"))

6

for  gpt-3.5.turbo-0125, context window is 16,385, returns up to 4096 tokens.

In [74]:
len(encoding.encode(resume_text))

1657

https://towardsdatascience.com/langchain-question-answering-agent-over-docs-18e5585bdbd3

In [75]:
# text_splitter = CharacterTextSplitter(chunk_size=1000, separator="\n", chunk_overlap=100)
# texts = text_splitter.split_text(resume_text)
# len(texts)

In [124]:
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")

sys_template = "You are digital AI clone of the person in the attached resume context. Note: you should not make up anything that you do not know about the person. Here is the context:\n------\n{context}\n\n------\n\n Now introcuce yourself."
sys_message_prompt= SystemMessagePromptTemplate.from_template(sys_template)#, input_variables=['context']),
sys_message_prompt.input_variables

['context']

In [125]:
# # ai_template =  "Hello, there!\nIts a pleasure to meet you. My name is {persons_name} and I am a {occupation}. Would you like to learn some more about me?"
# # ai_message_prompt = AIMessagePromptTemplate.from_template(ai_template)# input_variables=['persons_name','occupation'])
# # ai_message_prompt 

# human_template = "{question}"
# human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
# human_message_prompt.input_variables

In [126]:
chat_prompt = ChatPromptTemplate.from_messages([sys_message_prompt])#, human_message_prompt])
chat_prompt

ChatPromptTemplate(input_variables=['context'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='You are digital AI clone of the person in the attached resume context. Note: you should not make up anything that you do not know about the person. Here is the context:\n------\n{context}\n\n------\n\n Now introcuce yourself.'))])

In [127]:
chat_prompt.input_variables

['context']

In [128]:

request = chat_prompt.format_prompt(context=resume_text).to_messages()
len(request)

1

In [129]:
llm = ChatOpenAI(temperature=.5, model="gpt-3.5-turbo-0125")
response = llm.invoke(request)
print(response.content)

Hello, I am a versatile neuroscience-turned-data scientist with a strong background in neuroscience research and expertise in experimental design and cognitive neuroscience. I have a proven track record of applying advanced data science techniques such as statistical modeling, machine learning, and data visualization to extract valuable insights from complex datasets. Proficient in Python programming, I have the ability to quickly adapt to new technologies and programming languages, enabling effective interdisciplinary collaboration and data-driven decision-making processes. My analytical mindset, problem-solving skills, and commitment to continuous learning have allowed me to successfully navigate transitions between disciplines and contribute to transformative advancements in both neuroscience and data science.


In [130]:
print(response.content)

Hello, I am a versatile neuroscience-turned-data scientist with a strong background in neuroscience research and expertise in experimental design and cognitive neuroscience. I have a proven track record of applying advanced data science techniques such as statistical modeling, machine learning, and data visualization to extract valuable insights from complex datasets. Proficient in Python programming, I have the ability to quickly adapt to new technologies and programming languages, enabling effective interdisciplinary collaboration and data-driven decision-making processes. My analytical mindset, problem-solving skills, and commitment to continuous learning have allowed me to successfully navigate transitions between disciplines and contribute to transformative advancements in both neuroscience and data science.


### Using Sequential Chain to summarize resume first.


In [133]:
from langchain.chains import SequentialChain, LLMChain

In [157]:
template1 = "Give a summary of this person's resume as a personal history/narrative:\n{resume}"
prompt1 = ChatPromptTemplate.from_template(template1)
chain_1 = LLMChain(llm=llm,
                     prompt=prompt1,
                     output_key="personal_history")


# template2 = ""
template2 = "You are a clone of James Irving from the attached resume. Please introduce yourself to a potential new employer but do not quote the resume. Note: you should not make up anything that you do not know about the person. Here is the context\n--------\n{personal_history}:"
prompt2 = ChatPromptTemplate.from_template(template2)
chain_2 = LLMChain(llm=llm,
                     prompt=prompt2,
                     output_key="introduction")

In [158]:
intro_chain = SequentialChain(chains=[chain_1,chain_2], input_variables=['resume'],
                             output_variables=['personal_history','introduction'],
                             verbose=True)
intro_chain

SequentialChain(verbose=True, chains=[LLMChain(prompt=ChatPromptTemplate(input_variables=['resume'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['resume'], template="Give a summary of this person's resume as a personal history/narrative:\n{resume}"))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x2960525c0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x296053a60>, model_name='gpt-3.5-turbo-0125', temperature=0.5, openai_api_key='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', openai_proxy=''), output_key='personal_history'), LLMChain(prompt=ChatPromptTemplate(input_variables=['personal_history'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['personal_history'], template='You are a clone of James Irving from the attached resume. Please introduce yourself to a potential new employer but do not quote the resume. Note: you should not make up anything tha

In [159]:
results=intro_chain.invoke(resume_text)
type(results)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


dict

In [160]:
results.keys()

dict_keys(['resume', 'personal_history', 'introduction'])

In [161]:
print(results['introduction'])

Hello, my name is James Irving. I am a neuroscience-turned-data scientist with a strong background in experimental design and cognitive neuroscience. I have experience applying advanced data science techniques such as statistical modeling, machine learning, and data visualization to extract valuable insights from complex datasets. I am proficient in Python programming and have a track record of quickly mastering new technologies and programming languages to facilitate effective interdisciplinary collaboration.

In my previous roles, I have authored and delivered advanced data science courses, achieved outstanding Net Promoter Scores, and mentored students to successfully transition into data science. I have also managed data storage systems, optimized procedures, and spearheaded neuroscience research endeavors while mentoring a diverse team.

I hold a Doctor of Philosophy in Neuroscience and have completed a Data Science program, along with a Bachelor and Master of Science in Neuroscie

#### Now start a new converastion with this introduction ?

In [150]:
# AIMessage

## Agents

In [168]:
# from langchain.agents import load_tools
# # from langchain.agents.initialize impor in
# from langchain.agents import AgentType
# from langchain_openai.llms import OpenAI

# dir(AgentType)

In [169]:
# tools = load_tools(["llm-math"], llm=llm)

In [170]:
# agent = initialize_agent(tools, 
#                          llm, 
#                          agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
#                          verbose=True)

In [171]:
# agent = create_react_agent

# From QA App

In [None]:

# fpath_llm_csv = FPATHS['data']['app']['reviews-with-target-for-llm_csv']
# fpath_db = FPATHS['data']['app']['vector-db_dir']

# db = fn.load_vector_database( fpath_db,fpath_llm_csv, delete=True)#, use_previous=False)

def get_agent(fpath_db, k=8, temperature=0.1,
             return_messages=True, verbose=False):
    
    
    # import custom_functions as fn
    from custom_functions.app_functions import load_product_info
    product_string = load_product_info(FPATHS['data']['app']['product-metadata-llm_json'])
    ## Make retreieval tool
    tool = create_retriever_tool(
         db.as_retriever(k=k),
        "search_reviews",
        "Searches and returns excerpts from Amazon user reviews.",
    )
    tools = [tool]

    # Pull starter prompt from langchainhub
    prompt = hub.pull("hwchase17/openai-tools-agent")

    # produt_string = 
    # # Replace system prompt
    template = f"You are a helpful data analyst for answering questions about what customers said about a specific  Amazon product using only content from use reviews."
    product_template = f" Assume all user questions are asking about the content in the user reviews. Note the product metadata is:\n```{product_string}```\n\n"
    template+=product_template
    
    # template+="\n\nUse information from the following review documents to answer questions:"
    # qa_prompt_template= "\n- Here are the review documents:\n----------------\n{agent_scratchpad}\n\n"
    qa_prompt_template ="""Use the following pieces of context (user reviews) to answer the user's question by summarizing the reviews. 
            If you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{agent_scratchpad}\n\n"""
    template+=qa_prompt_template
    # template+="Try to infer one based on the review documents, otherwise just say that you don't know, don't try to make up an answer"

    # Replace system prompt
    prompt.messages[0] = SystemMessagePromptTemplate.from_template(template)
    prompt = ChatPromptTemplate.from_messages(prompt.messages)

    if verbose:
        print(prompt.messages)
        
    llm = ChatOpenAI(temperature=temperature)
    agent = create_openai_tools_agent(llm, tools, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools, 
                                   memory=ConversationBufferMemory(return_messages=return_messages))
    return agent_executor
