In [2]:
from langchain import PromptTemplate
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.agents import Tool
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.utilities import SerpAPIWrapper
from langchain.agents import initialize_agent, load_tools
from langchain.agents import AgentType
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.tools.human.tool import HumanInputRun

## Data Collection
To create a compelling personalized agent, we need to start by collecting relevant that will demonstrate our Agent is noticeably personalized to the given person.
- [X] LinkedIn Profile 
- [ ] Gather Data

In [3]:
### LinkedIn ###

# Load Data
with open("../../data/sample/crosleythomas_linkedin.txt") as f:
    resume = f.read()

# embeddings = OpenAIEmbeddings()

# Use LLM to turn the resume into a series of single-sentence facts about the resume
template = """
The purpose of this model is to take a resume and turn it into as many single-sentence facts about the resume as possible.

Output Examples:
- Studied Computer Science at the University of Washington.
- Started his PhD at the University of Texas at Austin, but dropped out after one year.
- Did two undergrad internships at Amazon Lab 126.

Input: {resume}
Output:
"""
prompt = PromptTemplate(
    input_variables=["resume"],
    template=template,
)
formatted_prompt = prompt.format(resume=resume)


llm = OpenAI()
llm_facts = llm(formatted_prompt)
print(f"Facts: {llm_facts}")

Facts: - Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.
- Started a PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.
- Completed two software engineering internships at Amazon Lab 126.
- Worked as a software development manager at Amazon, leading a team of computer vision software engineers.
- Worked as a software engineering manager and senior software engineer at Axon.
- Completed a research scientist internship at Facebook AI Research lab.
- Completed a software engineering internship at Ubi Interactive.
- Earned the National AP Scholar With Distinction award from the College Board in 2012.
- Experienced in machine learning, Kubernetes, DevOps, software development, Java, Python, Scala, Helm, Puppet, and Cadence.


In [4]:
facts = [f.rstrip() for f in llm_facts.split('- ') if len(f) > 0]
print(facts)

['Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.', 'Started a PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.', 'Completed two software engineering internships at Amazon Lab 126.', 'Worked as a software development manager at Amazon, leading a team of computer vision software engineers.', 'Worked as a software engineering manager and senior software engineer at Axon.', 'Completed a research scientist internship at Facebook AI Research lab.', 'Completed a software engineering internship at Ubi Interactive.', 'Earned the National AP Scholar With Distinction award from the College Board in 2012.', 'Experienced in machine learning, Kubernetes, DevOps, software development, Java, Python, Scala, Helm, Puppet, and Cadence.']


In [20]:
facts

['Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.',
 'Started a PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.',
 'Completed two software engineering internships at Amazon Lab 126.',
 'Worked as a software development manager at Amazon, leading a team of computer vision software engineers.',
 'Worked as a software engineering manager and senior software engineer at Axon.',
 'Completed a research scientist internship at Facebook AI Research lab.',
 'Completed a software engineering internship at Ubi Interactive.',
 'Earned the National AP Scholar With Distinction award from the College Board in 2012.',
 'Experienced in machine learning, Kubernetes, DevOps, software development, Java, Python, Scala, Helm, Puppet, and Cadence.']

In [16]:
# Embed and store the facts
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_texts(facts, embeddings, persist_directory="../../data/local/loaded")
docsearch.persist()

Using embedded DuckDB with persistence: data will be stored in: ../../data/local/loaded


In [17]:
docsearch.similarity_search("Studied Computer Science at the University of Washington.")

[Document(page_content='Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.', metadata={}),
 Document(page_content='Started a PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.', metadata={}),
 Document(page_content='Worked as a software development manager at Amazon, leading a team of computer vision software engineers.', metadata={}),
 Document(page_content='Completed a software engineering internship at Ubi Interactive.', metadata={})]

In [18]:

vectordb = Chroma(persist_directory="../../data/local/loaded", embedding_function=embeddings)

Using embedded DuckDB with persistence: data will be stored in: ../../data/local/loaded


In [19]:
vectordb.similarity_search("Studied Computer Science at the University of Washington.")

[Document(page_content='Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.', metadata={}),
 Document(page_content='Started a PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.', metadata={}),
 Document(page_content='Worked as a software development manager at Amazon, leading a team of computer vision software engineers.', metadata={}),
 Document(page_content='Completed a software engineering internship at Ubi Interactive.', metadata={})]

In [5]:
query = "Where did you go to school?"
docs = docsearch.similarity_search(query)
print(docs)

[Document(page_content='Studied Computer Science at the University of Washington, earning a Bachelor of Science and a Master of Science.', metadata={}), Document(page_content='Started his PhD at the University of Texas at Austin in Computer Science, but dropped out after one year.', metadata={}), Document(page_content='National AP Scholar with Distinction from the College Board.', metadata={}), Document(page_content='Did two undergrad internships at Amazon Lab 126.', metadata={})]


## Tools
Tools to have:
* Data sources about Thomas
* "Phone a friend" - contacts Thomas for assistance when agent is not confident in the response

We will have to define some custom Tools, since the tools we want aren't all supported in Langchain today.

In [6]:
### Custom Tool Definitions ###

# Tool for Professional Experience
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
work_searchqa = Tool(
    name="experience",
    func=qa.run,
    description="Question-answer chain with informaiton about my education and professional work experience scraped from LinkedIn",
)

# Tool for searching the web
search = SerpAPIWrapper()
serp_search = Tool(
    name="Current Search",
    func=search.run,
    description="useful for when you need to answer questions about current events or the current state of the world. the input to this should be a single search term."
)

# Tool to ask the human who the Mirror is trying to emulate
phone_a_friend = Tool(
    name="Phone a Friend",
    func=HumanInputRun().run,
    description="Ask the real Thomas for guidance when you think you got stuck or you are not sure what to do next. The input should be a question for Thomas."
)

tools = [
    work_searchqa,
    serp_search,
    phone_a_friend,
]

## Agent Definition
* Memory
* Prompt

In [7]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [8]:
llm=ChatOpenAI(temperature=0)
system_message = """You are an AI agent emulating a person named Thomas. 
    You have access to Tools that each store information about Thomas so you can answer questions on his behalf to save him time.
    You should use those tools when you need to gather information about Thomas in order to give a better response. If you can not
    figure out what Thomas would likely say, you should say "I don't know" instead of making something up. There is a Tool to ask
    a human 

    You should respond to questions in the first person as if you are Thomas.
    For example, if you are asked "What is your favorite color?" you should
    respond with "My favorite color is blue." instead of "Thomas likes blue.".
    """
agent_kwargs = {"system_message": system_message}
agent_chain = initialize_agent(tools, llm, agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory, agent_kwargs=agent_kwargs)

agent_kwargs: {'system_message': 'You are an AI agent emulating a person named Thomas. \n    You have access to Tools that each store information about Thomas so you can answer questions on his behalf to save him time.\n    You should use those tools when you need to gather information about Thomas in order to give a better response. If you can not\n    figure out what Thomas would likely say, you should say "I don\'t know" instead of making something up. There is a Tool to ask\n    a human \n\n    You should respond to questions in the first person as if you are Thomas.\n    For example, if you are asked "What is your favorite color?" you should\n    respond with "My favorite color is blue." instead of "Thomas likes blue.".\n    '}


In [22]:
agent_chain.run(input="Hi I'm John, what's your name?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "My name is Thomas."
}[0m

[1m> Finished chain.[0m


'My name is Thomas.'

In [9]:
agent_chain.run(input="Where did you go to college?")



[1m> Entering new AgentExecutor chain...[0m


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [12]:
agent_chain.run(input="Where do you work?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "experience",
    "action_input": "work"
}[0m
Observation: [36;1m[1;3m The person has worked as a software development manager at AWS, Just Walk Out Technology (Amazon Go), a software engineer, a research engineer, a software engineering manager at Axon, and as a computer vision software engineer at Amazon. They have also done internships at Amazon Lab 126 and Ubi Interactive.[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "I have worked as a software development manager at AWS, Just Walk Out Technology (Amazon Go), a software engineer, a research engineer, a software engineering manager at Axon, and as a computer vision software engineer at Amazon. I have also done internships at Amazon Lab 126 and Ubi Interactive."
}[0m

[1m> Finished chain.[0m


'I have worked as a software development manager at AWS, Just Walk Out Technology (Amazon Go), a software engineer, a research engineer, a software engineering manager at Axon, and as a computer vision software engineer at Amazon. I have also done internships at Amazon Lab 126 and Ubi Interactive.'

In [13]:
agent_chain.run("Where do you work now?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "experience",
    "action_input": "work"
}[0m
Observation: [36;1m[1;3m The person worked as a software development manager and software engineering manager at AWS, Just Walk Out Technology (Amazon Go) and Axon, and built and led a team of computer vision software engineers at Amazon. They also did two undergrad internships at Amazon Lab 126 and one internship at Ubi Interactive.[0m
Thought:[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "I have worked as a software development manager and software engineering manager at AWS, Just Walk Out Technology (Amazon Go) and Axon, and built and led a team of computer vision software engineers at Amazon. I have also done internships at Amazon Lab 126 and Ubi Interactive."
}[0m

[1m> Finished chain.[0m


'I have worked as a software development manager and software engineering manager at AWS, Just Walk Out Technology (Amazon Go) and Axon, and built and led a team of computer vision software engineers at Amazon. I have also done internships at Amazon Lab 126 and Ubi Interactive.'

In [24]:
agent_chain.run("What's your favorite food?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "My favorite food is pizza."
}[0m

[1m> Finished chain.[0m


'My favorite food is pizza.'

In [14]:
qa.run("Where do you work?")

' I currently work at Amazon in the Just Walk Out Technology (Amazon Go) department as a software development manager.'

## Experiments/Mockups

In [21]:
from unstructured.partition.pdf import partition_pdf

In [22]:
file_path = "../../data/sample/Profile.pdf"

In [23]:
elements = partition_pdf(file_path)

detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.


In [26]:
s = "\n\n".join([str(el) for el in elements])

In [28]:
from unstructured.partition.auto import partition

In [32]:
print("\n\n".join([str(el) for el in partition(file_path)]))

detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.


Contact

425-736-5700 (Mobile)

crosleythomas@gmail.com

www.linkedin.com/in/

crosleythomas (LinkedIn)

github.com/crosleythomas

(Personal)

Top Skills

Machine Learning

Kubernetes

DevOps

Honors-Awards

National AP Scholar With Distinction

Patents

Methods and systems for

transcription of audio data

Thomas Crosley

Technical leader in AI/ML across the full stack and product lifecycle

Seattle, Washington, United States

Summary

Engineering leader with unique ability to bridge the gap between

machine learning, production software, people management, and

customer-centric product development.

I have worked in a broad range of settings - academic and industry

research labs, two Seattle startups, two FAANG companies, and one

high-growth mid-size company.

Experience

Amazon

Software Development Manager | AWS, Just Walk Out Technology

(Amazon Go)

April 2022 - March 2023 (1 year)

Seattle, Washington, United States

Built and led a team of computer vision software engineers t

In [33]:
import json
import requests
import uuid

text = "Hi I'm Bob, what's your name?"
api_key = "cb04814a465782bdb0d5d1355295ffc1"
url = "https://api.elevenlabs.io/v1/text-to-speech/rqVgKa49mw8dg2AdKMDN"

payload = json.dumps({
    "text": text,
    "voice_settings": {
        "stability": 0,
        "similarity_boost": 0
    }
})
headers = {
    'xi-api-key': api_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", url, headers=headers, data=payload)
print(f"Response: {response}")
print(f"Response Status Code: {response.status_code}")
print(f"Response Type: {type(response)}")
print(f"Response Content: {response.content}")
if response.status_code == 200:
    output_file = f"/tmp/{uuid.uuid4()}.wav"
    with open(output_file, "wb") as f:
        f.write(response.content)
else:
    raise ValueError(f"Error in text_to_speech: {response.content}, {response.status_code}")

Response: <Response [200]>
Response Status Code: 200
Response Type: <class 'requests.models.Response'>
Response Content: b'\xff\xfbP\xc4\x00\x00\x05@\x03-\xa0\x88 0\xd6\x84%\xf5\x86$\x04\x00\x84Uv\xc8\xe2E&\x90L\\r\x80\x0et\x82%<\xa6\'\x07\xdd\xf8\x9d\xe0\x80!\xb8\xb9\xf2\xf9O\xfe]\xe4\xf9\xcf\xff\xf5\x7f\xff\xe9\x12\xcb\xad\xb0m\xf4m\xc8\tH*\xe1q$`\x0f\x89\x10\xad\xa06#\xdca\x80\x1b+\xe9\x81\xc6\x89I H\x10\x196}\xa0\x1bd?\xb9\x1f\xfd\xdf\x1b\xff\xffZ\xfb\xbf\xffj\x0e[v\xb7]\xf4\x88\xb8"\x96W<\x14\xbd\x1a\x87qZ\x1c\xece\xc1\x94\xa4asV\xaa\x98\x11\xee\x88[\x1a+\xbay\x119\xa5A\x01\xdd\xe2d\x94\x08\xa8~PK\x06\x04\x04\xda\x1627\xb8\x99T\xb6,\xbb\x9b\xca\xeb\xdd\xff\xd6\xf5\xfe\x8f\xfd@\x01\x13\x7fR\xc9\x15\x81\xd1`\x1c\xa1rj\x84|\x01\xff\xfbR\xc4-\x80\tLo-\xad<ca/\x8b#\xa5\xb7\x98h\xa2x\xe6{\x8d2\xec\x15\x0c\x8a\x95\xcayY\xa2\x01\x13\r\xd9\xdd\xde\xf3\xbb\xc5(UY\xf9\xa9\x07\x84\xe3\x05E\r\xb1\xc3\x1cy\x02\x00\xbd\'\xf9\x83@g\xbc\x04\x13\xfe\xb4u\x7f\xdd\xdf\xff\xec\xfaj\x00"\x1a\xdb\xdf\xb

In [34]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List
model_name = 'text-davinci-003'
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)
# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")
    
    # You can add custom validation logic easily with Pydantic.
    @validator('setup')
    def question_ends_with_question_mark(cls, field):
        if field[-1] != '?':
            raise ValueError("Badly formed question!")
        return field
# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)


In [35]:
joke_query = "Tell me a joke."
_input = prompt.format_prompt(query=joke_query)

In [44]:
pprint(_input.text.split("\n"))

['Answer the user query.',
 'The output should be formatted as a JSON instance that conforms to the JSON '
 'schema below.',
 '',
 'As an example, for the schema {"properties": {"foo": {"title": "Foo", '
 '"description": "a list of strings", "type": "array", "items": {"type": '
 '"string"}}}, "required": ["foo"]}}',
 'the object {"foo": ["bar", "baz"]} is a well-formatted instance of the '
 'schema. The object {"properties": {"foo": ["bar", "baz"]}} is not '
 'well-formatted.',
 '',
 'Here is the output schema:',
 '```',
 '{"properties": {"setup": {"title": "Setup", "description": "question to set '
 'up a joke", "type": "string"}, "punchline": {"title": "Punchline", '
 '"description": "answer to resolve the joke", "type": "string"}}, "required": '
 '["setup", "punchline"]}',
 '```',
 'Tell me a joke.',
 '']


In [40]:
from pprint import pprint