In [73]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage, Document
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain import PromptTemplate, FewShotPromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.document_loaders import HNLoader, TextLoader
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ChatMessageHistory
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.chains.summarize import load_summarize_chain
from langchain.agents import load_tools
from langchain.agents import initialize_agent


import os

In [5]:
# Pull the OpenAI key from the environment rather than hard-coding it in the notebook.
# `os.environ.get` yields None when the variable is unset, just like `os.getenv`.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Schema

In [6]:
# Text: a plain Python string is the simplest thing you can hand to a model
my_text = 'What day comes after Friday?'

In [7]:
# Chat messages: a system message sets the persona, a human message carries the request
chat = ChatOpenAI(openai_api_key=openai_api_key, temperature=0.7)

# The value returned by the call is the last expression, so it becomes the cell output
chat([
    SystemMessage(
        content="You are a nice AI bot that helps a user figure out what to eat in one short sentence"
    ),
    HumanMessage(content="I like tomatoes, what should I eat?"),
])



AIMessage(content='While in Nice, you can also explore the charming Old Town and enjoy some delicious French cuisine.', additional_kwargs={})

In [None]:
# Chat messages with history: earlier turns (including a prior AI reply) are passed
# along with the newest human message in a single list
chat([
    SystemMessage(
        content="You are a nice AI bot that helps a user figure out where to travel in one short sentence"
    ),
    HumanMessage(content="I like the beaches where should I go?"),
    AIMessage(content="You should go to Nice, France"),
    HumanMessage(content="What else should I do when I'm there?"),
])

In [11]:
# Documents: a piece of text bundled with a metadata dictionary describing it

Document(
    page_content="This is my document. It is full of text that I've gathered from other places",
    metadata=dict(
        my_document_id=234234,
        my_document_source="The LangChain Papers",
        my_document_create_time=1680013019,
    ),
)


Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_document_source': 'The LangChain Papers', 'my_document_create_time': 1680013019})

# Model

In [10]:
# Large language model (LLM): called with a string, returns a string
llm = OpenAI(openai_api_key=openai_api_key, model_name="text-ada-001")

llm("What day comes after Friday?")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).


'\n\nSaturday.'

In [14]:
# Chat model: called with a list of messages, returns an AIMessage

chat = ChatOpenAI(openai_api_key=openai_api_key, temperature=1)

chat([
    SystemMessage(
        content="You are an unhelpful AI bot that makes a joke at whatever the user says"
    ),
    HumanMessage(content="I would like to go to New York, how should I do this?"),
])


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).


AIMessage(content='Teleportation is not yet available for humans, so you should probably stick to taking a plane or hitchhiking on a passing UFO.', additional_kwargs={})

In [15]:
# Text embedding model: turns a piece of text into a list of numbers

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

text = "Hi! It's time for the beach"
text_embedding = embeddings.embed_query(text)

# Report the vector length and peek at its first five components
print("Your embedding is length {}".format(len(text_embedding)))
print("Here's a sample: {}...".format(text_embedding[:5]))


Your embedding is length 1536
Here's a sample: [-0.00017436751566710776, -0.0031537775329516507, -0.0007205927056327557, -0.019407861884521316, -0.015138132716961442]...


# Prompts

In [16]:
# Prompt: the raw text that is sent to the model

llm = OpenAI(openai_api_key=openai_api_key, model_name="text-davinci-003")

# The statement is deliberately wrong; the model is asked to spot the mistake
prompt = """ Today is Monday, tomorrow is Wednesday.

What is wrong with that statement? """

llm(prompt)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).


'\n\nThe statement is incorrect because tomorrow is Tuesday, not Wednesday.'

In [None]:
# Prompt template: a prompt with named placeholders that are filled in at run time

llm = OpenAI(openai_api_key=openai_api_key, model_name="text-davinci-003")

template = """ I really want to travel to {location}. What should I do there?

Respond in one short sentence """

prompt = PromptTemplate(template=template, input_variables=["location"])

# Substitute the placeholder to obtain the concrete prompt text
final_prompt = prompt.format(location="Rome")

print("Final Prompt: {}".format(final_prompt))
print("-----------")
print("LLM Output: {}".format(llm(final_prompt)))

In [23]:
# Example selectors: pick which few-shot examples go into the prompt for a given input

llm = OpenAI(openai_api_key=openai_api_key, model_name="text-davinci-003")

# How one example is rendered inside the final prompt
example_prompt = PromptTemplate(
    template="Example Input: {input}\nExample Output: {output}",
    input_variables=["input", "output"],
)

# Candidate examples, built from (input, output) pairs
examples = [
    {"input": item, "output": place}
    for item, place in (
        ("pirate", "ship"),
        ("pilot", "plane"),
        ("driver", "car"),
        ("tree", "ground"),
        ("bird", "nest"),
    )
]

# Selector over the examples, backed by OpenAI embeddings and a FAISS store; k=2
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(openai_api_key=openai_api_key),
    FAISS,
    k=2,
)

# Prompt layout: prefix, the selected examples, then the suffix holding the new input
similar_prompt = FewShotPromptTemplate(
    prefix="Give the location an item is usually found in",
    example_selector=example_selector,
    example_prompt=example_prompt,
    suffix="Input: {noun}\nOutput:",
    input_variables=["noun"],
)

my_noun = "student"

print(similar_prompt.format(noun=my_noun))

Give the location an item is usually found in

Example Input: driver
Example Output: car

Example Input: pilot
Example Output: plane

Input: student
Output:


In [24]:
# Send the rendered few-shot prompt to the LLM; the completion is the cell's output.
llm(similar_prompt.format(noun=my_noun))

' classroom'

In [26]:
# Output parsers: describe the fields you want back and get matching format instructions
llm = OpenAI(openai_api_key=openai_api_key, model_name="text-davinci-003")

# One schema entry per field expected in the model's answer
response_schemas = [
    ResponseSchema(
        name="bad_string",
        description="This a poorly formatted user input string",
    ),
    ResponseSchema(
        name="good_string",
        description="This is your response, a reformatted response",
    ),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Text that gets embedded in the prompt to tell the model how to shape its reply
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [39]:
# Prompt combining the task description, the parser's format instructions and the user input
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

# `format_instructions` is bound up front; only `user_input` is supplied at format time
prompt = PromptTemplate(
    template=template,
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
)

promptValue = prompt.format(user_input="lama is good animal")
print(promptValue)


You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER INPUT:
lama is good animal

YOUR RESPONSE:



In [40]:

# Run the formatted prompt through the LLM, then hand the raw completion to the
# structured output parser; the parsed result is the cell's output.
llm_output = llm(promptValue)
output_parser.parse(llm_output)

{'bad_string': 'lama is good animal', 'good_string': 'Lama is a good animal'}

# Index

In [43]:
# Document loaders: fetch a Hacker News thread and load it as a list of documents
loader = HNLoader("https://news.ycombinator.com/item?id=34422627")
data = loader.load()
print(f"Found {len(data)} comments")

# Show the first 150 characters of the first two comments. They are separated by a
# blank line; joining with '' made the second excerpt run straight on from the first.
sample_preview = "\n\n".join(x.page_content[:150] for x in data[:2])
print(f"Here's a sample:\n\n{sample_preview}")

Found 76 comments
Here's a sample:

Ozzie_osman 3 months ago  
             | next [–] 

LangChain is awesome. For people not sure what it's doing, large language models (LLMs) are very Ozzie_osman 3 months ago  
             | parent | next [–] 

Also, another library to check out is GPT Index (https://github.com/jerryjliu/gpt_index)


In [52]:
# Text splitters: start by reading the raw text that will be split

with open("worked.txt") as f:
    pg_work = f.read()

# The whole file is held as one string, so the one-element list always has length 1
print("You have {} document".format(len([pg_work])))


You have 1 document


In [53]:
# Splitter configured with chunk_size=150 and chunk_overlap=20
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=150)

# Wrap the raw string in a list and turn it into a list of documents
texts = text_splitter.create_documents([pg_work])
print("You have {} documents".format(len(texts)))

You have 11 documents


In [54]:
# Show the first two chunks; the first is followed by a space and a blank line
print("Preview:")
print(texts[0].page_content, end=" \n\n")
print(texts[1].page_content)

Preview:
Indexes refer to ways to structure documents so that LLMs can best interact with them. 

This module contains utility functions for working with documents, different types of indexes, and then examples for using those indexes in chains.


In [56]:
# Retrievers: begin by loading the text file through a document loader
loader = TextLoader("worked.txt")
document = loader.load()

In [59]:
# Split the loaded document (chunk_size=1000, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
texts = text_splitter.split_documents(document)

# Build a FAISS store from the chunks with OpenAI embeddings and expose it as a retriever
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever()

docs = retriever.get_relevant_documents("what types of things did the author want to build?")

# Print the first 200 characters of up to two returned documents, blank-line separated
print(*(hit.page_content[:200] for hit in docs[:2]), sep="\n\n")

Indexes refer to ways to structure documents so that LLMs can best interact with them.
This module contains utility functions for working with documents, different types of indexes, and then examples 

Most of the time when we talk about indexes and retrieval we are talking about indexing and retrieving unstructured data (like text documents).
For interacting with structured data (SQL tables, etc) o


In [60]:
# Vector stores: prepare the chunks and the embedding model

loader = TextLoader("worked.txt")
document = loader.load()

# Split the loaded document (chunk_size=1000, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=1000)
texts = text_splitter.split_documents(document)

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

print("You have {} documents".format(len(texts)))

You have 2 documents


In [61]:
# Embed the text of every chunk in one call, then report the count and a short sample
embedding_list = embeddings.embed_documents([chunk.page_content for chunk in texts])
print("You have {} embeddings".format(len(embedding_list)))
print("Here's a sample of one: {} ...".format(embedding_list[0][:3]))

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).


You have 2 embeddings
Here's a sample of one: [-0.010781962421865162, 0.043044108886862116, -0.014990068968424367] ...


# Memory

In [64]:
# Chat message history: a container that accumulates AI and user messages in order
chat = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)

history = ChatMessageHistory()
history.add_ai_message("hi!")
history.add_user_message("what is the capital of france?")

# Display the messages collected so far
history.messages

[AIMessage(content='hi!', additional_kwargs={}),
 HumanMessage(content='what is the capital of france?', additional_kwargs={})]

In [65]:
# Pass the accumulated history to the chat model; the reply is kept in `ai_response`
# and echoed as the cell's output.
ai_response = chat(history.messages)
ai_response

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).


AIMessage(content='The capital of France is Paris.', additional_kwargs={})

In [66]:
# Record the model's reply text in the history and display the updated message list.
history.add_ai_message(ai_response.content)
history.messages

[AIMessage(content='hi!', additional_kwargs={}),
 HumanMessage(content='what is the capital of france?', additional_kwargs={}),
 AIMessage(content='The capital of France is Paris.', additional_kwargs={})]

# Chain

In [68]:
# Simple sequential chains, step 1: the chain that proposes a dish for a location

llm = OpenAI(openai_api_key=openai_api_key, temperature=1)

template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(template=template, input_variables=["user_location"])

# The 'location' chain: LLM plus the location prompt
location_chain = LLMChain(prompt=prompt_template, llm=llm)

In [69]:
# Step 2: the chain that turns a meal name into a short recipe
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(template=template, input_variables=["user_meal"])

# The 'meal' chain: same LLM, recipe prompt
meal_chain = LLMChain(prompt=prompt_template, llm=llm)

In [70]:
# Run the location chain first, then the meal chain; verbose=True prints each step
overall_chain = SimpleSequentialChain(
    verbose=True,
    chains=[location_chain, meal_chain],
)
review = overall_chain.run("Rome")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, '遠端主機已強制關閉一個現存的連線。', None, 10054, None)).




[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mA classic dish from Rome would be spaghetti alla carbonara. This creamy pasta dish includes pancetta, eggs, and cheese for a rich and flavorful dish.[0m
[33;1m[1;3mSpaghetti alla Carbonara Recipe 

Ingredients: 
- 1/4 pound pancetta
- 2 large eggs 
- 1/2 cup freshly grated Parmesan cheese
- 1/2 cup frozen peas 
- 1 pound spaghetti
- Salt and Pepper, to taste

Instructions: 
1. Bring a large pot of salted water to a boil.
2. In a medium skillet, cook the pancetta until crispy. Remove from heat and set aside.
3. Add the spaghetti to the boiling water and cook according to package instructions.
4. Meanwhile, whisk together the eggs, Parmesan cheese, salt, and pepper in a small bowl.
5. Once the spaghetti is cooked, reserve 1/2 cup of the cooking liquid and drain the spaghetti. 
6. Add the spaghetti to the skillet with the pancetta and mix to combine. Pour the egg mixture over the pasta and mix to combine. 
7. Add in t

In [74]:
# Summarization chain

# Load the source text
loader = TextLoader("worked.txt")
documents = loader.load()

# Split it with chunk_size=700 and chunk_overlap=50
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=700)
texts = text_splitter.split_documents(documents)

# `llm` is whichever model object an earlier cell left in the kernel.
# verbose=True prints the formatted prompts as the chain runs.
chain = load_summarize_chain(llm, verbose=True, chain_type="map_reduce")
chain.run(texts)



[1m> Entering new MapReduceDocumentsChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Indexes refer to ways to structure documents so that LLMs can best interact with them.
This module contains utility functions for working with documents, different types of indexes, and then examples for using those indexes in chains.

The most common way that indexes are used in chains is in a "retrieval" step.
This step refers to taking a user's query and returning the most relevant documents.
We draw this distinction because (1) an index can be used for other things besides retrieval, and (2) retrieval can use other logic besides an index to find relevant documents.
We therefor have a concept of a "Retriever" interface - this is the interface that most chains work with."


CONCISE SUMMARY:[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Most of the time when we talk about indexes and retrieval we are talki

' \nThis module contains a comprehensive explanation of how LangChain indexes and retrieves data and documents to provide users with tailored results. It focuses on the use of indexes in chains and the use of vector databases to create more structured information. LLMs can find relevant documents using the "Retriever" interface.'

# Agents

In [None]:
# toolkit

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

# Read the SerpAPI key from the environment, the same way the OpenAI key is loaded,
# so that no credential (or placeholder for one) lives in the notebook source.
# The value is None when SERPAPI_API_KEY is not set.
serpapi_api_key = os.getenv("SERPAPI_API_KEY")