# **LangChain Decoded**

## Getting Started

In [None]:
# Install the LangChain package
!pip install langchain

In [None]:
# Install the OpenAI package
!pip install openai

In [None]:
# Read the OpenAI API key from the environment; the 'sk-XXX' placeholder is
# used only when OPENAI_API_KEY is not set.
import os

openai_api_key = os.getenv("OPENAI_API_KEY", "sk-XXX")

## Part 1: Models

### Large Language Models (LLMs)

In [None]:
# Build the completion-style LLM client used by the cells below.
# NOTE(review): text-davinci-003 may no longer be served by OpenAI — confirm
# the model is still available before running.
from langchain.llms import OpenAI

llm = OpenAI(
    openai_api_key=openai_api_key,
    model_name="text-davinci-003",
)

In [None]:
# Generate a simple text response by calling the LLM object directly with a
# prompt string; as the last expression, the result is shown as cell output.
llm("Why is the sky blue?")

In [None]:
# Call generate() with a list of prompts and display the `llm_output`
# attribute of the returned result instead of the generated text.
llm_result = llm.generate(["Why is the sky blue?"])
llm_result.llm_output

In [None]:
# Measure token usage and cost for one LLM call made inside the callback context
from langchain.callbacks import get_openai_callback

with get_openai_callback() as cb:
    result = llm("Why is the sky blue?")

    # Emit the four usage lines in a single print call, one per line
    print(
        f"Total Tokens: {cb.total_tokens}",
        f"\tPrompt Tokens: {cb.prompt_tokens}",
        f"\tCompletion Tokens: {cb.completion_tokens}",
        f"Total Cost (USD): ${cb.total_cost}",
        sep="\n",
    )

### Chat Models

In [None]:
# Chat model example: a system message sets the assistant's role and a human
# message carries the sentence to translate
from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

chat = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)

messages = [
    SystemMessage(
        content="You are a helpful assistant that translates English to Spanish."
    ),
    HumanMessage(
        content="Translate this sentence from English to Spanish. I'm hungry, give me food."
    ),
]

# Last expression in the cell: the chat model's reply is displayed
chat(messages)

## Part 2: Embeddings

In [None]:
# Embed a single text input with OpenAI, then print the vector and its length
from langchain.embeddings.openai import OpenAIEmbeddings

text = "This is a sample query."
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

query_result = embeddings.embed_query(text)
print(query_result, len(query_result), sep="\n")

In [None]:
# Embed several texts in one call, then print the results and how many came back
from langchain.embeddings.openai import OpenAIEmbeddings

text = [
    "This is a sample query.",
    "This is another sample query.",
    "This is yet another sample query.",
]
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

doc_result = embeddings.embed_documents(text)
print(doc_result, len(doc_result), sep="\n")

In [None]:
# Swap in FakeEmbeddings to exercise the pipeline without the OpenAI embeddings class
from langchain.embeddings import FakeEmbeddings

text = "This is a sample query."
embeddings = FakeEmbeddings(size=1481)

query_result = embeddings.embed_query(text)
print(query_result, len(query_result), sep="\n")

In [None]:
# Demonstration: embedding an input longer than 8191 tokens is expected to fail
from langchain.embeddings.openai import OpenAIEmbeddings

# 10,000 repetitions of "Hello " as an oversized input
long_text = "Hello " * 10_000
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

query_result = embeddings.embed_query(long_text)
print(query_result)

In [None]:
!pip install tiktoken

In [None]:
# Cap the input at max_tokens tokens with tiktoken before embedding it
import tiktoken
from langchain.embeddings.openai import OpenAIEmbeddings

encoding_name = "cl100k_base"
max_tokens = 8191
long_text = "Hello " * 10_000

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Encode to token ids and keep only the first max_tokens of them
encoding = tiktoken.get_encoding(encoding_name)
tokens = encoding.encode(long_text)[:max_tokens]

# Decode the kept tokens back to text, since embed_query takes a string
truncated_text = encoding.decode(tokens)

query_result = embeddings.embed_query(truncated_text)
print(query_result, len(query_result), sep="\n")

## Part 3: Prompts

In [None]:
# Ask the LLM about a recent event/occurrence, without supplying any context
from langchain.llms.openai import OpenAI

llm = OpenAI(
    openai_api_key=openai_api_key,
    model_name="text-davinci-003",
)

print(llm("What is LangChain useful for? Answer in one sentence."))

In [None]:
# Ask the same question again, but with relevant context embedded in the
# prompt: an instruction line, a "Context:" section, then the question and an
# open "Answer:" slot for the model to complete.
# Note: this rebinds the notebook-level name `prompt` to a plain string.
prompt = """You are a helpful assistant, who can explain concepts in an easy-to-understand manner. Answer the following question succintly.
          Context: There are six main areas that LangChain is designed to help with. These are, in increasing order of complexity:
            LLMs and Prompts: This includes prompt management, prompt optimization, a generic interface for all LLMs, and common utilities for working with LLMs.
            Chains: Chains go beyond a single LLM call and involve sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.
            Data Augmented Generation: Data Augmented Generation involves specific types of chains that first interact with an external data source to fetch data for use in the generation step. Examples include summarization of long pieces of text and question/answering over specific data sources.
            Agents: Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end-to-end agents.
            Memory: Memory refers to persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.
            Evaluation: Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.
          Question: What is LangChain useful for?
          Answer: """

print(llm(prompt))

In [None]:
# Use a template to structure the prompt: the instruction text is fixed and
# only the {question} placeholder varies between calls.
from langchain import PromptTemplate

template = """You are a helpful assistant, who is good at general knowledge trivia. Answer the following question succintly.
              Question: {question}
              Answer:"""

prompt = PromptTemplate(template=template, input_variables=['question'])

question = "Who won the first football World Cup?"

# Render the template with the question before calling the LLM. Sending the
# bare `question` would bypass the template this cell is meant to demonstrate.
print(llm(prompt.format(question=question)))

In [None]:
# Use a chain to execute the prompt: LLMChain pairs the `prompt` template with
# the `llm`, and run() is given the question text as the chain's input.
from langchain.chains import LLMChain

llm_chain = LLMChain(prompt=prompt, llm=llm)

print(llm_chain.run(question))

In [None]:
# Save prompt template to JSON file
prompt.save("myprompt.json")

# Load prompt template from JSON file
from langchain.prompts import load_prompt

saved_prompt = load_prompt("myprompt.json")
# The save/load round trip must reproduce the original template
assert prompt == saved_prompt

# Query the LLM through the reloaded template. Sending the bare `question`
# would leave `saved_prompt` unused apart from the equality check above.
print(llm(saved_prompt.format(question=question)))

In [None]:
# Steer the model's tone by placing a few question/answer examples in the prompt
from langchain import PromptTemplate, FewShotPromptTemplate

# Each example supplies the two variables consumed by the example template
examples = [
    {
        "question": "How can we extend our lifespan?",
        "answer": "Just freeze yourself and wait for technology to catch up.",
    },
    {
        "question": "Does red wine help you live longer?",
        "answer": "I don't know about that, but it does make the time pass more quickly.",
    },
    {
        "question": "How can we slow down the aging process?",
        "answer": "Simple, just stop having birthdays.",
    },
]

template = """
    Question: {question}
    Answer: {answer}
  """

prompt = PromptTemplate(template=template, input_variables=["question", "answer"])

few_shot_prompt = FewShotPromptTemplate(
    prefix="Respond with a funny and witty remark.",
    examples=examples,
    example_prompt=prompt,
    example_separator="",
    suffix="Question: {question}\nAnswer:",
    input_variables=["question"],
)

# Render once, print the assembled prompt, then send that same text to the LLM
few_shot_text = few_shot_prompt.format(question="How can I eat healthy?")
print(few_shot_text)
print(llm(few_shot_text))

In [None]:
# Build chat messages from templates: a parameterised system message plus a
# human message that is just the {text} placeholder
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    AIMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

chat = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)

system_message = "You are a helpful assistant that translates {input_language} to {output_language}."
human_message = "{text}"

system_message_prompt = SystemMessagePromptTemplate.from_template(system_message)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_message)

chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Fill in all three placeholders and convert the result to a list of messages
messages = chat_prompt.format_prompt(
    input_language="English",
    output_language="Spanish",
    text="I'm hungry, give me food.",
).to_messages()

chat(messages)

## Part 4: Indexes

### Document Loaders

In [None]:
!pip install unstructured tabulate pdf2image pytesseract

In [None]:
# URL Loader: load the page(s) listed in `urls` and print what was loaded
from langchain.document_loaders import UnstructuredURLLoader

urls = ["https://alphasec.io/summarize-text-with-langchain-and-openai"]
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()
print(data)

In [None]:
!pip install pypdf

In [None]:
# PDF Loader: load and split a local PDF (path is relative to the working
# directory), then display the first element of the result
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("./data/attention-is-all-you-need.pdf")
pages = loader.load_and_split()
pages[0]

In [None]:
# File Directory Loader: load files under `data` that match the recursive
# "**/*.csv" glob, then display how many documents were loaded
from langchain.document_loaders import DirectoryLoader

loader = DirectoryLoader('data', glob="**/*.csv")
docs = loader.load()
len(docs)

In [None]:
!pip install pytube youtube-transcript-api

In [None]:
# YouTube Transcripts Loader: build a loader from a video URL, with
# add_video_info=True, and print the loaded result
from langchain.document_loaders import YoutubeLoader

loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=yEgHrxvLsz0", add_video_info=True)
data = loader.load()
print(data)

In [None]:
!pip install google-cloud-storage

In [None]:
# Google Cloud Storage File Loader: load one blob, identified by project,
# bucket and blob name, and print the loaded result
# NOTE(review): presumably requires Google Cloud credentials to be configured
# in the runtime — confirm
from langchain.document_loaders import GCSFileLoader

loader = GCSFileLoader(project_name="langchain-gcs", bucket="langchain-gcs", blob="lorem-ipsum.txt")
data = loader.load()
print(data)

### Text Splitters

In [None]:
# Character Text Splitter: split an uploaded text file into chunks using a
# blank-line ("\n\n") separator
from langchain.text_splitter import CharacterTextSplitter
from google.colab import files

# Take the first uploaded entry and decode its bytes as UTF-8 text
uploaded = files.upload()
filename = next(iter(uploaded))
text = uploaded[filename].decode("utf-8")

text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

texts = text_splitter.create_documents([text])

# Preview at most the first three chunks. Slicing avoids the IndexError that
# hard-coded indexes 0..2 raise when the file yields fewer than three chunks.
for chunk in texts[:3]:
    print(chunk)

In [None]:
# Recursive Character Text Splitter: split an uploaded text file into small
# chunks (chunk_size=100, chunk_overlap=20, measured with len)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from google.colab import files

# Take the first uploaded entry and decode its bytes as UTF-8 text
uploaded = files.upload()
filename = next(iter(uploaded))
text = uploaded[filename].decode("utf-8")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)

texts = text_splitter.create_documents([text])

# Preview at most the first three chunks. Slicing avoids the IndexError that
# hard-coded indexes 0..2 raise when the file yields fewer than three chunks.
for chunk in texts[:3]:
    print(chunk)

### Vector Stores

In [None]:
!pip install chromadb tiktoken

In [None]:
# Chroma Vector Store: embed an uploaded text file into Chroma and run a
# similarity search against it
import os
import tiktoken  # not referenced below; NOTE(review): presumably imported so a missing install fails early — confirm
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

OPENAI_API_KEY = '' # @param {type:"string"}
# Export the key only when the form field was filled in, so an empty field
# does not overwrite a key that is already present in the environment.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

from google.colab import files

# Use the first uploaded entry's name as the path to load
uploaded = files.upload()
filename = next(iter(uploaded))

loader = TextLoader(filename)
data = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(data)

# No key is passed explicitly here, so it must come from the environment
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embeddings)

query = "What comes after 'Vestibulum congue convallis finibus'?"
# `docs` is rebound from the split chunks to the search results
docs = db.similarity_search(query)

print(docs[0].page_content)

### Retrievers

In [None]:
!pip install arxiv pymupdf

In [None]:
# Arxiv Retriever: fetch documents for a query (here the string '2203.15556',
# which looks like an arXiv identifier) and display the first result's metadata
from langchain.retrievers import ArxivRetriever

retriever = ArxivRetriever(load_max_docs=2)
docs = retriever.get_relevant_documents(query='2203.15556')

docs[0].metadata

In [None]:
!pip install wikipedia

In [None]:
# Wikipedia Retriever: fetch documents for a free-text query and display the
# first result's metadata
from langchain.retrievers import WikipediaRetriever

retriever = WikipediaRetriever()
docs = retriever.get_relevant_documents(query='large language models')

docs[0].metadata

In [None]:
!pip install chromadb tiktoken

In [None]:
# Chroma Vector Store Retriever: embed an uploaded text file into Chroma and
# query it through the retriever interface
import os
import tiktoken  # not referenced below; NOTE(review): presumably imported so a missing install fails early — confirm
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

OPENAI_API_KEY = '' # @param {type:"string"}
# Export the key only when the form field was filled in, so an empty field
# does not overwrite a key that is already present in the environment.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

from google.colab import files

# Use the first uploaded entry's name as the path to load
uploaded = files.upload()
filename = next(iter(uploaded))

loader = TextLoader(filename)
data = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(data)

# No key is passed explicitly here, so it must come from the environment
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embeddings)

# Wrap the vector store as a retriever
retriever = db.as_retriever()
query = "What comes after 'Vestibulum congue convallis finibus'?"
# `docs` is rebound from the split chunks to the retrieved results
docs = retriever.get_relevant_documents(query)

print(docs[0].page_content)

## Part 5: Memory

In [None]:
# Record a short user/AI exchange in a ChatMessageHistory and display it
from langchain.memory import ChatMessageHistory

history = ChatMessageHistory()

# Each pair is (user message, AI reply), added in conversation order
for user_text, ai_text in (
    ("Hello", "Hi, how can I help you?"),
    ("I want to write Python code.", "Sure, I can help with that. What do you want to code?"),
):
    history.add_user_message(user_text)
    history.add_ai_message(ai_text)

history.messages

In [None]:
# Retrieve chat messages with ConversationBufferMemory (default construction)
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# Each pair is (user message, AI reply), added in conversation order
for user_text, ai_text in (
    ("Hello", "Hi, how can I help you?"),
    ("I want to write Python code.", "Sure, I can help with that. What do you want to code?"),
):
    memory.chat_memory.add_user_message(user_text)
    memory.chat_memory.add_ai_message(ai_text)

# Display the memory variables, loaded with an empty inputs dict
memory.load_memory_variables({})

In [None]:
# Retrieve chat messages with ConversationBufferMemory (return_messages=True)
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True)

# Each pair is (user message, AI reply), added in conversation order
for user_text, ai_text in (
    ("Hello", "Hi, how can I help you?"),
    ("I want to write Python code.", "Sure, I can help with that. What do you want to code?"),
):
    memory.chat_memory.add_user_message(user_text)
    memory.chat_memory.add_ai_message(ai_text)

# Display the memory variables, loaded with an empty inputs dict
memory.load_memory_variables({})

In [None]:
# Run a conversation chain that keeps its history in a fresh ConversationBufferMemory
# (ConversationBufferMemory itself is imported in an earlier cell)
from langchain.chains import ConversationChain
from langchain.llms.openai import OpenAI

llm = OpenAI(openai_api_key=openai_api_key, temperature=0)
conversation = ConversationChain(
    memory=ConversationBufferMemory(),
    llm=llm,
)

conversation.predict(input="Hello")

In [None]:
# Send a follow-up message through the same `conversation` chain object
conversation.predict(input="I want to write Python code.")

In [None]:
# Keep a summary of the conversation with ConversationSummaryMemory, which is
# constructed with an LLM
from langchain.memory import ChatMessageHistory, ConversationSummaryMemory
from langchain.llms.openai import OpenAI

llm = OpenAI(openai_api_key=openai_api_key, temperature=0)
memory = ConversationSummaryMemory(llm=llm)

# Record one input/output exchange in the memory
memory.save_context(
    {"input": "Hello"},
    {"output": "Hi, how can I help you?"},
)

memory.load_memory_variables({})

In [None]:
# NOTE(review): the preceding ConversationSummaryMemory cell does not build a
# chain, so `conversation` here is still the ConversationChain created in an
# earlier cell — confirm this repeated call is intended.
conversation.predict(input="I want to write Python code.")

In [None]:
# Run a verbose conversation chain backed by ConversationSummaryMemory
# (ConversationSummaryMemory itself is imported in an earlier cell)
from langchain.chains import ConversationChain
from langchain.llms.openai import OpenAI

llm = OpenAI(openai_api_key=openai_api_key, temperature=0)
memory = ConversationSummaryMemory(llm=llm)
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True,
)

conversation.predict(input="Hello")

In [None]:
# Send a follow-up message through the same `conversation` chain object
conversation.predict(input="I want to write Python code.")

In [None]:
# Send a further reply through the same `conversation` chain object
conversation.predict(input="No, I'm a beginner.")

In [None]:
# # Memory management using Motorhead (managed)
from langchain.memory.motorhead_memory import MotorheadMemory
from langchain import OpenAI, LLMChain, PromptTemplate

template = """You are a chatbot having a conversation with a human.

{chat_history}
Human: {human_input}
AI:"""

prompt = PromptTemplate(input_variables=["chat_history", "human_input"], template=template)

memory = MotorheadMemory(
    api_key="API_KEY",
    client_id="CLIENT_ID",
    session_id="langchain-1",
    memory_key="chat_history",
)

await memory.init();

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

llm_chain.run("Hello, I'm Motorhead.")

In [None]:
# Ask a follow-up through the same `llm_chain` object; the question refers
# back to the name given in the previous message
llm_chain.run("What's my name?")

In [None]:
# Memory management using Motorhead (self-hosted)
from langchain import OpenAI, LLMChain, PromptTemplate
from langchain.memory.motorhead_memory import MotorheadMemory

template = """You are a chatbot having a conversation with a human.

{chat_history}
Human: {human_input}
AI:"""

prompt = PromptTemplate(input_variables=["chat_history", "human_input"], template=template)

memory = MotorheadMemory(
    url="URL",
    session_id="langchain-1",
    memory_key="chat_history",
)

await memory.init();

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

llm_chain.run("Hello, I'm Motorhead.")

In [None]:
# Ask a follow-up through the same `llm_chain` object; the question refers
# back to the name given in the previous message
llm_chain.run("What's my name?")