# LangChain Use Cases

Reference notebook: https://github.com/gkamradt/langchain-tutorials/blob/main/LangChain%20Cookbook%20Part%202%20-%20Use%20Cases.ipynb

In [4]:
import langchain

from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
import google.generativeai
import os

# Build the Google PaLM LLM client. The API key is read from the
# GOOGLE_API_KEY environment variable (None if it is not set).
llm = GooglePalm(
    temperature=0.0,
    google_api_key=os.environ.get('GOOGLE_API_KEY'),
)

# Optional smoke test, left disabled:
# prompts = ['Explain the difference between effective and affective with examples']
# llm_result = llm._generate(prompts)
# print(llm_result.generations[0][0].text)

# Show which LangChain version this notebook is running against.
print(langchain.__version__)

0.0.330


In [7]:
# IPython help query: displays the init signature and docstring of GooglePalm.
# NOTE(review): interactive introspection only; nothing later depends on this cell.
GooglePalm?

[0;31mInit signature:[0m
[0mGooglePalm[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcache[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mbool[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallbacks[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackHandler[0m[0;34m][0m[0;34m,[0m [0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallback_manager[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34

## Summarization

In [9]:
from langchain import PromptTemplate

# Prompt skeleton: fixed instructions followed by a {text} slot that is
# filled with the passage to summarize.
template = """
%INSTRUCTIONS:
Please summarize the following piece of text.
Response in a manner that a 5 year old would understand.

%TEXT:
{text}
"""

# Wrap the skeleton in a LangChain PromptTemplate; `text` is the only
# variable that gets substituted later via prompt.format(...).
prompt = PromptTemplate(template=template, input_variables=['text'])

In [10]:
# Sample passage (a quoted excerpt about Prototaxites) that is inserted into
# the prompt's {text} slot and summarized by the LLM.
confusing_text = """
For the next 130 years, debate raged.
Some scientists called Prototaxites a lichen, others a fungus, and still others clung to the notion that it was some kind of tree.
“The problem is that when you look up close at the anatomy, it’s evocative of a lot of different things, but it’s diagnostic of nothing,” says Boyce, an associate professor in geophysical sciences and the Committee on Evolutionary Biology.
“And it’s so damn big that when whenever someone says it’s something, everyone else’s hackles get up: ‘How could you have a lichen 20 feet tall?’”
"""

In [11]:
# Fill the template's {text} slot with the sample passage.
final_prompt = prompt.format(text=confusing_text)

# Show the exact prompt that will be sent to the LLM, framed by markers.
print("------- Prompt Begin -------")
print(final_prompt)
print("------- Prompt End -------")

------- Prompt Begin -------

%INSTRUCTIONS:
Please summarize the following piece of text.
Response in a manner that a 5 year old would understand.

%TEXT:

For the next 130 years, debate raged.
Some scientists called Prototaxites a lichen, others a fungus, and still others clung to the notion that it was some kind of tree.
“The problem is that when you look up close at the anatomy, it’s evocative of a lot of different things, but it’s diagnostic of nothing,” says Boyce, an associate professor in geophysical sciences and the Committee on Evolutionary Biology.
“And it’s so damn big that when whenever someone says it’s something, everyone else’s hackles get up: ‘How could you have a lichen 20 feet tall?’”


------- Prompt End -------


In [12]:
# Send the formatted prompt to the LLM and print what it returns.
output = llm(final_prompt)
print(output)

Scientists argued for 130 years about what Prototaxites is. Some said it is a lichen, some said it is a fungus, and some said it is a tree. The problem is that it looks like a lot of different things, but it is not clear what it is.


## Summaries of Longer Text

We will use the `wikipedia` tool [^1] to fetch an article online and then summarize it with the LLM.

[^1]: https://python.langchain.com/docs/integrations/tools/wikipedia

In [20]:
!pip install -qqq wikipedia transformers

In [14]:
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper

# Wikipedia query tool built on a WikipediaAPIWrapper with default settings.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
# Look up 'idempotency' and keep the returned text for the sections below.
text = wikipedia.run('idempotency')

In [15]:
# Number of characters in the text returned by the Wikipedia query.
len(text)

2057

In [18]:
# Preview the first 300 characters of the returned text.
print(text[:300])

Page: Idempotence
Summary: Idempotence (UK: , US: ) is the property of certain operations in mathematics and computer science whereby they can be applied multiple times without changing the result beyond the initial application. The concept of idempotence arises in a number of places in abstract alg


In [23]:
from langchain.llms import GooglePalm
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# PaLM client with the same configuration as the one created at the top of
# the notebook (API key from GOOGLE_API_KEY, temperature 0.0).
llm = GooglePalm(
    temperature=0.0,
    google_api_key=os.environ.get('GOOGLE_API_KEY'),
)

# Ask the LLM wrapper how many tokens the Wikipedia text amounts to.
num_tokens = llm.get_num_tokens(text)
print(f'There are {num_tokens} tokens in your text')

There are 704 tokens in your text


In [26]:
# Split on blank lines first, then on single newlines, producing overlapping
# chunks (chunk_size=1000, chunk_overlap=350).
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n"],
    chunk_size=1000,
    chunk_overlap=350,
)

# Turn the single Wikipedia string into a list of Document chunks.
docs = text_splitter.create_documents([text])

print(f"You now have {len(docs)} docs intead of 1 piece of text")

You now have 3 docs intead of 1 piece of text


In [27]:
# Build a 'map_reduce' summarization chain around the LLM.
# verbose=True is optional: it shows what is getting sent to the LLM.
chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    verbose=True,
)

In [28]:
# Run the summarization chain over the split documents and print its result.
output = chain.run(docs)
print(output)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Page: Idempotence
Summary: Idempotence (UK: , US: ) is the property of certain operations in mathematics and computer science whereby they can be applied multiple times without changing the result beyond the initial application. The concept of idempotence arises in a number of places in abstract algebra (in particular, in the theory of projectors and closure operators) and functional programming (in which it is connected to the property of referential transparency).
The term was introduced by American mathematician Benjamin Peirce in 1870 in the context of elements of algebras that remain invariant when raised to a positive integer power, and literally means "(the quality of having) the same power", from idem + potence (same + power)."


CONCISE SUMMARY:[0m
Prompt after formatting:
[32;1m[1;3mWrite a c

# Question & Answering Using Documents As Context

## Using Embeddings

In [48]:
!pip install -qqq faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [40]:
from langchain.llms import GooglePalm

# The vectorstore we'll be using
from langchain.vectorstores import FAISS

# The LangChain component we'll use to get the documents
from langchain.chains import RetrievalQA

# The easy document loader for text
from langchain.document_loaders import TextLoader

# The embedding engine that will convert our text to vectors
from langchain.embeddings import GooglePalmEmbeddings

# Read the API key once from the environment; it is passed to the LLM here
# and reused for the embeddings engine further down.
google_api_key = os.environ.get('GOOGLE_API_KEY')

llm = GooglePalm(temperature=0.0, google_api_key=google_api_key)

In [34]:
from langchain.schema.document import Document

# No document loader needed: wrap the Wikipedia string in a Document directly.
doc = [Document(page_content=text)]

# Report how many documents we hold and how long the single one is.
print(f"You have {len(doc)} document")
print(f"You have {len(doc[0].page_content)} characters in that document")

You have 1 document
You have 2057 characters in that document


In [38]:
# Split the single document into overlapping chunks
# (chunk_size=1000, chunk_overlap=400) using the splitter's default separators.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=400,
)
docs = text_splitter.split_documents(doc)

In [41]:
# Total characters across all chunks; divided by the chunk count below to
# report the average chunk length.
num_total_characters = sum(len(chunk.page_content) for chunk in docs)

print(f"Now you have {len(docs)} documents that have an average of {num_total_characters / len(docs):,.0f} characters (smaller pieces)")

Now you have 3 documents that have an average of 684 characters (smaller pieces)


In [49]:
# Get your embeddings engine ready (Google PaLM embeddings, using the same API key as the LLM)
embeddings = GooglePalmEmbeddings(google_api_key=google_api_key)

# Embed your documents and combine them with the raw text in a FAISS index (a pseudo db).
# Note: This will make an API call to the embeddings provider (Google PaLM here, not OpenAI).
docsearch = FAISS.from_documents(docs, embeddings)

# The retrieval engine is created from `docsearch` in the following cell.

In [51]:
# Create the retrieval engine: a "stuff"-type RetrievalQA chain that uses the
# LLM together with the FAISS index exposed as a retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [52]:
# Ask a general question; the bare final expression displays the chain's answer.
query = "What is the article about?"
qa.run(query)

'a property of logical systems'

In [53]:
# Ask a more specific question; the bare final expression displays the chain's answer.
query = "What is the advantage of idempotency?"
qa.run(query)

'the same consequences from many instances of a hypothesis as from just one'