In [None]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.question_answering import load_qa_chain

### Load Documents

In [None]:
# Load the Muir text file; the loaded result is kept in sm_doc and fed to the
# summarize chains in later cells.
sm_loader = UnstructuredFileLoader("../data/muir_lake_tahoe_in_winter.txt")
sm_doc = sm_loader.load()

# Load the Paul Graham essay the same way; lg_doc is later split into chunks
# with a text splitter before being summarized.
lg_loader = UnstructuredFileLoader("../data/PaulGrahamEssays/worked.txt")
lg_doc = lg_loader.load()

In [None]:
def doc_summary(docs):
    """Print a quick overview of a list of loaded documents.

    Prints the number of documents, an approximate total word count, and a
    preview made of the first document's text up to its first ``". "``.

    Args:
        docs: Sequence of objects exposing a ``page_content`` string.
            May be empty, in which case no preview is printed.

    Returns:
        None. Everything is written to stdout.
    """
    print(f'You have {len(docs)} document(s)')

    # str.split() with no argument splits on any run of whitespace, so
    # newlines, tabs and repeated spaces neither merge nor invent words, and
    # empty content counts as zero words (split(' ') reported one).
    num_words = sum(len(doc.page_content.split()) for doc in docs)

    print(f'You have roughly {num_words} words in your docs')
    print()

    # Nothing to preview for an empty list; indexing docs[0] would raise.
    if not docs:
        return

    preview = docs[0].page_content.split(". ")[0]
    print(f'Preview: \n{preview}')

In [None]:
# Print the document count, rough word count and a preview for sm_doc.
doc_summary(sm_doc)

In [None]:
# Same overview for lg_doc, for comparison with the output for sm_doc.
doc_summary(lg_doc)

### Load Your LLM

In [None]:
from langchain import OpenAI

In [None]:
import os

# Prefer the OPENAI_API_KEY environment variable so the real key never has to
# be pasted into (and accidentally committed with) the notebook. The '...'
# placeholder is kept as the fallback when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", '...')

In [None]:
# Create the OpenAI LLM object, passing OPENAI_API_KEY explicitly; this `llm`
# is handed to every chain built in the cells that follow.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

### Summarize: Stuff

In [None]:
# Build a summarization chain with chain_type="stuff".
# NOTE(review): verbose=True presumably makes the chain log its prompts while
# running — confirm against the LangChain docs.
chain = load_summarize_chain(llm, chain_type="stuff", verbose=True)

In [None]:
# Run the current chain on sm_doc; as the last expression in the cell, the
# returned value is displayed by the notebook.
chain.run(sm_doc)

In [None]:
# Run the same "stuff" chain on lg_doc (unsplit).
# NOTE(review): "stuff" presumably places all input text in a single prompt,
# so a long document may exceed the model's context limit — confirm whether
# this cell is expected to fail as a demonstration.
chain.run(lg_doc)

### Summarize: Map Reduce

In [None]:
# Rebind `chain` to a summarization chain with chain_type="map_reduce";
# cells below that call chain.run(...) use this chain until it is rebound.
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)

In [None]:
# Summarize sm_doc with the map_reduce chain.
chain.run(sm_doc)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Configure the splitter used to break lg_doc into smaller pieces.
# NOTE(review): chunk_size is presumably measured in characters (the
# splitter's default length function) — confirm against the LangChain docs.
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 400,
    chunk_overlap = 0
)

In [None]:
# Split lg_doc into chunks using the splitter configured above; the result is
# sliced and passed to the chains in the following cells.
lg_docs = text_splitter.split_documents(lg_doc)

In [None]:
# Overview of the split result: number of chunks, rough word count, preview.
doc_summary(lg_docs)

In [None]:
# Run the map_reduce chain on only the first five chunks.
# NOTE(review): the [:5] slice presumably keeps the number of LLM calls (and
# cost) small for the demo — confirm.
chain.run(lg_docs[:5])

### Summarize: Refine

In [None]:
# Rebind `chain` to a summarization chain with chain_type="refine".
chain = load_summarize_chain(llm, chain_type="refine", verbose=True)

In [None]:
# Summarize the first five chunks with the refine chain.
chain.run(lg_docs[:5])

### Q&A: Map Re-Rank

In [None]:
# Rebind `chain` to a question-answering chain with chain_type="map_rerank".
# return_intermediate_steps=True is requested so the result also carries the
# 'intermediate_steps' entry that is inspected in a later cell.
chain = load_qa_chain(llm, chain_type="map_rerank", verbose=True, return_intermediate_steps=True)

In [None]:
# Question to ask against the document chunks.
query = "Who was the authors friend who he got permission from to use the IBM 1401?"

# Call the QA chain with the first five chunks and the question. The result is
# indexed by 'output_text' and 'intermediate_steps' in the following cells.
# NOTE(review): return_only_outputs=True presumably omits the inputs from the
# returned dict — confirm against the LangChain docs.
result = chain({"input_documents": lg_docs[:5], "question": query}, return_only_outputs=True)

In [None]:
# Display the 'output_text' entry of the QA result.
result['output_text']

In [None]:
# Display the 'intermediate_steps' entry of the QA result (available because
# the chain was built with return_intermediate_steps=True).
result['intermediate_steps']