In [1]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [2]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.question_answering import load_qa_chain

## Load Documents

In [7]:
sm_loader = UnstructuredFileLoader("worked_small.txt")
sm_doc = sm_loader.load()

lg_loader = UnstructuredFileLoader("worked.txt")
lg_doc = lg_loader.load()

In [8]:
def doc_summary(docs):
    print (f'You have {len(docs)} document(s)')
    
    num_words = sum([len(doc.page_content.split(' ')) for doc in docs])
    
    print (f'You have roughly {num_words} words in your docs')
    print ()
    print (f'Preview: \n{docs[0].page_content.split(". ")[0]}')

In [9]:
doc_summary(sm_doc)

You have 1 document(s)
You have roughly 2291 words in your docs

Preview: 
February 2021Before college the two main things I worked on, outside of school,

were writing and programming


In [10]:
doc_summary(lg_doc)

You have 1 document(s)
You have roughly 12576 words in your docs

Preview: 
February 2021Before college the two main things I worked on, outside of school,

were writing and programming


## Load Your LLM

In [11]:
from langchain import OpenAI

In [12]:
llm = OpenAI()

## Summarize: Stuff

In [13]:
chain = load_summarize_chain(llm, chain_type="stuff", verbose=True)

In [14]:
chain.run(sm_doc)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them

deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like

a mini Bond villain's lair down there, with all these alien-looking

machines  CPU, disk drives, printer, card reader  sitting up


' A person who had been working on writing and programming before college switched to AI after becoming dissatisfied with philosophy courses. He then decided to pursue art instead of computer science after realizing that paintings would last longer than programs, and he was accepted into RISD and the Accademia di Belle Arti in Florence. He also wrote a book about Lisp during grad school.'

## Summarize: Map Reduce


In [15]:
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)

In [16]:
chain.run(sm_doc)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them

deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like

a mini Bond villain's lair down there, with all these alien-looking

machines  CPU, disk drives, printer, card reader  sitting

' The protagonist of this story began their college career with an interest in philosophy, but soon switched to AI due to its lack of engagement. After realizing traditional AI was a hoax, they pursued a career in art, successfully gaining entrance to RISD and, surprisingly, the Accademia di Belli Arti in Florence.'

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [19]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 400,
    chunk_overlap = 0
)

In [20]:
lg_docs = text_splitter.split_documents(lg_doc)

In [21]:
doc_summary(lg_docs)

You have 212 document(s)
You have roughly 12787 words in your docs

Preview: 
February 2021Before college the two main things I worked on, outside of school,

were writing and programming


In [24]:
chain.run(lg_docs[:100])



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them"


CONCISE SUMMARY:[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like"


CONCISE SUMMARY:[0m

"\nThis article follows a 25-year-old programmer's journey from being drawn to Artificial Intelligence (AI) to eventually creating an ecommerce software, Viaweb, which they sold to Yahoo in 1998. Along the way, they attended Harvard, learned Lisp hacking, and realized the importance of product people vs. sales people to run technology companies. They also encountered HTML, moved to New York, and gained insight into retail, growth rates, and customer service."

## Summarize: Refine

In [25]:
chain = load_summarize_chain(llm, chain_type="refine", verbose=True)

In [26]:
chain.run(sm_doc)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them

deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like

a mini Bond villain's lair down there, with all these alien-looking

machines  CPU, disk drives, printer, card reader  sitting up

' A young college student in the 1980s, initially focusing on philosophy, eventually discovers AI and programming, and his love for Lisp. After realizing the limitations of AI, he considers making art instead which eventually leads him to apply to art schools and take the entrance exam at the Accademia di Belli Arti in Florence. With his saved money, he passes the exam and starts to learn Italian.'

In [27]:
chain.run(lg_docs[:20])



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them"


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYour job is to produce a final summary.
We have provided an existing summary up to a certain point:  Prior to college, the writer was primarily focused on writing and programming. They wrote short stories that lacked plot but relied heavily on strong emotions from the characters.
We have the opportunity to refine the existing summary (

"\n\nPrior to college, the writer was primarily focused on writing and programming. At the age of 13 or 14, they had the opportunity to write programs on the IBM 1401 in the basement of their junior high school, which felt like a mini Bond villain's lair with all its alien-looking machines. They used an early version of Fortran and had to type programs on punch cards, then stack them in the card reader and press a button to load the program into memory and run it. Despite having limited input to the programs in the form of data stored on punched cards, the writer persevered and wrote short stories that lacked plot but relied heavily on strong emotions from the characters. The result would ordinarily be to print something on the spectacularly loud printer. The only other option the writer had was to do things that didn't rely on any input, like calculate approximations of pi, but they didn't know enough math to do anything interesting of that type. However, their experience changed when

## Q&A: Map Re-Rank

In [28]:
chain = load_qa_chain(llm, chain_type="map_rerank", verbose=True, return_intermediate_steps=True)

In [29]:
query = "Who was the authors friend who he got permission from to use the IBM 1401?"

result = chain({"input_documents": lg_docs[:5], "question": query}, return_only_outputs=True)



[1m> Entering new MapRerankDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

Question: [question here]
Helpful Answer: [answer here]
Score: [score between 0 and 100]

How to determine the score:
- Higher is a better answer
- Better responds fully to the asked question, with sufficient level of detail
- If you do not know the answer based on the context, that should be a score of 0
- Don't be overconfident!

Example #1

Context:
---------
Apples are red
---------
Question: what color are apples?
Helpful Answer: red
Score: 100

Example #2

Context:
---------
it was night and the witness forgot his glasses. he was not sure if it 




[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [31]:
result['output_text']

' Rich Draves'

In [32]:

result['intermediate_steps']

[{'answer': ' This document does not answer the question', 'score': '0'},
 {'answer': ' Rich Draves', 'score': '100'},
 {'answer': ' This document does not answer the question.', 'score': '0'},
 {'answer': ' This document does not answer the question.', 'score': '0'},
 {'answer': ' This document does not answer the question', 'score': '0'}]