### Input
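Set either the `INPUT_FILE` or the `INPUT_TEXT` variable. When `INPUT_FILE` is non-empty it takes precedence; `INPUT_TEXT` is used only when `INPUT_FILE` is empty.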

In [None]:
# What to summarize: a file path, or inline text.
INPUT_FILE = "./inputs/Wiki.txt"  # Insert file path here
INPUT_TEXT = """Insert text to summarize here."""

# Summarization style: keep exactly one STYLE / PROMPT_TRIGGER pair uncommented.

# Numbered list
STYLE = "Return your response as numbered list which covers the main points of the text."
PROMPT_TRIGGER = "NUMBERED LIST SUMMARY"

# One sentence
# STYLE = "Return your response as one sentence which covers the main points of the text."
# PROMPT_TRIGGER = "ONE SENTENCE SUMMARY"

# Concise
# STYLE = "Return your response as concise summary which covers the main points of the text."
# PROMPT_TRIGGER = "CONCISE SUMMARY"

# Detailed
# STYLE = "Return your response as detailed summary which covers the main points of the text and key facts and figures."
# PROMPT_TRIGGER = "DETAILED SUMMARY"

# Language of the generated summary; try e.g. Polish, Spanish, etc.
OUTPUT_LANGUAGE = "English"

# Whether the underlying models/chains should print verbose info.
VERBOSE = True

### Model params & setup

In [None]:
# Path to the model file.
MODEL_FILE = "./model/mistral-7b-openorca.Q5_K_M.gguf"

# Size of the model's context window, in tokens.
MODEL_CONTEXT_WINDOW = 8192

# Maximal length of the model's output, in tokens.
MAX_ANSWER_TOKENS = 2048

# Chunking parameters for splitting long content; measured in characters (not tokens).
CHUNK_SIZE = 10000
CHUNK_OVERLAP = 500

In [None]:
from langchain.llms import LlamaCpp

# Create the llama.cpp-backed LLM from the parameters configured above.
llm = LlamaCpp(
    model_path=MODEL_FILE,
    verbose=VERBOSE,
    # Context window size.
    n_ctx=MODEL_CONTEXT_WINDOW,
    # Maximal length of the model's output, in tokens.
    max_tokens=MAX_ANSWER_TOKENS,
    # Don't be creative.
    temperature=0,
    # Number of tokens to process in parallel. Should be a number between 1 and n_ctx.
    n_batch=512,
    # Number of layers to be loaded into GPU memory. Default None.
    n_gpu_layers=1,
)

### Implementation

In [None]:
from langchain.document_loaders import TextLoader

def load_content() -> str:
    """Return the text to summarize.

    Loads INPUT_FILE when it is set (non-empty); otherwise returns INPUT_TEXT.
    A file whose name ends with ".pdf" (case-insensitive) is read with
    PyPDFLoader and its pages are joined with newlines. Any other file is
    read with TextLoader.

    Returns:
        The content to summarize as a single string.
    """
    # No file configured: fall back to the inline text.
    if not INPUT_FILE:
        return INPUT_TEXT

    if INPUT_FILE.lower().endswith(".pdf"):
        # PyPDFLoader is not among the file-level imports (only TextLoader is),
        # so import it here; this also keeps the PDF dependency optional for
        # plain-text inputs.
        from langchain.document_loaders import PyPDFLoader

        docs = PyPDFLoader(INPUT_FILE).load()
        print(f"PDF: loaded {len(docs)} pages")
        return "\n".join(d.page_content for d in docs)

    docs = TextLoader(INPUT_FILE).load()
    return docs[0].page_content


In [None]:
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Prompt producing the final summary. Placeholders: {style}, {content},
# {trigger} and {language}.
combine_prompt_template = (
    "\n"
    "Write a summary of the following text delimited by tripple backquotes.\n"
    "{style}\n"
    "\n"
    "```{content}```\n"
    "\n"
    "{trigger} in {language}:\n"
)

# Prompt used as the map prompt of the map-reduce summarizer. Placeholder: {text}.
map_prompt_template = (
    "\n"
    "Write a concise summary of the following:\n"
    "{text}\n"
    "\n"
    "CONCISE SUMMARY:\n"
)

def summarize_base(llm, content):
    """Summarize the whole content in one go.

    The content needs to fit into the model's context window.
    """
    template = PromptTemplate.from_template(combine_prompt_template)
    # Pre-fill everything except the content placeholder, which is supplied
    # when the chain is run.
    filled_prompt = template.partial(
        style=STYLE,
        trigger=PROMPT_TRIGGER,
        language=OUTPUT_LANGUAGE,
    )

    return LLMChain(llm=llm, prompt=filled_prompt, verbose=VERBOSE).run(content)


def summarize_map_reduce(llm, content):
    """Summarize content potentially larger than the model's context window using a map-reduce approach."""
    # Cut the content into overlapping chunks (sizes are in characters).
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    chunks = splitter.create_documents([content])
    chunk_lengths = [len(chunk.page_content) for chunk in chunks]
    print(f"Map-Reduce content splits ({len(chunks)} splits): {chunk_lengths}")

    # The combine prompt has style, trigger and language pre-filled; its
    # remaining {content} placeholder is named via combine_document_variable_name.
    final_prompt = PromptTemplate.from_template(combine_prompt_template).partial(
        style=STYLE,
        trigger=PROMPT_TRIGGER,
        language=OUTPUT_LANGUAGE,
    )
    per_chunk_prompt = PromptTemplate.from_template(map_prompt_template)

    summarizer = load_summarize_chain(
        llm=llm,
        chain_type="map_reduce",
        map_prompt=per_chunk_prompt,
        combine_prompt=final_prompt,
        combine_document_variable_name="content",
        verbose=VERBOSE,
    )

    return summarizer.run(chunks)

### Main program

In [None]:
%%time 

# Load the input and report its size in characters and model tokens.
content = load_content()
content_tokens = llm.get_num_tokens(content)
print(f"Content length: {len(content)} chars, {content_tokens} tokens.")
print("Content sample:\n" + content[:200] + "\n\n")

# Keep part of the context window for the model's output.
# NOTE(review): the reserved 25% equals MAX_ANSWER_TOKENS only with the default
# settings (8192 / 2048), and the prompt template's own tokens are not counted.
# Revisit this threshold if those parameters change.
base_threshold = 0.75 * MODEL_CONTEXT_WINDOW

# Content below the threshold is summarized in one pass; anything larger
# goes through the map-reduce summarizer.
if content_tokens < base_threshold:
    print("Using summarizer: base")
    summary = summarize_base(llm, content)
else:
    print("Using summarizer: map-reduce")
    summary = summarize_map_reduce(llm, content)

# Report the size of the summary (the label previously said "Content length").
print(f"Summary length: {len(summary)} chars, {llm.get_num_tokens(summary)} tokens.")
print("Summary:\n" + summary + "\n\n")
