We will build a LangChain summarization pipeline without relying on Hugging Face or OpenAI LLMs; a GPT-2 model loaded through KerasNLP serves as the LLM instead.

**Note:** If you run out of RAM at runtime, switch to a GPU runtime.

In [1]:
# Run to install the libraries
!pip -q install langchain tiktoken keras-nlp

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m89.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.7/527.7 kB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m100.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.1/49.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m524.1/524.1 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m82.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import keras_nlp

# Load the base English GPT-2 preset as a causal language model.
gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset(
    "gpt2_base_en"
)

# CustomLLM (defined further down) generates with the global `gpt2_llm`, which is
# otherwise bound only by the separate large-model cell. Alias the base model here
# so the notebook still runs when that cell is not executed; executing it simply
# rebinds `gpt2_llm` to the extra-large model.
gpt2_llm = gpt2_lm

Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/vocab.json
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/merges.txt
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_base_en/v1/model.h5




In [14]:
# Load the extra-large English GPT-2 preset.
# CustomLLM generates with the global `gpt2_llm`, so after this cell runs the
# summarization chain uses this model.
gpt2_llm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_extra_large_en")

Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/vocab.json
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/merges.txt
Downloading data from https://storage.googleapis.com/keras-nlp/models/gpt2_extra_large_en/v1/model.h5




In [5]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate

In [6]:
# Splitter used to break the raw text into chunks; constructed with the library defaults.
text_splitter = CharacterTextSplitter()

In [7]:
# Read the speech as UTF-8 explicitly: the text contains non-ASCII punctuation
# (e.g. curly apostrophes), so relying on the platform's default encoding could
# raise a decode error or garble characters.
with open('./state_of_the_union.txt', encoding='utf-8') as f:
  state_of_the_union = f.read()

# Backward-compatible alias: the text was previously stored under this (misleading) name.
how_to_win_friends = state_of_the_union

# Split the full speech into chunks with the splitter created earlier.
texts = text_splitter.split_text(state_of_the_union)

In [8]:
# Number of chunks the splitter produced.
len(texts)

11

In [9]:
from langchain.docstore.document import Document
# Wrap only the first three text chunks as Document objects; the remaining chunks are not summarized.
docs = [Document(page_content=chunk) for chunk in texts[:3]]

In [10]:
# Display the Document objects built from the first three chunks to inspect their contents.
docs

[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.  \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citize

In [11]:
from langchain.chains.summarize import load_summarize_chain
import textwrap

In [12]:
!pip install -q transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
import torch
from langchain.llms.base import LLM
from typing import Optional, List, Mapping, Any

# Label reported by CustomLLM._identifying_params; this is only a name string,
# not the model object itself.
model_name = "gpt2"

# Context window size.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
context_window = 2048

# Number of output tokens.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
num_output = 256

class CustomLLM(LLM):
  """LangChain `LLM` wrapper that generates text with the global `gpt2_llm` model.

  The model is read from the notebook's global namespace at call time rather than
  being stored on the instance, so whichever model `gpt2_llm` is bound to when the
  chain runs is the one that gets used.
  """

  def _call(
    self,
    prompt: str,
    stop: Optional[List[str]] = None,
    run_manager: Optional[Any] = None,
    **kwargs: Any,
  ) -> str:
    """Generate a completion for `prompt`.

    Args:
      prompt: Text to continue.
      stop: Optional stop sequences. The completion is cut immediately before the
        earliest occurrence of any of them; empty strings are ignored.
      run_manager: Accepted for compatibility with callers that pass a callback
        manager; not used here.
      **kwargs: Accepted for compatibility with callers that pass extra
        generation options; not used here.

    Returns:
      The text that follows the prompt in the model output, truncated at the
      first stop sequence when one is found.
    """
    prompt_length = len(prompt)
    response = gpt2_llm.generate(prompt)

    # The model output is sliced on the assumption that it begins with the prompt
    # itself; keep only what comes after it.
    completion = response[prompt_length:]

    # Honor the stop sequences instead of silently ignoring them.
    if stop:
      cut_points = [completion.find(sequence) for sequence in stop if sequence]
      cut_points = [index for index in cut_points if index != -1]
      if cut_points:
        completion = completion[:min(cut_points)]

    return completion

  @property
  def _identifying_params(self) -> Mapping[str, Any]:
    """Parameters identifying this LLM: the global `model_name` label."""
    return {"name_of_model":model_name}

  @property
  def _llm_type(self) -> str:
    """Type tag for this LLM implementation."""
    return "custom"

In [16]:
# Build a summarization chain of type "map_reduce" driven by the custom GPT-2 wrapper.
chain = load_summarize_chain(
    llm=CustomLLM(),
    chain_type="map_reduce",
)

# Run the chain over the selected documents.
output_summary = chain.run(docs)

# Wrap the summary to 100 columns so it is readable in the cell output.
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

   1. We will invest in America.   "   1. We will invest in America.   "2. We will educate Americans
and build a strong workforce.  3. We will invest in infrastructure and energy.  4. We will support
and empower American workers and small businesses.  5. We will fight to end the war in Afghanistan.
6. We will fight to end the war in Iraq.  7. We will fight to end the war in Libya.  8. We will
fight to end the war in Yemen.  9. We will fight to end the war in North Korea.  10. We will fight
to end the war in Syria.   "And tonight I am announcing that we will join with our allies in cutting
off American air space to all Russian flights – further isolating Russia.   And tonight I am
announcing that we will join with our allies in cutting off American air space to all Russian
flights – further isolating Russia.   And tonight I am announcing that we will join with our allies
in cutting off American air space to all Russian flights – further isolating Russia.    CONCISE
SUMMARY:  1. We will 