In [1]:
from html import escape as escape_html

import lxml
import lxml.html
import requests
from markdownify import markdownify as md
from readability import Document

from langchain import PromptTemplate
from langchain.chains import VectorDBQA
from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI, OpenAIChat
from langchain.prompts import load_prompt
from langchain.text_splitter import MarkdownTextSplitter
from langchain.vectorstores import Chroma

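# TODO: `simplify` is defined inline because an import did not work here (presumably of the project's own helper; cause not recorded).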
def simplify(html, document_title=""):
  """Reduce a raw HTML page to a compact HTML string of its main content.

  The page is run through readability's `Document` to obtain a title and a
  content summary. Attributes are cleared from every node of the summary
  except `<figure>` and `<a>`, single-child wrapper elements are unwrapped,
  and the remaining children are serialised after an `<h1>` holding the title.

  Args:
    html: Raw HTML text of the page.
    document_title: Title used when readability reports "[no-title]".

  Returns:
    A string of the form `<h1>TITLE</h1></br>` followed by the serialised
    children of the content root, with "\\n" and "\\r" removed from the
    serialised children.
  """
  document = Document(html)
  title = document.title()
  if title == "[no-title]":
    title = document_title

  tree = lxml.html.fromstring(document.summary())

  # Visit the root and every descendant; only figures and links keep their
  # attributes.
  for elem in tree.iter():
    if elem.tag not in ("figure", "a"):
      elem.attrib.clear()

  # Descend through wrappers that hold exactly one child, stopping at a lone
  # paragraph so that it is still emitted as a child below.
  while len(tree) == 1 and tree[0].tag != "p":
    tree = tree[0]

  body = "".join(
    lxml.html.tostring(child).decode("utf-8") for child in tree
  ).replace("\n", "").replace("\r", "")

  # The title is plain text, so escape it before embedding it in markup.
  # NOTE(review): `</br>` is not a valid tag (`<br>` was probably intended); it
  # is kept byte-for-byte because the downstream Markdown conversion may
  # depend on it.
  return f"<h1>{escape_html(title, quote=False)}</h1></br>" + body


In [2]:
# Article to take notes on.
url = "https://medium.com/inside-machine-learning/what-is-a-transformer-d07dd1fbec04"
notes = "<h1>Summary of article</h1><ul><li>"

# Bound the request and fail loudly on an HTTP error, so the notebook neither
# hangs forever nor goes on to summarise an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
context = simplify(response.text)
# Offline stand-in for the fetched page:
# context = "<html><body><p>The quick brown fox jumped over the lazy dog.</p></body></html>"
prompt_template = load_prompt("../src/prompts/v1.yaml")

No `_type` key found, defaulting to `prompt`.


In [3]:
# Convert the simplified HTML to Markdown, using "atx" (`# Heading`) headings.
context_in_md = md(
  context,
  heading_style="atx",
)

In [4]:
# TODO: deal with delimiters disappearing when the Markdown is split.
# `MarkdownTextSplitter` is imported in the notebook's top import cell.
text_splitter = MarkdownTextSplitter()
documents = text_splitter.create_documents([context_in_md])

In [31]:
# Embed the Markdown chunks and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(documents, embeddings)

# Question answering over the store with the "map_reduce" chain type.
qa = VectorDBQA.from_chain_type(
  llm=OpenAI(max_tokens=1024, verbose=True),
  chain_type="map_reduce",
  vectorstore=docsearch,
  return_source_documents=True,
)

# Turn on verbose logging at every level of the chain.
# The assignments below are order-sensitive: the model name is set on the
# first LLM chain before the second chain's LLM is replaced.
qa.verbose = True
combine_chain = qa.combine_documents_chain
combine_chain.verbose = True

# Chain that receives `{context}` / `{question}` (see the prompt cell):
# switched to the "text-curie-001" completion model.
section_chain = combine_chain.llm_chain
section_chain.verbose = True
section_chain.llm.model_name = "text-curie-001"

# Chain that receives `{summaries}` / `{question}`: uses a chat model that
# stops generating at the notes' end token.
summary_chain = combine_chain.combine_document_chain.llm_chain
summary_chain.verbose = True
summary_chain.llm = OpenAIChat(max_tokens=1024, verbose=True)
summary_chain.llm.model_kwargs = {"stop": ["===END==="]}

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


In [73]:
# Prompt run against each article section; a cheaper model is probably enough
# for this step.
template = """
Use the following portion of a long article to see if any of the text is relevant complete the incomplete notes. 
Return any relevant text *verbatim*.

<!-- START OF ARTICLE SECTION -->
{context}
<!-- END OF ARTICLE SECTION -->

<!-- START OF INCOMPLETE NOTES -->
{question}
<!-- END OF INCOMPLETE NOTES -->
Relevant text verbatim, if any:
"""

# Prompt that turns the collected excerpts into the continuation of the notes.
# Earlier draft wording, kept for reference:
#   Given the following extracted parts of a long document and a question, complete the notes.
#   * If the answer is not found in the context, "I don't know" will be written instead
stuffing_template = """
The following are a set of summaries from a long article used to complete a small section of notes below:

<!-- START OF ARTICLE SUMMARIES -->
{summaries}
<!-- END OF ARTICLE SUMMARIES -->

The following are a small section of notes written according to the following:
* The notes are based on the context, *not* on prior knowledge
  * An answer will *not* be written if the answer is not found in the long article
* The notes start at the start token (===START===) and end at the end token (===END===)
* Github-style markdown syntax will be used to format the notes
  * Lists, which start with astericks (*) will be used dominantly to organize the notes
  * Indents will be used to nest lists
  * Headers, which start with hashes (#) will be used SPARINGLY to organize the notes
* The notes will be elaborate and detailed, but will not generate new section headers
* Each line will be kept short, simple and concise, and will not exceed 80 characters
* Multiple clauses or sentences will ALWAYS be broken into multiple lines 

===START===
{question}
"""

# Install both prompts on the existing chain, in the same order as before.
documents_chain = qa.combine_documents_chain
documents_chain.llm_chain.prompt = PromptTemplate(
  template=template,
  input_variables=["context", "question"],
)
documents_chain.combine_document_chain.llm_chain.prompt = PromptTemplate(
  template=stuffing_template,
  input_variables=["summaries", "question"],
)

In [74]:
# Seed the notes with a heading and an open bullet for the model to continue.
# The formatting rules that an earlier draft sent inside the query are now
# part of `stuffing_template`.
notes = "Definition of a transformer:\n* "
s = qa({"query": notes})



[1m> Entering new VectorDBQA chain...[0m
Prompt after formatting:
[32;1m[1;3m
Use the following portion of a long article to see if any of the text is relevant complete the incomplete notes. 
Return any relevant text *verbatim*.

<!-- START OF ARTICLE SECTION -->
# What is a Transformer?. An Introduction to Transformers and… | by Maxime | Inside Machine learning | Medium

# What is a Transformer?

# An Introduction to Transformers and Sequence-to-Sequence Learning for Machine Learning

New deep learning models are introduced at an increasing rate and sometimes it’s hard to keep track of all the novelties. That said, one particular neural network model has proven to be especially effective for common natural language processing tasks. The model is called a Transformer and it makes use of several methods and mechanisms that I’ll introduce here. The papers I refer to in the post offer a more detailed and quantitative description.

# **Part 1: Sequence to Sequence Learning and Attent

Token indices sequence length is longer than the specified maximum sequence length for this model (1841 > 1024). Running this sequence through the model will result in indexing errors




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
The following are a set of summaries from a long article used to complete a small section of notes below:

<!-- START OF ARTICLE SUMMARIES -->

"The encoder and the decoder are a pair of neural networks, each of which takes an input sequence and turns it into an output sequence. The encoder and the decoder are a pair of neural networks, each of which takes an input sequence and turns it into an output sequence. The encoder and the decoder are a pair of neural networks, each of which takes an input sequence and turns it into an output sequence. The encoder and the decoder are a pair of neural networks, each of which takes an input sequence and turns it into an output sequence.

Sequence-to-sequence (or Seq2Seq) is a neural net that transforms a given sequence of elements, such as the sequence of words in a sentence, into another sequence. (Well, this might not surprise you considering the name.)

Seq2Seq m

In [75]:
# Show the seed notes followed by the model's completion.
completed_notes = notes + s["result"]
print(completed_notes)

Definition of a transformer:
* A neural network architecture consisting of an encoder and decoder to transform one sequence into another without Recurrent Networks, using attention mechanisms.
* The Encoder and Decoder modules consist of repeated modules with Multi-Head Attention and Feed Forward layers.
* The input and output sequences are embedded into an n-dimensional space.
* Positional encoding is added to the embedded representation of each word to give them a relative position.
* Multi-Head Attention allows the model to focus on different parts of the sequence at the same time using attention weights calculated by a query, key, and value matrix.
* In machine translation, the decoder input is shifted to the right by one position with start-of-sentence and end-of-sentence tokens added for training with teacher forcing.
* Results show that the Transformer architecture can be used for time-series forecasting but with higher errors for longer forecasting steps.



In [None]:
# Scratch cell (never executed: `In [None]`). It holds two bare string literals
# with example notes -- presumably pasted from earlier runs, TODO confirm.
# Nothing is assigned; if run, only the final literal would be the cell's value.
"""
Definition of a transformer:
* A model architecture consisting of an Encoder and a Decoder 
* Uses Multi-Head Attention and Feed Forward layers 
* Does not use Recurrent Networks like GRU and LSTM 
* Requires positional encoding of words in sequence 
* Works best for translation tasks and natural language tasks 
* Uses a mask to avoid seeing "future" sequence elements 
* Applies "Teacher-Forcing" during training 
* Loss function used is mean squared error 
"""

# Longer example that continues past the first heading into further sections.
"""
Definition of a transformer:
* A neural net architecture for transforming one sequence into another using an Encoder and a Decoder
* Uses an attention mechanism to decide at each step which parts of the sequence are important
* Does not use Recurrent Networks (RNNs) like LSTM or GRU

How a Seq2Seq model works:
* A neural net that transforms a given sequence into another sequence
* Popular for translation tasks like converting a sequence of words in one language to another
* Consists of an Encoder and a Decoder
* Encoder maps input sequence to higher dimensional space, then feeds abstract vector to Decoder
* Decoder turns vector into output sequence which could be another language or copy of input

How an attention-mechanism works:
* Looks at input sequence and decides at each step which parts are important
* For example, when reading a text, focus on current word and hold important keywords in memory for context

How the training for translation tasks works:
* Need sentence pairs in different languages to train model
* Encoder input is sentence in one language, decoder input is shifted sentence in other language
* Allows model to predict next word/character given encoder sequence and previously seen decoder sequence
* Fills first position of decoder input with start-of-sentence token and appends end-of-sentence token to decoder and target output sentence

Example of how transformers can be used for time-series forecasting:
* Used teacher forcing for training
* Encoder gets window of 24 data points and decoder input is window of 12 data points with start-of-sequence value
* Shift decoder input by one position with regard to target sequence to prevent copying task
* Loss function is mean squared error
* Accuracy decreases as more steps are forecasted
"""