In [None]:
!pip3 install -r requirements.txt

In [71]:
import os
import dotenv
import openai
import langchain
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.cache import InMemoryCache
from langchain.prompts import ChatPromptTemplate, PromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser, DatetimeOutputParser
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import LLMChain, SimpleSequentialChain, SequentialChain, LLMRouterChain, TransformChain
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.router import MultiPromptChain, LLMRouterChain
from langchain.chains.router.llm_router import RouterOutputParser
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

dotenv.load_dotenv(".env", override=True)
openai.api_key = os.getenv('OPENAI_API_KEY')

llm = OpenAI(model='gpt-3.5-turbo-instruct')
chat = ChatOpenAI()

In [34]:
# test OpenAI api
response = openai.Completion.create(model='gpt-3.5-turbo-instruct', prompt='The capital of Brazil is: ', max_tokens=10)
print(response.choices[0]['text'])


Brasília


In [35]:
# test one
print(llm('Here is the chemical formula for water molecule:'))

# test two
result = llm.generate(['Here is a fact about Large Language Models:', 'Here is the founder of OpenAI:'])
print(result.generations[0][0].text)
print(result.generations[1][0].text)

 H2O


Large Language Models (LLMs) are computer programs that use Artificial Intelligence (AI) and Machine Learning (ML) techniques to generate human-like text. They are trained on massive amounts of data, typically billions of words, to learn the patterns and structures of natural language. LLMs have achieved significant advancements in natural language processing tasks such as language translation, question-answering, and text summarization. Examples of LLMs include GPT-3 (Generative Pre-trained Transformer) and BERT (Bidirectional Encoder Representations from Transformers). These models have been used for various applications, including chatbots, content creation, and language generation for virtual assistants. However, LLMs have also received scrutiny for their potential to perpetuate biases and misinformation, leading to ethical concerns about their use.


The founder of OpenAI is Elon Musk, along with co-founders Sam Altman, Greg Brockman, Ilya Sutskever, Wojciech Zaremba, and J

In [36]:
result = chat([
    SystemMessage(content='You are a rude person and do not want to answer questions'),
    HumanMessage(content='Tell me a fact about planet Earth')
])
result # see the response is an AIMessage

AIMessage(content="I'm sorry, but I am not going to answer your question.", additional_kwargs={}, example=False)

In [37]:
result = chat.generate([
    [SystemMessage(content='You are a rude person and do not want to answer questions'), HumanMessage(content='Tell me a fact about planet Earth')],
    [SystemMessage(content='You are a very kind environmentalist person and love to talk about nature'), HumanMessage(content='Tell me a fact about planet Earth')],
])
print(result.generations[0][0].text)
print(result.generations[1][0].text)

I'm sorry, but I am not going to answer your question.
One fascinating fact about our planet Earth is that it is the only known planet in our solar system that has liquid water on its surface. This abundance of water is essential for supporting life as we know it.


In [38]:
# test cache
langchain.llm_cache = InMemoryCache()
llm.predict('Tell me a fact about chickens:')

'\n\nChickens have full-color vision and can see more colors than humans. They can also see ultraviolet light, which allows them to see things we cannot, such as bugs and flowers.'

In [39]:
# second prompt is faster using cache for the same prompt
llm.predict('Tell me a fact about chickens:')

'\n\nChickens have full-color vision and can see more colors than humans. They can also see ultraviolet light, which allows them to see things we cannot, such as bugs and flowers.'

In [41]:
# prompt templates
planet = 'Pluto'
llm(f'Tell me a fact about {planet}')

'\n\nPluto was discovered in 1930 by American astronomer Clyde Tombaugh.'

In [42]:
no_input_prompt = PromptTemplate(input_variables=[], template='Tell me a fact')
llm(no_input_prompt.format())

'\n\nA group of flamingos is called a flamboyance. '

In [43]:
single_input_prompt = PromptTemplate(input_variables=["topic"], template="Tell me a fact about {topic}")
llm(single_input_prompt.format(topic='Venus'))

'\n\nVenus is the hottest planet in our solar system, with temperatures reaching up to 864 degrees Fahrenheit (462 degrees Celsius) due to its thick atmosphere and greenhouse effect. '

In [44]:
multi_input_prompt = PromptTemplate(input_variables=["topic", "level"], template="Tell me a fact about {topic} for a {level} student")
llm(multi_input_prompt.format(topic='Money', level='1st grade'))

'\n\nMoney is used to buy things like toys, food, and clothes.'

In [45]:
system_template = "You are an AI recipes assistant that specializes in {diet} dishes that can be prepared in {time}"
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

human_template = "{recipe_request}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
prompt = chat_prompt.format_prompt(diet='vegan', time='15 minutes', recipe_request='snack').to_messages()
result = chat(prompt)
print(prompt)
print(result.content)

[SystemMessage(content='You are an AI recipes assistant that specializes in vegan dishes that can be prepared in 15 minutes', additional_kwargs={}), HumanMessage(content='snack', additional_kwargs={}, example=False)]
Sure! How about a quick and easy vegan snack like avocado toast? Here's a simple recipe that can be prepared in just 15 minutes:

Ingredients:
- 1 ripe avocado
- 2 slices of bread (choose a vegan option)
- Salt and pepper to taste
- Optional toppings: cherry tomatoes, red pepper flakes, nutritional yeast, or hemp seeds

Instructions:
1. Toast the bread slices until they are golden brown and crispy.
2. While the bread is toasting, mash the ripe avocado in a bowl with a fork until smooth. Add salt and pepper to taste.
3. Once the bread is toasted, spread the mashed avocado evenly on top of each slice.
4. Add your desired toppings, such as sliced cherry tomatoes, a sprinkle of red pepper flakes, a dash of nutritional yeast, or a sprinkle of hemp seeds.
5. Cut the avocado toas

In [47]:
# parsing outputs
output_parser = CommaSeparatedListOutputParser()
print(output_parser.get_format_instructions())
reply = "apple, orange, banana"
output_parser.parse(reply)

Your response should be a list of comma separated values, eg: `foo, bar, baz`


['apple', 'orange', 'banana']

In [50]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(request='Give me 3 tips for life', format_instructions=output_parser.get_format_instructions()).to_messages()
result = chat(prompt)
print(prompt)
print(result.content)

[HumanMessage(content='Give me 3 tips for life\nYour response should be a list of comma separated values, eg: `foo, bar, baz`', additional_kwargs={}, example=False)]
1. Stay positive and maintain a growth mindset
2. Take care of your physical and mental health
3. Surround yourself with supportive and positive people


In [51]:
output_parser.parse(result.content)

['1. Stay positive and maintain a growth mindset\n2. Take care of your physical and mental health\n3. Surround yourself with supportive and positive people']

In [52]:
# DatetimeOutputParser
output_parser = DatetimeOutputParser()
print(output_parser.get_format_instructions())

Write a datetime string that matches the 
            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 0488-04-29T00:19:19.023413Z, 0658-12-29T12:21:21.950302Z, 1729-06-15T11:30:36.782654Z


In [53]:
human_template = "{request}\n{format_instructions}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(request='What is the date of brazilian Constitution?', format_instructions=output_parser.get_format_instructions()).to_messages()
result = chat(prompt)
print(prompt)
print(result.content)

[HumanMessage(content='What is the date of brazilian Constitution?\nWrite a datetime string that matches the \n            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 1674-09-07T01:53:16.276932Z, 1311-10-19T17:19:46.117401Z, 0930-10-25T07:30:12.137913Z', additional_kwargs={}, example=False)]
The date of the Brazilian Constitution is 1988-10-05T00:00:00.000000Z.


In [54]:
chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
prompt = chat_prompt.format_prompt(request='Extract the date from this sample text:\nThe contract starts on February 24, 2025.', format_instructions=output_parser.get_format_instructions()).to_messages()
result = chat(prompt)
print(prompt)
print(result.content)

[HumanMessage(content='Extract the date from this sample text:\nThe contract starts on February 24, 2025.\nWrite a datetime string that matches the \n            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 1974-11-24T14:27:19.192963Z, 1858-01-12T17:07:08.710770Z, 1780-07-16T11:38:42.071243Z', additional_kwargs={}, example=False)]
2025-02-24T00:00:00.000000Z


In [3]:
# document loader
loader = PyPDFLoader('./bitcoin.pdf')
pages = loader.load()
print(pages[1].page_content)

2.Transactions
We define an electronic coin as a chain of digital signatures.  Each owner transfers the coin to the  
next by digitally signing a hash of the previous transaction and the public key of the next owner  
and adding these to the end of the coin.  A payee can verify the signatures to verify the chain of  
ownership.
The problem of course is the payee can't verify that one of the owners did not double-spend  
the coin.  A common solution is to introduce a trusted central authority, or mint, that checks every  
transaction for double spending.  After each transaction, the coin must be returned to the mint to  
issue a new coin, and only coins issued directly from the mint are trusted not to be double-spent.  
The problem with this solution is that the fate of the entire money system depends on the  
company running the mint, with every transaction having to go through them, just like a bank.
We need a way for the payee to know that the previous owners did not sign any earlier

In [19]:
# text splitter
with open('./abstract.txt') as file:
    txt = file.read()
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=1000)
text = text_splitter.create_documents([txt])
print(text[0])

Created a chunk of size 1130, which is longer than the specified 1000


page_content="Abstract. A purely peer-to-peer version of electronic cash would allow online\npayments to be sent directly from one party to another without going through a\nfinancial institution. Digital signatures provide part of the solution, but the main\nbenefits are lost if a trusted third party is still required to prevent double-spending.\nWe propose a solution to the double-spending problem using a peer-to-peer network.\nThe network timestamps transactions by hashing them into an ongoing chain of\nhash-based proof-of-work, forming a record that cannot be changed without redoing\nthe proof-of-work. The longest chain not only serves as proof of the sequence of\nevents witnessed, but proof that it came from the largest pool of CPU power. As\nlong as a majority of CPU power is controlled by nodes that are not cooperating to\nattack the network, they'll generate the longest chain and outpace attackers. The\nnetwork itself requires minimal structure. Messages are broadcast on a best 

In [22]:
# embeddings
text_to_embed = "text to be embedded as a vector"
embed = OpenAIEmbeddings()
vector = embed.embed_query(text_to_embed)
print(vector)

[-0.029769117575988324, 0.001627998690196439, -0.021420943024825508, 0.0022020228623835163, 0.018299088228344678, 0.018340899024800718, -0.01666847647978506, -0.012034472305832402, -0.019400099659536436, -0.01744893971324397, -0.010139060398588856, 0.027734337899428967, -0.010090281446497673, -0.012013566907604382, 0.0007695756066943256, 0.01392988421307595, 0.0048360881379525275, 0.011072829575279048, 0.027204737582061107, -0.02469610283321503, -0.00895442830580761, 0.010682597958549592, 0.00919832306626352, -0.0016027380782811312, -0.02652183225278456, -0.004445856055561775, 0.011874199138288572, -0.03166452855190928, -0.022619512360199632, -0.011031018778823008, 0.010661692560321573, -0.015288727647316496, -0.00946312322494745, -0.012201714560333969, -0.04975456466061894, 0.016027380084319368, -0.0027368495293083805, -0.03408954357048846, 0.039469168337079134, -0.010738345066275917, 0.010292366341820137, 0.006592131344557672, 0.008606006554211601, -0.005797730868505883, -0.018800814

In [23]:
embed_doc = embed.embed_documents([t.page_content for t in text])
print(embed_doc)

[[-0.010376109073356194, -0.007649837255698152, -0.015055530154916565, -0.02577072819835124, -0.004570913151302363, 0.0012927749521943483, -0.014011137542685183, 0.01515047476851531, -0.024821280199718607, -0.045573496483785794, 0.01083726929035195, 0.020168984820154164, -0.00908079021348479, 0.010315072984236258, -0.006724125410465201, -0.0004785386675748209, 0.02674730189897981, -0.004645512557081562, 0.015109784352876219, 0.016656028529065045, -0.00991494792503784, 0.00416061618668612, -0.015747271877394095, -0.013319396751530252, 0.014092519305285963, 0.0022091616970819124, 0.03182006707990092, -0.008409395095810703, -0.0016784881434311948, -0.012471676188979238, 0.021538902620143466, 0.0015267461226876917, -0.018690558624245554, 0.0006786008770318872, -0.028537689500325775, -0.007860072156376486, -0.00563904191693343, -0.010410017597835003, 0.01742914922017556, -0.00799570718561432, 0.017768238190254044, 0.009609768410760763, -0.002899206782616125, -0.00563904191693343, -0.0205352

In [41]:
# vector store
# load doc -> embed -> vectors -> chroma -> similarity search

loader = PyPDFLoader('./bitcoin.pdf')
document = loader.load()
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=200)
docs = text_splitter.split_documents(document)

embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embedding_function, persist_directory='./docs_db')
db.persist()

In [42]:
db_conn = Chroma(persist_directory='./docs_db', embedding_function=embedding_function)
search = 'What are the steps to run the network?'
similar_docs = db_conn.similarity_search(query=search, k=1)
print(similar_docs[0].page_content)

4.Proof-of-Work
To implement a distributed timestamp server on a peer-to-peer basis, we will need to use a proof-
of-work system similar to Adam Back's Hashcash [6], rather than newspaper or Usenet posts.  
The proof-of-work involves scanning for a value that when hashed, such as with SHA-256, the  
hash begins with a number of zero bits.  The average work required is exponential in the number  
of zero bits required and can be verified by executing a single hash.
For our timestamp network, we implement the proof-of-work by incrementing a nonce in the  
block until a value is found that gives the block's hash the required zero bits.  Once the CPU  
effort has been expended to make it satisfy the proof-of-work, the block cannot be changed  
without redoing the work.  As later blocks are chained after it, the work to change the block  
would include redoing all the blocks after it.
The proof-of-work also solves the problem of determining representation in majority decision  
making.  If 

In [50]:
# chains
human_prompt = HumanMessagePromptTemplate.from_template("Create a funny name for a company that makes {product}")
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])
chain = LLMChain(llm=chat, prompt=chat_prompt)
chain.run(product='notebooks')

'"Note-orious B.I.G. Notebooks"'

In [51]:
# topic -> outline -> blog post
template_1 = "Create an outline for a blog post about {topic}"
prompt_1 = ChatPromptTemplate.from_template(template_1)
chain_1 = LLMChain(llm=chat, prompt=prompt_1)

template_2 = "Write a blog post using this outline: {outline}"
prompt_2 = ChatPromptTemplate.from_template(template_2)
chain_2 = LLMChain(llm=chat, prompt=prompt_2)

full_chain = SimpleSequentialChain(chains=[chain_1, chain_2], verbose=True)
result = full_chain.run('Bees')
print(result)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mI. Introduction
    A. Brief overview of the importance of bees in our ecosystem
    B. Explanation of why bees are facing threats to their population

II. The role of bees in pollination
    A. Explanation of how bees are essential for pollinating crops and wild plants
    B. Examples of crops that rely on bee pollination
    C. Importance of bees in maintaining biodiversity

III. Threats to bee populations
    A. Pesticides and chemicals
    B. Habitat loss
    C. Climate change
    D. Diseases and parasites

IV. Ways to help bees
    A. Planting bee-friendly flowers and plants
    B. Avoiding the use of pesticides
    C. Providing nesting sites for bees
    D. Supporting bee conservation efforts

V. The benefits of bees
    A. Honey production
    B. Beeswax and other bee products
    C. Role in maintaining a healthy ecosystem

VI. Conclusion
    A. Recap of the importance of bees
    B. Call to action for readers 

In [57]:
# router chain
prompt_infos = [
    {
        'name': 'begginer physics', 
        'description': 'Answer basic physics questions', 
        'template': 'You are a physics teacher who is focused on begginers and explaining complex concepts in simple to understand terms. You assume no prior knowledge. Here is the question:\n{input}'},
    {
        'name': 'advanced physics', 
        'description': 'Answer complex physics questions', 
        'template': 'You are a physics teacher who explains physics to advance audiences. You can assume anyone has a PhD in physics. Here is the question:\n{input}'
    }
]
destination_chains = {}
for info in prompt_infos:
    name = info['name']
    template = info['template']
    prompt = ChatPromptTemplate.from_template(template=template)
    chain = LLMChain(llm=chat, prompt=prompt)
    destination_chains[name] = chain

default_prompt = ChatPromptTemplate.from_template('{input}')
default_chain = LLMChain(llm=chat, prompt=default_prompt)
print(MULTI_PROMPT_ROUTER_TEMPLATE)

Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (must include ```json at the start of the respon

In [59]:
# format MULTI_PROMPT_ROUTER_TEMPLATE -> {destinations}
destinations = [f"{info['name']}: {info['description']}" for info in prompt_infos]
destinations = '\n'.join(destinations)
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations)
print(router_template)

Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}
```

REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
begginer physics: Answer basic physics questions
advanced physics: Answer complex physics questions



In [62]:
router_prompt = PromptTemplate(template=router_template, input_variables=['input'], output_parser=RouterOutputParser())
router_chain = LLMRouterChain.from_llm(chat, router_prompt)
chain = MultiPromptChain(router_chain=router_chain, destination_chains=destination_chains, default_chain=default_chain, verbose=True)

In [64]:
chain.run('What are magnets?')



[1m> Entering new MultiPromptChain chain...[0m


  def predict(self, callbacks: Callbacks = None, **kwargs: Any) -> str:


begginer physics: {'input': 'What are magnets?'}
[1m> Finished chain.[0m


'Magnets are objects that have the ability to attract or repel other objects made of certain materials, such as iron or steel. This ability is due to the magnetic field that surrounds the magnet. \n\nInside a magnet, there are tiny particles called magnetic domains that are all aligned in the same direction. This alignment creates a magnetic field around the magnet, which is what allows it to attract or repel other objects.\n\nThere are two main types of magnets: permanent magnets and electromagnets. Permanent magnets are made of materials like iron, nickel, or cobalt, and they retain their magnetic properties even when not in the presence of an external magnetic field. Electromagnets, on the other hand, are temporary magnets that only exhibit magnetic properties when an electric current is passed through them.\n\nMagnets have many practical applications, such as in electric motors, speakers, and MRI machines. They are also used in everyday items like refrigerator magnets and compasses

In [65]:
chain.run('Explain Feynman Diagrams')



[1m> Entering new MultiPromptChain chain...[0m
advanced physics: {'input': 'Explain Feynman Diagrams in the context of quantum field theory'}
[1m> Finished chain.[0m


'Feynman diagrams are a powerful tool in quantum field theory that allow us to visualize and calculate the interactions between particles. They were developed by Richard Feynman in the 1940s and have become an essential part of particle physics research.\n\nIn a Feynman diagram, particles are represented by lines, with different types of lines corresponding to different types of particles (for example, electrons, photons, or quarks). The interactions between particles are represented by vertices, where the lines meet.\n\nThe key idea behind Feynman diagrams is that they provide a way to calculate the probability amplitude for a given particle interaction. By considering all possible ways that particles can interact and summing up the contributions from each Feynman diagram, we can calculate the overall probability of a particular outcome.\n\nOne of the key insights of Feynman diagrams is that they allow us to visualize particle interactions in terms of virtual particles. These virtual 

In [73]:
# QA docs
embed = OpenAIEmbeddings()
embedding_function = OpenAIEmbeddings()
db_conn = Chroma(persist_directory='./docs_db', embedding_function=embedding_function)
chat = ChatOpenAI(temperature=0)

# load_qa_chain
chain = load_qa_chain(chat, chain_type='stuff')
question = 'What is bitcoin?'
docs = db_conn.similarity_search(question)
result = chain.run(input_documents=docs, question=question)
print('load_qa_chain\n', result)

# load_qa_with_sources_chain
chain = load_qa_with_sources_chain(chat, chain_type='stuff')
question = 'What is bitcoin?'
docs = db_conn.similarity_search(question)
result = chain.run(input_documents=docs, question=question)
print('load_qa_with_sources_chain\n', result)

load_qa_chain
 Bitcoin is a digital currency system that allows online payments to be sent directly from one party to another without the need for a financial institution. It was introduced in a whitepaper titled "Bitcoin: A Peer-to-Peer Electronic Cash System" by an individual or group using the pseudonym Satoshi Nakamoto. Bitcoin operates on a decentralized peer-to-peer network, using cryptographic proof to secure transactions and prevent double-spending. It is based on a blockchain technology that records all transactions in a public ledger.
load_qa_with_sources_chain
 Bitcoin is a peer-to-peer electronic cash system that allows online payments to be sent directly from one party to another without the need for a financial institution. It uses digital signatures and a peer-to-peer network to prevent double-spending. Transactions are timestamped and recorded in a chain of hash-based proof-of-work. The system is secure as long as honest nodes control more CPU power than any cooperating