In [None]:
!pip install cascades
!pip install duckduckgo-search  # or your preferred web query api

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import os
import openai
# Optional override: fill in the Colab form field to export the key for this
# session. Left as None, any OPENAI_API_KEY already in the environment is
# used untouched.
api_key = None # @param
if api_key:
  os.environ.update(OPENAI_API_KEY=api_key)

In [None]:
import cascades as cc

In [None]:
# Check that we can sample from GPT.
# Builds a GPT distribution over completions of the prompt, draws one sample
# from it, and displays that sample via the bare `x` on the last line.
dist = cc.GPT(prompt='Probabilistic programming is ',
       # engine='davinci-codex', 
       temperature=0.7, 
       stop=('\n',))
# NOTE(review): `rng=0` presumably fixes the sampling seed — TODO confirm.
x = dist.sample(rng=0)
x

RandomSample(log_p=-32.80007293, value='an important technique for deep learning, as we can directly sample from our desired distribution and use those samples as training data.', dist=None)

# Web Search

In [None]:
from duckduckgo_search import ddg

In [None]:
# Call the search API directly once to see what a raw result looks like
# before wrapping it in a helper.
keywords = 'How many legs does a rabbit have?'
results = ddg(
    keywords=keywords,
    region='wt-wt',
    safesearch='Moderate',
    time='y',
    max_results=3,
)
print(results)

[{'title': 'How Many Legs Does A Rabbit Have - Realonomics', 'href': 'https://aeries.norushcharge.com/how-many-legs-does-a-rabbit-have/', 'body': "Does a rabbit have 2 or 4 legs? A rabbit has four legs. The two in the front are called the forelegs and the two in the rear are called the hind legs. … Muscling in the hind legs is much more extensive than in the forelegs. A rabbit's body is broken into two sections called the forequarter and the hindquarter. How many feet does a rabbit have?"}, {'title': 'How Many Legs Do Rabbits Have - Realonomics', 'href': 'https://aeries.norushcharge.com/how-many-legs-do-rabbits-have/', 'body': 'How Many Legs Do Rabbits Have? four legs Do rabbits have legs? The hind limbs of the rabbit are longer than the front limbs. This allows them to produce their hopping form of locomotion. Longer hind limbs are more capable of producing faster speeds. Hares which have longer legs than cottontail rabbits are able to move considerably faster.'}, {'title': 'How many 

In [None]:
# Inspect the first hit: the fields it carries and the passage text under 'body'.
results[0].keys(), results[0]['body']

(dict_keys(['title', 'href', 'body']),
 "Does a rabbit have 2 or 4 legs? A rabbit has four legs. The two in the front are called the forelegs and the two in the rear are called the hind legs. … Muscling in the hind legs is much more extensive than in the forelegs. A rabbit's body is broken into two sections called the forequarter and the hindquarter. How many feet does a rabbit have?")

In [None]:
import functools

@functools.lru_cache(maxsize=1000)
def get_passages(query, num_passages=5, output=None):
  """Search the web for `query` and return the raw `ddg` result.

  Calls are memoized (up to 1000 distinct argument combinations), so asking
  the same question again does not issue another search. The cached object
  is handed back by reference; callers should treat it as read-only.

  Args:
    query: Search keywords. Must be hashable because it is part of the
      cache key.
    num_passages: Forwarded to `ddg` as `max_results`.
    output: Forwarded to `ddg` unchanged; the accepted values noted in this
      notebook are 'json', 'csv' and 'print'.

  Returns:
    Whatever `ddg` returns. Elsewhere in this notebook it is indexed as a
    list of dicts with 'title', 'href' and 'body' keys.
  """
  return ddg(keywords=query, max_results=num_passages, output=output)

In [None]:
@cc.model
def qa_with_search(question):
  """Answer `question` with GPT, using the top web search hit as context.

  Fetches one passage via `get_passages`, logs its text under the name
  'context', then samples a completion of a few-shot prompt that contains
  both the question and that passage.

  Args:
    question: The question text. Used as the search query and inserted into
      the prompt.

  Returns:
    The `value` of the sample named 'answer', exactly as produced (it is not
    stripped).
  """
  passages = get_passages(question, num_passages=1)
  # A search can come back empty (or as None). Fall back to an empty context
  # rather than failing on `[0]`; the empty value is still logged, so the
  # missing passage is visible in the trace.
  context = passages[0]['body'] if passages else ''
  yield cc.log(context, name='context')
  prompt = f"""The answer sheet for the questions is below:

Question: Which planet is the hottest in the solar system?
Context: It has a strong greenhouse effect, similar to the one we experience on Earth. Because of this, Venus is the hottest planet in the solar system. The surface of Venus is approximately 465°C! Fourth from the Sun, after Earth, is Mars.
Answer: Venus

Question: Which country produces the most coffee in the world?
Context: With the rise in popularity of coffee among Europeans, Brazil became the world's largest producer in the 1840s and has been ever since. Some 300,000 coffee farms are spread over the Brazilian landscape.
Answer: Brazil

Question: {question}
Context: {context}
Answer:"""
  # Stop at the first newline so only the answer line is generated.
  answer = yield cc.GPT(prompt=prompt, stop='\n', name='answer')
  return answer.value

@cc.model
def qa(question):
  """Answer `question` with GPT from a few-shot prompt alone (no search).

  Args:
    question: The question text inserted as the final prompt entry.

  Returns:
    The `value` of the sample named 'answer', exactly as produced.
  """
  # NOTE(review): the prompt header mentions "a document from the web", yet
  # this prompt includes no document — confirm the wording is intended.
  few_shot_prompt = f"""Answer the questions below given a document from the web:

Question: What is often seen as the smallest unit of memory?
Answer: kilobyte

Question: Which planet is the hottest in the solar system?
Answer: Venus

Question: Which country produces the most coffee in the world?
Answer: Brazil

Question: {question}
Answer:"""
  completion = yield cc.GPT(prompt=few_shot_prompt, stop='\n', name='answer')
  return completion.value

In [None]:
# Baseline: time a single sample from the prompt-only model and show its record.
%time no_search = qa.sample('Which bones are babies born without?')
no_search

CPU times: user 615 ms, sys: 447 ms, total: 1.06 s
Wall time: 2.72 s


Record(
  answer: Sample(name='answer', score=0, value=' middle ear bones', should_stop=False, replayed=False, metadata=None)
  return:  middle ear bones
)

In [None]:
# Same question through the search-augmented model; the record also shows the
# logged 'context' passage that was placed in the prompt.
%time with_search = qa_with_search.sample('Which bones are babies born without?')
with_search

CPU times: user 41.1 ms, sys: 4.51 ms, total: 45.6 ms
Wall time: 2.54 s


Record(
  context: Log(name='context', score=None, value='One example of a bone that babies are born without: the kneecap (or patella). The kneecap starts out as cartilage and starts significantly hardening into bone between the ages of 2 and 6 years old. In most cases, several areas of cartilage in the knee begin to harden at the same time and eventually fuse together to form one solid bone.', should_stop=False, replayed=False, metadata=None)
  answer: Sample(name='answer', score=0, value=' Kneecap', should_stop=False, replayed=False, metadata=None)
  return:  Kneecap
)

In [None]:
def compare(question):
  """Sample an answer to `question` from both models.

  Args:
    question: The question text passed to each model.

  Returns:
    A pair `(no_search, search)`: the result of `qa.sample` followed by the
    result of `qa_with_search.sample`.
  """
  # Tuple elements are evaluated left to right, so `qa` is sampled first.
  return qa.sample(question), qa_with_search.sample(question)

In [None]:
# Run both models on a slightly reworded version of the earlier question.
compare('Which bone are babies born without')

(Record(
   answer: Sample(name='answer', score=0, value=' Stapes', should_stop=False, replayed=False, metadata=None)
   return:  Stapes
 ), Record(
   context: Log(name='context', score=None, value='Firstly, a newborn has several "proto bones" (ie cartilagneous precusors) which are not bones at all - yet. During development, many bones consist of "several bones, joined by cartilage" which will become one bone eventually. Take an example - the femur. This consists of at least five bones until total fusion aged perhaps 17yo.', should_stop=False, replayed=False, metadata=None)
   answer: Sample(name='answer', score=0, value=' rib', should_stop=False, replayed=False, metadata=None)
   return:  rib
 ))

In [None]:
from concurrent import futures
# Shared executor for the parallel sampling cells below (at most 16 worker threads).
pool = futures.ThreadPoolExecutor(max_workers=16)

In [None]:
# Launch 4 samples of the search-augmented model on the shared pool. The
# records are shown immediately, before their work has necessarily finished.
Q = 'Which bone is a baby born without?'
rs = qa_with_search.sample_parallel(pool, Q, n=4)
rs  # List of running traces.

[Record(
 
 ), Record(
 
 ), Record(
 
 ), Record(
 
 )]

In [None]:
# Wait for the first trace to finish and show its result.
# NOTE(review): the argument 20 is presumably a timeout in seconds (as with
# concurrent.futures.Future.result), not a number of results — confirm.
rs[0].future.result(20)

' patella'

In [None]:
# Show the return value recorded on each of the parallel traces.
[r.return_value for r in rs]

[' patella', ' patella', ' patella', ' patella']

In [None]:
%%time
rs = qa.sample_parallel(pool, Q, n=4)
[r.future.result(20) for r in rs]
print([r.return_value for r in rs])

[' radius', ' middle ear', ' baby tooth', ' Clavicle']
CPU times: user 14.4 ms, sys: 849 µs, total: 15.2 ms
Wall time: 14.6 ms
