# Generator
> Generate answer based on retrieved chunks

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#| default_exp generator

In [None]:
#|export
import os
import openai
import fastcore.all as fc
import braintrust
from dotenv import load_dotenv
from wattbot import retriever, eda, utils

In [None]:
load_dotenv()

True

## Prompt 

I have saved a prompt in [braintrust](https://www.braintrust.dev/) via its UI.

The following cell loads that prompt from braintrust by project name and slug.

In [None]:
braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2")

<braintrust.logger.Prompt>

### Prompt Template

In [None]:
#|export
@fc.patch
def template(self:braintrust.logger.Prompt):
    "Return the content of the first message of the prompt (the question/context template)."
    first_message = self.prompt.messages[0]
    return first_message.content

In [None]:
braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2").template()[:100]

"You are analyzing academic papers about AI's environmental impact. Answer the question using ONLY th"

### System Prompt

In [None]:
#|export
@fc.patch
def system_prompt(self:braintrust.logger.Prompt):
    "Return the content of the second message of the prompt (the system prompt)."
    second_message = self.prompt.messages[1]
    return second_message.content

In [None]:
sys_prompt = braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2").system_prompt()
sys_prompt[:100]

'You are an expert research assistant specializing in AI environmental impact analysis. Your role is '

### Fill Template

In [None]:
#|export
def create_context(chunks):
    "Render every chunk through `retriever.Nugget`, numbered from 1, and join the results with blank lines."
    nuggets = (retriever.Nugget(chunk, position) for position, chunk in enumerate(chunks, start=1))
    return "\n\n".join(nuggets)

In [None]:
all_chunks = retriever.chunk_all(retriever.chunk_doc)
context = create_context(all_chunks[:2])
print(context[:100])

### Chunk 1
            Text: Amazon 
Sustainability 
Report
2023 Contents
Overview
3 Introduction
4


In [None]:
query = eda.get_train_data().question
query

"True or False: The Transformer architecture eventuallly outperforms the Evolved Transformers architecture on the WMT'24 EN-DE BLUE task as the model sizes grow."

In [None]:
#|export
@fc.patch
def fill_template(self:braintrust.logger.Prompt,  question, chunks):
    "Build the prompt with `question` and the context made from `chunks`; return the first message's content."
    context = create_context(chunks)
    built = self.build(question=question, context=context)
    first_message = built['messages'][0]
    return first_message['content']

In [None]:
print(braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2").fill_template(query, all_chunks[:2])[:100])

You are analyzing academic papers about AI's environmental impact. Answer the question using ONLY th


## LLM 

In [None]:
llm_model = 'accounts/fireworks/models/gpt-oss-20b'

In [None]:
ls = retriever.LexicalSearch(all_chunks)

In [None]:
rc = ls.search(query)
query, rc[0].text

("True or False: The Transformer architecture eventuallly outperforms the Evolved Transformers architecture on the WMT'24 EN-DE BLUE task as the model sizes grow.",
 'to   a   better   understanding   of   cost-accuracy   tradeoffs   \nin   ML   models,   potentially   further   reducing   overall   emissions   by   empowering   more   informed   ML   model   \nselection,   as   the   next   subsection   explains.   \n    \nFigure   4:   Reproduction   of   Figure   4   from   So    et   al.    Dots   on   the   blue   line   represent   various   sizes   of   plain   \nTransformer   NLP   models,   while   dots   on   the   red   line   represent   various   sizes   of   the   open-sourced   \nEvolved   Transformer   architecture   that   was   discovered   by   the   neural   architecture   search   run   in   [So19] .   \nRed   arrows   are   at   131M   and   210M   parameters   and   show   that   an   Evolved   Transformer   can   achieve   \nhigher   accuracy   at   less   cost:

In [None]:
prompt_msg = braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2").fill_template(query, rc)

In [None]:
#|export
def mk_msg(msg, sys):
    "Create a two-message chat list: `sys` as the system message followed by `msg` as the user message."
    system_message = dict(role="system", content=sys)
    user_message = dict(role="user", content=msg)
    return [system_message, user_message]

In [None]:
mk_msg("how many r in strawberry?", "You are a helpful assistant that provides concise and professional answers.")

[{'role': 'system',
  'content': 'You are a helpful assistant that provides concise and professional answers.'},
 {'role': 'user', 'content': 'how many r in strawberry?'}]

In [None]:
utils.fw()

<openai.OpenAI>

In [None]:
#|export
@fc.patch
def generate(self:openai.OpenAI, model, text, sys):
    "Send `text` with system prompt `sys` to `model`, requesting a JSON object, and return the first choice's content."
    completion = self.chat.completions.create(
        model=model,
        messages=mk_msg(text, sys),
        response_format={"type": "json_object"},
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
utils.fw().generate(llm_model, "how many r in strawberry?", "You are a helpful assistant that provides concise and professional answers.")

'{"count":3}'

In [None]:
fc.loads(utils.fw().generate(llm_model, prompt_msg, sys_prompt))

{'answer': 'FALSE',
 'answer_value': '0',
 'answer_unit': 'is_blank',
 'ref_id': ['patterson2021'],
 'ref_url': ['https://arxiv.org/pdf/2104.10350'],
 'supporting_materials': '"Red arrows are at 131M and 210M parameters and show that an Evolved Transformer can achieve higher accuracy at less cost"',
 'explanation': "The provided excerpts state that the Evolved Transformer consistently delivers higher accuracy (than the plain Transformer) while using fewer resources; there is no evidence that the plain Transformer later surpasses it in the WMT'24 EN‑DE BLEU task as model size grows."}

In [None]:
#|export
# Keys that every answer dict is filled out to contain, in output order.
required_keys = [
    'answer',
    'answer_value',
    'answer_unit',
    'ref_id',
    'ref_url',
    'supporting_materials',
    'explanation',
]

In [None]:
#|export
def default_answer():
    "Return a fresh answer dict in which every key of `required_keys` maps to 'is_blank'."
    return dict.fromkeys(required_keys, 'is_blank')

In [None]:
default_answer()

{'answer': 'is_blank',
 'answer_value': 'is_blank',
 'answer_unit': 'is_blank',
 'ref_id': 'is_blank',
 'ref_url': 'is_blank',
 'supporting_materials': 'is_blank',
 'explanation': 'is_blank'}

In [None]:
#|export
def replace_missing_keys(answer):
    "Return a new dict holding exactly `required_keys`, with 'is_blank' for any key absent from `answer`."
    completed = {}
    for key in required_keys:
        completed[key] = answer.get(key, 'is_blank')
    return completed

In [None]:
answer = {'answer_value': '40',
 'answer_unit': 'is_blank',
 'ref_id': ['372'],
 'ref_url': ['https://arxiv.org/pdf/2505.06371']}
replace_missing_keys(answer)

{'answer': 'is_blank',
 'answer_value': '40',
 'answer_unit': 'is_blank',
 'ref_id': ['372'],
 'ref_url': ['https://arxiv.org/pdf/2505.06371'],
 'supporting_materials': 'is_blank',
 'explanation': 'is_blank'}

In [None]:
#|export
def post_process(answer):
    """Normalise a parsed LLM answer.

    Fills in missing keys via `replace_missing_keys`, then collapses an "empty"
    `ref_id` / `ref_url` to the plain string 'is_blank'. A reference counts as
    empty when it is `None` (JSON null), has length 0 (e.g. `[]` or `''`), or is
    the one-element list `['is_blank']`. Any other value is returned unchanged.
    """
    answer = replace_missing_keys(answer)
    for key in ('ref_id', 'ref_url'):
        value = answer[key]
        # `len()` is only attempted on sized values: a JSON null or a bare number
        # would otherwise raise TypeError here.
        is_empty = hasattr(value, '__len__') and len(value) == 0
        if value is None or is_empty or value == ['is_blank']:
            answer[key] = 'is_blank'
    return answer

In [None]:
post_process(answer)

{'answer': 'is_blank',
 'answer_value': '40',
 'answer_unit': 'is_blank',
 'ref_id': ['372'],
 'ref_url': ['https://arxiv.org/pdf/2505.06371'],
 'supporting_materials': 'is_blank',
 'explanation': 'is_blank'}

In [None]:
#|export
class RAG:
    "Retrieve chunks with `r`, fill the braintrust prompt with them, and generate an answer with `g`."
    def __init__(self, r, g, model='accounts/fireworks/models/gpt-oss-20b'):
        "Store `r`, `g` and `model` as attributes, then load the prompt (`pt`) and its system prompt (`sp`)."
        fc.store_attr()
        self.pt = braintrust.load_prompt(project="wattbot", slug="wattbot-prompt-v2")
        self.sp = self.pt.system_prompt()

    def answer(self, q, n_rc=10, json=True):
        """Answer question `q` from retrieved chunks.

        Calls `self.r.search(q, n=n_rc)`, fills the prompt template with the
        result, and asks `self.g` to generate a response. With `json=True` the
        response is parsed and passed through `post_process`; with `json=False`
        the raw generated text is returned.

        If generation, parsing or post-processing raises, the error is printed
        and `default_answer()` is used instead.

        Returns a namespace with `ans` (the answer), `rc` (retrieved chunks) and
        `pm` (the filled prompt message).
        """
        rc = self.r.search(q, n=n_rc)
        pm = self.pt.fill_template(q, rc)
        try:
            ans = self.g.generate(self.model, pm, self.sp)
            # Parse and normalise inside the guarded section: a payload that is valid
            # JSON but not the expected shape then falls back to the default answer
            # instead of raising out of `answer`.
            if json: ans = post_process(fc.loads(ans))
        except Exception as e:
            print(e)
            ans = default_answer()
        return fc.NS(ans=ans, rc=rc, pm=pm)

In [None]:
rag = RAG(ls, utils.fw())
response = rag.answer(query)
response.ans

{'answer': 'FALSE',
 'answer_value': '0',
 'answer_unit': 'is_blank',
 'ref_id': ['patterson2021'],
 'ref_url': ['https://arxiv.org/pdf/2104.10350'],
 'supporting_materials': '"Figure\xa04 shows that the Evolved Transformer, found by NAS [So19], has 37% fewer parameters and converges to the same accuracy with 25% less energy expenditure (see Table\xa01) than the vanilla Transformer (Big) model on WMT English to German translation."',
 'explanation': 'The document reports that the Evolved Transformer achieves the same accuracy as the vanilla Transformer with fewer parameters and lower energy, but does not indicate that larger plain Transformers eventually outperform the Evolved Transformer on the WMT EN‑DE BLEU task. Hence the claim is unsupported and therefore false.'}

In [None]:
len(response.rc), response.rc[0]

(10,
 namespace(text='to   a   better   understanding   of   cost-accuracy   tradeoffs   \nin   ML   models,   potentially   further   reducing   overall   emissions   by   empowering   more   informed   ML   model   \nselection,   as   the   next   subsection   explains.   \n    \nFigure   4:   Reproduction   of   Figure   4   from   So    et   al.    Dots   on   the   blue   line   represent   various   sizes   of   plain   \nTransformer   NLP   models,   while   dots   on   the   red   line   represent   various   sizes   of   the   open-sourced   \nEvolved   Transformer   architecture   that   was   discovered   by   the   neural   architecture   search   run   in   [So19] .   \nRed   arrows   are   at   131M   and   210M   parameters   and   show   that   an   Evolved   Transformer   can   achieve   \nhigher   accuracy   at   less   cost:   it   runs   1.3X   faster   and   produces   1.3x   less   CO 2 e.     \n4.2   There   are   more   resources   used   for   training   than  

In [None]:
len(response.pm)

22465

In [None]:
rag.answer(query, json=False).ans

'{"answer":"False","answer_value":"0","answer_unit":"is_blank","ref_id":["patterson2021"],"ref_url":["https://arxiv.org/pdf/2104.10350"],"supporting_materials":"\\"Red arrows are at 131M and 210M parameters and show that an Evolved Transformer can achieve higher accuracy at less cost: it runs 1.3X faster and produces 1.3x less CO 2 e.\\"","explanation":"The excerpts state that at the examined model sizes (131M and 210M) the Evolved Transformer achieves higher accuracy than the plain Transformer. No evidence is provided that the plain Transformer eventually surpasses it as model size increases; thus the statement is false."}'

In [None]:
rag.answer(eda.get_train_data().question).ans

{'answer': 'The estimated CO2 emissions are 1,438\u202flbs.',
 'answer_value': '1438',
 'answer_unit': 'lbs',
 'ref_id': ['dodge2022'],
 'ref_url': ['https://arxiv.org/pdf/2206.05229'],
 'supporting_materials': '"BERTbase V100x64 ... 79 1507 1438"',
 'explanation': 'In Table\u202f3 of the paper, the entry for BERT base trained on 64 V100 GPUs for 79\u202fh lists a CO2e of 1,438\u202flbs.'}

In [None]:
ls = retriever.LexicalSearch(all_chunks, neighbour_chunks=True)
rag = RAG(ls, utils.fw())
len(rag.answer(query).rc)

26

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()