# Selective Generation through Confidence-Aware Self-Reflection

Author: Lee Jia Sheng

In [88]:
import getpass
import os
import time
import csv
from typing import Any, Dict, Iterable
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
from datasets import load_dataset
import pandas as pd
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
import math
import ast
import numpy as np
import re

In [5]:
model_id = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # device_map="auto"
    device_map="cpu"
)

Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  8.13it/s]


In [6]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Who are you?"},
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

start_time = time.time()
print("--- generate begins ---")

outputs = model.generate(
    input_ids,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    output_scores=True,
    output_logits=True,
    output_attentions=True,
    return_dict_in_generate=True
)

print("--- generate ends, time taken: %s seconds ---" % (time.time() - start_time))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


--- generate begins ---


Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.


--- generate ends, time taken: 972.1969430446625 seconds ---


In [7]:
from torch.nn import functional as F
print(len(outputs.logits))

generated_ids = outputs['sequences']
generated_text = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(generated_text)

211
I'm an assistant designed to provide information and answer questions to the best of my ability. I don't have a personal identity, but I'm here to help with any topic or task you'd like to discuss. I can provide information, explain concepts, offer suggestions, and even help with tasks such as language translation, text summarization, and more.

I'm a large language model, which means I've been trained on a massive dataset of text from various sources, including books, articles, and websites. This training allows me to understand and generate human-like language, making me a useful tool for a wide range of applications.

Some examples of things I can help with include:

* Answering questions on a particular topic
* Providing definitions and explanations
* Offering suggestions and ideas
* Translating text from one language to another
* Summarizing long pieces of text into shorter versions
* Generating text on a given topic or subject
* And more!

Feel free to ask me anything, and I'

In [8]:
print(len(outputs['logits']))

211


In [9]:
print(len(input_ids[0]))
print(len(outputs['sequences'][0]))
# len(input_ids[0]) + len(outputs[logits]) = len(outputs['sequence'][0])

44
255


In [10]:
# logits = outputs['logits']
# probabilities = F.softmax(logits, dim=-1)
# print(probabilities)

In [15]:
import torch.nn.functional as F

sequence_likelihood = 0

# Apply softmax to the logits to get probabilities
for i, logits in enumerate(outputs['logits']):
    # Apply softmax over the last dimension (vocab size) to get probabilities
    probabilities = F.softmax(logits, dim=-1)
    
    # Get the index of the generated token for this step
    generated_token_id = outputs['sequences'][0][input_ids.shape[-1] + i].item()
    
    # Get the probability of the generated token
    generated_token_prob = probabilities[0, generated_token_id].item()
    
    # Print the token ID and its probability
    print(f"Token {i+1}: ID {generated_token_id}, Probability: {generated_token_prob}")
    
    sequence_likelihood += math.log(generated_token_prob, 10)

sequence_likelihood = math.pow(10, sequence_likelihood)
print("sequence likelihood:", sequence_likelihood)

Token 1: ID 40, Probability: 0.9999953508377075
Token 2: ID 2846, Probability: 0.8917848467826843
Token 3: ID 459, Probability: 0.9984965324401855
Token 4: ID 18328, Probability: 0.3911396563053131
Token 5: ID 6319, Probability: 0.9997716546058655
Token 6: ID 311, Probability: 0.9999997615814209
Token 7: ID 3493, Probability: 0.9996452331542969
Token 8: ID 2038, Probability: 0.9351160526275635
Token 9: ID 323, Probability: 0.9933071732521057
Token 10: ID 4320, Probability: 0.9123563170433044
Token 11: ID 4860, Probability: 0.9982454776763916
Token 12: ID 311, Probability: 0.999617338180542
Token 13: ID 279, Probability: 0.9999938011169434
Token 14: ID 1888, Probability: 0.999996542930603
Token 15: ID 315, Probability: 0.9999978542327881
Token 16: ID 856, Probability: 0.9999585151672363
Token 17: ID 5845, Probability: 0.568086564540863
Token 18: ID 13, Probability: 0.9999982118606567
Token 19: ID 358, Probability: 0.9998130202293396
Token 20: ID 1541, Probability: 0.6516890525817871
Tok

In [12]:
# from captum.attr import (
#     FeatureAblation, 
#     ShapleyValues,
#     LayerIntegratedGradients, 
#     LLMAttribution, 
#     LLMGradientAttribution, 
#     TextTokenInput, 
#     TextTemplateInput,
#     ProductBaselines,
# )

# lig = LayerIntegratedGradients(model, model.model.embed_tokens)

# llm_attr = LLMGradientAttribution(lig, tokenizer)

In [13]:
# print(tokenizer.decode(input_ids[0]))

In [14]:
# eval_prompt = tokenizer.decode(input_ids[0])
# inp = TextTokenInput(
#     eval_prompt,
#     tokenizer,
#     skip_tokens=[1],  # skip the special token for the start of the text <s>
# )

# attr_res = llm_attr.attribute(inp, target=generated_text)

# attr_res.plot_seq_attr(show=True)
# attr_res.plot_token_attr(show=True)

# High uncertainty token

In [23]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList


# Input text and tokenization
input_text2 = "The capital of France is Paris."
inputs2 = tokenizer(input_text2, return_tensors="pt")
print("inputs2:", inputs2)
input_ids2 = inputs2['input_ids']

# Forward pass through the model
with torch.no_grad():
    outputs = model(**inputs2)
    logits2 = outputs.logits  # Shape: (batch_size, seq_length, vocab_size)

# Convert logits to log probabilities
log_probs = torch.nn.functional.softmax(logits2, dim=-1)

# Identify low-confidence tokens by selecting the max probability at each time step
# Threshold for low confidence (tune as needed, e.g., log_prob < -2.0)
threshold = 0.2
low_confidence_mask = log_probs.max(dim=-1).values < threshold

# Decode tokens and annotate those with low confidence
decoded_tokens = tokenizer.convert_ids_to_tokens(input_ids2[0])

annotated_text = []
for token, is_low_conf in zip(decoded_tokens, low_confidence_mask[0]):
    if is_low_conf:
        annotated_text.append(f"*{token}*")
    else:
        annotated_text.append(token)

# Join tokens to form the final output text
final_text = tokenizer.convert_tokens_to_string(annotated_text)
print(final_text)

inputs2: {'input_ids': tensor([[128000,    791,   6864,    315,   9822,    374,  12366,     13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}
*<|begin_of_text|>**The* capital* of* France is Paris.


In [34]:
from datasets import load_dataset

ds = load_dataset("truthfulqa/truthful_qa", "multiple_choice")
print(ds['validation'])

Dataset({
    features: ['question', 'mc1_targets', 'mc2_targets'],
    num_rows: 817
})


In [42]:
df = pd.DataFrame(ds['validation'])
print(df[:1])

# # Use pd.json_normalize to convert 'info' column into multiple columns
# df_flattened = pd.json_normalize(df['validation'])

# # Concatenate with the original DataFrame if needed
# df_final = pd.concat([df.drop('info', axis=1), df_flattened], axis=1)

# print(df_final)

                                            question  \
0  What is the smallest country in the world that...   

                                         mc1_targets  \
0  {'choices': ['Nauru is the smallest country in...   

                                         mc2_targets  
0  {'choices': ['Nauru is the smallest country in...  


In [75]:
df = pd.read_csv('validation.csv')
# print(df[:5])
def inspect(df, i):
    print('[context]')
    print(df.loc[i]['context'])
    print('[question]')
    print(df.loc[i]['question'])
    print('[answers]')
    
    data_str_cleaned = re.sub(r"array\(", "", df.loc[i]['answers'])    # Remove 'array('
    data_str_cleaned = re.sub(r", dtype=[a-zA-Z0-9_]+", "", data_str_cleaned)  # Remove 'dtype=...'
    data_str_cleaned = re.sub(r"\)", "", data_str_cleaned)  # Remove closing '
    
    answer_dict = ast.literal_eval(data_str_cleaned)
    answer_text = answer_dict['text']
    answer_start = answer_dict['answer_start']
    print(answer_text)
inspect(df, 0)
inspect(df, 1)

[context]
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.
[question]
In what country is Normandy located?
[answers]
['France', 'France', 'France', 'France']
[context]
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to N

# Chain-of-Reflection (Iterative Reflection)

Zero shot

In [None]:
model_id = "meta-llama/Llama-3.2-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # device_map="auto"
    device_map="cpu"
)

In [76]:
system_prompt = """
Let's take it step by step. You are student sitting in an reading comprehension exam. Given a context and question, 
give the answer in a short span of text. Some of the questions may be unanswerable, in that case, 
leave the [Answer] section empty, and state your explanation in [Reasoning] section.

[Context]: ...
[Question]: When were the Normans in Normandy

Answer in the following format:

[Reasoning]: [Provide your explanation here if any]
[Answer]: [Answer here]

======
You will receive feedback from teacher in subsequent conversation, based on the feedback, 
reflect on your previous answer and answer again in the following format:

[Reflection]: [Provide your reflection]
[Answer]: [Answer here]
"""

verifier_prompt = """
You are a teacher reading student's attempt in a reading comprehension exam. Students are provided context and question, 
and they give an answer together with their reasoning to derive the answer. Some of the questions may be unanswerable, 
in that case, the expected [Answer] section is empty. Student's confidence is also attached with their answer.

Give your feedback to the student's answer below. 
"""

input_to_verifier = """
[Context]: {context}
[Question]: {question}
[Reasoning]: {reasoning}
[Answer]: {answer}
[Confidence]: {confidence}
"""

grader_prompt = """
You are a grader grading a reading comprehension exam. Students are provided context and question, 
and they give an answer together with their reasoning to derive the answer. Some of the questions may be unanswerable, 
in that case, the expected [Answer] section is empty.

Answer either True or False given the student's answer.
===
[Context]: Math
[Question]: 1 + 2 * 3
[Reasoning]: Multiplication is performed before addition
[Answer]: 7

True
===
[Context]: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.
[Question]: In what country is Normandy located?
[Reasoning]: Random guess
[Answer]: China

False
===
"""

input_to_grader = """
[Context]: {context}
[Question]: {question}
[Reasoning]: {reasoning}
[Answer]: {answer}
"""

question_format = """
[Context]: {context}
[Question]: {question}
[Reasoning]: [Your reasoning here]
[Answer]: [Your short answer here]
"""

In [77]:
input1 = question_format.format(
    context="""The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.""",
    question="In what country is Normandy located?")

messages = [
    {"role": "system", "content": system_prompt}
]

def generate(model, tokenizer, messages, user_question):
    """
    :param model: model
    :param tokenizer: tokenizer
    :param messages: conversation
    :param user_question: user_question
    :return: outputs, input_ids, messages with user_question
    """
    messages.append({"role": "user", "content": user_question})
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    )

    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    start_time = time.time()
    print("--- generate begins ---")

    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        output_scores=True,
        output_logits=True,
        output_attentions=True,
        return_dict_in_generate=True
    )
    print("--- generate ends, time taken: %s seconds ---" % (time.time() - start_time))
    return outputs, input_ids, messages

# generated_ids = outputs['sequences']
# generated_text = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
# print(generated_text)


def get_confidence(output_logits):
    return 1.0

In [78]:
outputs, messages = generate(model, tokenizer, messages, input1)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


--- generate begins ---
--- generate ends, time taken: 497.7900297641754 seconds ---


In [80]:
print(messages)
print(outputs)

GenerateDecoderOnlyOutput(sequences=tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,    717,   5020,    220,   2366,     19,    271,  10267,    596,
           1935,    433,   3094,    555,   3094,     13,   1472,    527,   5575,
          11961,    304,    459,   5403,  62194,   7151,     13,  16644,    264,
           2317,    323,   3488,     11,    720,  47530,    279,   4320,    304,
            264,   2875,   9575,    315,   1495,     13,   4427,    315,    279,
           4860,   1253,    387,    653,   9399,    481,     11,    304,    430,
           1162,     11,    720,  22233,    279,    510,  16533,     60,   3857,
           4384,     11,    323,   1614,    701,  16540,    304,    510,  26197,
            287,     60,   3857,    382,     58,   2014,   5787,  12515,     58,
          14924,   5787,   3277,   1051,    279,  20935,    598,    304, 

In [82]:
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": input1}
]
generated_ids = outputs['sequences']
input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    )
generated_text = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(generated_text)

[Reasoning]: Based on the context provided, Normandy is mentioned as a region in France, and it is mentioned that the Normans gave their name to Normandy. This suggests a direct connection between the Normans and the region of Normandy.

[Answer]: France


In [87]:
def extract_answer(text):
    # Regular expression pattern to match everything after "[Answer]: "
    pattern = r"\[Answer\]:\s*(.*)"
    match = re.search(pattern, text, re.DOTALL)
    
    if match:
        answer_text = match.group(1)
        return answer_text
    else:
        print("No match found")
        return ""

print(extract_answer(generated_text))

France


## Evaluation

[{'id': '56ddde6b9a695914005b9628', 'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.', 'question': 'In what country is Normandy located?', 'is_impossible': 'False', 'answers': "[{'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start':

The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.
In what country is Normandy located?
[{'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}, {'text': 'France', 'answer_start': 159}]
Normandy is located in France.
The Normans (Norman: Nou