In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_checkpoint = "C:/Users/jywun/Desktop/NYCU/模組/QA_ARAG/Llama3-ChatQA-1.5-8B"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint, torch_dtype=torch.float16).to(device)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.44s/it]


In [2]:
def get_formatted_input(messages, context):
    system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
    instruction = "Please give a full and complete answer for the question."

    for item in messages:
        if item['role'] == "user":
            ## only apply this instruction for the first user turn
            item['content'] = instruction + " " + item['content']
            break

    conversation = '\n\n'.join(["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in messages]) + "\n\n Assistant:"
    formatted_input = system + "\n\n" + context + "\n\n" + conversation
    
    return formatted_input

In [3]:
messages = [
    {"role": "user", "content": "what is the percentage change of the net income from Q4 FY23 to Q4 FY24?"}
]
document = """NVIDIA (NASDAQ: NVDA) today reported revenue for the fourth quarter ended January 28, 2024, of $22.1 billion, up 22% from the previous quarter and up 265% from a year ago.\nFor the quarter, GAAP earnings per diluted share was $4.93, up 33% from the previous quarter and up 765% from a year ago. Non-GAAP earnings per diluted share was $5.16, up 28% from the previous quarter and up 486% from a year ago.\nQ4 Fiscal 2024 Summary\nGAAP\n| $ in millions, except earnings per share | Q4 FY24 | Q3 FY24 | Q4 FY23 | Q/Q | Y/Y |\n| Revenue | $22,103 | $18,120 | $6,051 | Up 22% | Up 265% |\n| Gross margin | 76.0% | 74.0% | 63.3% | Up 2.0 pts | Up 12.7 pts |\n| Operating expenses | $3,176 | $2,983 | $2,576 | Up 6% | Up 23% |\n| Operating income | $13,615 | $10,417 | $1,257 | Up 31% | Up 983% |\n| Net income | $12,285 | $9,243 | $1,414 | Up 33% | Up 769% |\n| Diluted earnings per share | $4.93 | $3.71 | $0.57 | Up 33% | Up 765% |"""

In [4]:
formatted_input = get_formatted_input(messages, document)
tokenized_prompt = tokenizer(tokenizer.bos_token + formatted_input, return_tensors="pt").to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

In [5]:
tokenized_prompt_recursive = tokenized_prompt.input_ids
max_length = 128
generated_tokens = []
generated_probabilities = []

model.eval()
with torch.no_grad():
    for _ in range(max_length):
        outputs = model(tokenized_prompt_recursive)
        logits = outputs.logits
        # 使用 softmax 函数将 logits 转换为概率分布
        probabilities = F.softmax(logits, dim=-1)
        # 从概率分布中采样一个标记
        next_token = torch.argmax(probabilities[:, -1, :], dim=-1).unsqueeze(-1)
        # 将生成的标记添加到生成的文本中
        generated_tokens.append(next_token.item())
        generated_probabilities.append(probabilities[:, -1, :].squeeze()[next_token])
        # 将生成的标记加入到输入中以生成下一个标记
        tokenized_prompt_recursive = torch.cat([tokenized_prompt_recursive, next_token], dim=1)
        if next_token.item() in terminators:
            break
        
# 将生成的标记转换为文本
generated_text = tokenizer.decode(generated_tokens)
print("Generated:",generated_text)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Generated: <|begin_of_text|> The percentage change of the net income from Q4 FY23 to Q4 FY24 is calculated using the formula (($12,285 million - $1,414 million) / $1,414 million * 100), resulting in a 769% increase.<|end_of_text|>


In [6]:
def find_below_threshold_indices(generated_tokens, generated_probabilities, threshold):
    below_threshold_indices = []

    for i, val in enumerate(generated_probabilities):
        if val < threshold:
            print(f"{i}: ",tokenizer.decode(generated_tokens[i]))
            below_threshold_indices.append(i)
    return below_threshold_indices

def remove_below_threshold_tokens(generated_tokens, generated_probabilities, threshold):
    below_threshold_indices = find_below_threshold_indices(generated_tokens, generated_probabilities, threshold)
    for i in reversed(below_threshold_indices):
        generated_tokens.pop(i)
    return generated_tokens
cloze = remove_below_threshold_tokens(generated_tokens, generated_probabilities, 0.4)
print("Generated:",tokenizer.decode(cloze))

50:  769
Generated: <|begin_of_text|> The percentage change of the net income from Q4 FY23 to Q4 FY24 is calculated using the formula (($12,285 million - $1,414 million) / $1,414 million * 100), resulting in a % increase.<|end_of_text|>


In [7]:
messages = [
    {"role": "user", "content": "what is the percentage change of the net income from Q4 FY23 to Q4 FY24?"}
]
document = """NVIDIA (NASDAQ: NVDA) today reported revenue for the fourth quarter ended January 28, 2024, of $22.1 billion, up 22% from the previous quarter and up 265% from a year ago.\nFor the quarter, GAAP earnings per diluted share was $4.93, up 33% from the previous quarter and up 765% from a year ago. Non-GAAP earnings per diluted share was $5.16, up 28% from the previous quarter and up 486% from a year ago.\nQ4 Fiscal 2024 Summary\nGAAP\n| $ in millions, except earnings per share | Q4 FY24 | Q3 FY24 | Q4 FY23 | Q/Q | Y/Y |\n| Revenue | $22,103 | $18,120 | $6,051 | Up 22% | Up 265% |\n| Gross margin | 76.0% | 74.0% | 63.3% | Up 2.0 pts | Up 12.7 pts |\n| Operating expenses | $3,176 | $2,983 | $2,576 | Up 6% | Up 23% |\n| Operating income | $13,615 | $10,417 | $1,257 | Up 31% | Up 983% |\n| Net income | $12,285 | $9,243 | $1,414 | Up 33% | Up 769% |\n| Diluted earnings per share | $4.93 | $3.71 | $0.57 | Up 33% | Up 765% |"""

In [8]:
def get_formatted_input_cloze(messages, context, generated_tokens):
    system = "System: This is an artificial intelligence assistant that can complete sentences with uncertain parts removed due to low confidence. The assistant provides contextually accurate answers to the user's questions."
    instruction = "Please complete answer based on context and user questions."
    generated_tokens = generated_tokens[1:-1]
    cloze = tokenizer.decode(generated_tokens)
    for item in messages:
        if item['role'] == "user":
            ## only apply this instruction for the first user turn
            item['content'] = instruction + " Question:" + item['content']
            break

    conversation = '\n\n'.join(["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in messages]) + " Answer:" +cloze + "\n\nAssistant:"
    formatted_input = system + "\n\n" + context + "\n\n" + conversation
    
    return formatted_input
output = get_formatted_input_cloze(messages, document, generated_tokens)
output

"System: This is an artificial intelligence assistant that can complete sentences with uncertain parts removed due to low confidence. The assistant provides contextually accurate answers to the user's questions.\n\nNVIDIA (NASDAQ: NVDA) today reported revenue for the fourth quarter ended January 28, 2024, of $22.1 billion, up 22% from the previous quarter and up 265% from a year ago.\nFor the quarter, GAAP earnings per diluted share was $4.93, up 33% from the previous quarter and up 765% from a year ago. Non-GAAP earnings per diluted share was $5.16, up 28% from the previous quarter and up 486% from a year ago.\nQ4 Fiscal 2024 Summary\nGAAP\n| $ in millions, except earnings per share | Q4 FY24 | Q3 FY24 | Q4 FY23 | Q/Q | Y/Y |\n| Revenue | $22,103 | $18,120 | $6,051 | Up 22% | Up 265% |\n| Gross margin | 76.0% | 74.0% | 63.3% | Up 2.0 pts | Up 12.7 pts |\n| Operating expenses | $3,176 | $2,983 | $2,576 | Up 6% | Up 23% |\n| Operating income | $13,615 | $10,417 | $1,257 | Up 31% | Up 98

In [9]:
output = get_formatted_input_cloze(messages, document, generated_tokens)
tokenized_prompt = tokenizer(tokenizer.bos_token + formatted_input, return_tensors="pt").to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
tokenized_prompt_recursive = tokenized_prompt.input_ids
max_length = 128
generated_tokens = []
generated_probabilities = []

model.eval()
with torch.no_grad():
    for _ in range(max_length):
        outputs = model(tokenized_prompt_recursive)
        logits = outputs.logits
        # 使用 softmax 函数将 logits 转换为概率分布
        probabilities = F.softmax(logits, dim=-1)
        # 从概率分布中采样一个标记
        next_token = torch.argmax(probabilities[:, -1, :], dim=-1).unsqueeze(-1)
        # 将生成的标记添加到生成的文本中
        generated_tokens.append(next_token.item())
        generated_probabilities.append(probabilities[:, -1, :].squeeze()[next_token])
        # 将生成的标记加入到输入中以生成下一个标记
        tokenized_prompt_recursive = torch.cat([tokenized_prompt_recursive, next_token], dim=1)
        if next_token.item() in terminators:
            break
        
# 将生成的标记转换为文本
generated_text = tokenizer.decode(generated_tokens)
print("Generated:",generated_text)

Generated: <|begin_of_text|> The percentage change of the net income from Q4 FY23 to Q4 FY24 is calculated using the formula (($12,285 million - $1,414 million) / $1,414 million * 100), resulting in a 769% increase.<|end_of_text|>
