In [None]:

# ---------------------------------------------------------
# Setup
# ---------------------------------------------------------

import math
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from collections import defaultdict
import re
import itertools
import numpy as np
from datasets import load_dataset
from tqdm import tqdm
import datetime
import pandas as pd
from transformers import logging
logging.set_verbosity_error()
import math


class LMHeadModel:
    """Wrapper around a Hugging Face causal LM for batched next-token prediction.

    Attributes:
        device: Device the model and every encoded batch are moved to.
        model: The causal language model, switched to eval mode.
        tokenizer: The matching tokenizer. When it has no pad token, EOS is
            reused as the pad token and right padding is selected.
        batch_prediction_count: Number of ``get_batch_predictions`` calls
            since construction or the last reset.
    """

    def __init__(self, model_name, device="cuda" if torch.cuda.is_available() else "cpu"):
        """Load ``model_name`` (model and tokenizer) onto ``device``."""
        self.device = device
        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model.eval()

        # Ensure the tokenizer has a padding token
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token  # Use EOS token as padding
            self.tokenizer.padding_side = "right"

        self.batch_prediction_count = 0

    def batch_encode(self, sentences):
        """
        Encodes a batch of sentences into input tensors.
        Args:
            sentences (list of str): The input sentences to encode.
        Returns:
            inputs (dict): A dictionary of tokenized inputs ready for the model.
        """
        return self.tokenizer(
            sentences,
            return_tensors="pt",
            padding=True,  # Pad to the longest sequence in the batch
            truncation=True,  # Truncate sequences longer than the model's max length
        ).to(self.device)

    def batch_decode(self, token_ids):
        """
        Decodes a batch of token IDs back to sentences.
        Args:
            token_ids (torch.Tensor): A tensor of token IDs to decode.
        Returns:
            decoded_sentences (list of str): The decoded sentences.
        """
        return self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)

    def batch_decode_top_k(self, token_ids_batch, tokenizer):
        """
        Decodes every token ID on its own and strips surrounding whitespace.
        Args:
            token_ids_batch (torch.Tensor): A batch of token IDs (e.g., from `topk`).
            tokenizer: The tokenizer used for encoding/decoding.
        Returns:
            list of list of str: One stripped token string per ID, per sequence.
        """
        return [
            [tokenizer.decode([token_id]).strip() for token_id in token_ids]
            for token_ids in token_ids_batch
        ]

    def get_batch_predictions(self, sentences, top_k=100):
        """
        Predicts the next tokens for a batch of input sentences.
        Args:
            sentences (list of str): The input sentences.
            top_k (int): Number of top tokens to consider for each sentence
                (clamped to the vocabulary size).
        Returns:
            predictions (list of list of tuples): ``(token, probability)``
            pairs per sentence, most probable first. Tokens that are empty
            after stripping are dropped, so a list may hold fewer than
            ``top_k`` entries.
        """
        # Count calls so callers can see how many forward passes a trellis layer costs.
        self.batch_prediction_count += 1

        inputs = self.batch_encode(sentences)

        with torch.no_grad():
            outputs = self.model(**inputs, use_cache=False)

        # The batch is padded, so position -1 is a pad token for every sentence
        # shorter than the longest one. Pick the last *attended* position per
        # row instead; this works for both left and right padding.
        attention_mask = inputs["attention_mask"]
        positions = torch.arange(attention_mask.size(1), device=attention_mask.device)
        last_token_index = (attention_mask * positions).argmax(dim=1)
        row_index = torch.arange(attention_mask.size(0), device=attention_mask.device)
        logits = outputs.logits[row_index, last_token_index, :]  # Shape: (batch_size, vocab_size)

        probs = torch.nn.functional.softmax(logits, dim=-1)
        # topk raises when k exceeds the vocabulary size.
        top_probs, top_token_ids = torch.topk(probs, k=min(top_k, probs.size(-1)), dim=-1)
        top_tokens = self.batch_decode_top_k(top_token_ids, self.tokenizer)

        predictions = [
            [(token, prob.item()) for token, prob in zip(top_tokens[i], top_probs[i]) if token and token != "\n"]
            for i in range(len(sentences))
        ]
        return predictions

    def get_batch_prediction_count(self):
        """
        Returns the number of times batch predictions have been made.
        """
        return self.batch_prediction_count

    def reset_batch_prediction_count(self):
        """Resets the batch prediction counter to zero."""
        self.batch_prediction_count = 0
class SearchTree:
    """Node of the token search tree.

    Scores are additive: a node's cumulative score is its parent's cumulative
    score plus its own ``probability`` (i.e. the values are treated as
    log-probabilities).

    Attributes:
        token_id: Token id of this node. The root is expected to carry a
            list of ids (``build_Context`` extends the sequence with it);
            every other node carries a single id.
        context: Decoded text of the node, stripped of surrounding whitespace.
        probability: This node's own score.
        tokenizer: Tokenizer used by ``build_Context`` to decode the path.
        parent: Parent node, or ``None`` for the root.
        child: List of child nodes registered on this node.
        parent_index: Index of the parent within its layer, if assigned.
        cached_prob: Cumulative score from the root down to this node.
    """

    def __init__(self,context,probability,token_id,model,tokenizer,parent = None,child = None,parent_index = None):
        """Create a node and cache its cumulative score.

        ``model`` is accepted for interface compatibility and is not used.
        """
        self.token_id = token_id
        self.context = context.strip()
        self.probability = probability
        # Keep the tokenizer on the node so decoding does not depend on a
        # module-level global of the same name.
        self.tokenizer = tokenizer
        self.parent = parent
        self.child = []
        self.parent_index = parent_index
        if child is not None:
            self.child.append(child)

        # Cache the cumulative score at node creation.
        if parent:
            self.cached_prob = parent.calcProbTillNow() + probability
        else:
            self.cached_prob = probability

    def build_Context(self):
        """Decode the token ids on the path from the root to this node into text."""
        path_ids = []
        node = self
        while node.parent is not None:
            path_ids.append(node.token_id)
            node = node.parent
        path_ids.reverse()
        # ``node`` is now the root, whose token_id is a sequence of ids.
        full_context = list(node.token_id)
        full_context.extend(path_ids)
        return self.tokenizer.decode(full_context, skip_special_tokens=True)

    def create_child(self):
        """Register this node in its parent's ``child`` list (no-op for the root)."""
        if self.parent is not None:
            self.parent.child.append(self)

    def replace_parent(self, new_parent):
        """Assign a new parent and update the cached cumulative score."""
        self.parent = new_parent
        # Scores are additive everywhere else in this class (constructor and
        # change_probability); multiplying here mixed the two conventions.
        self.cached_prob = new_parent.calcProbTillNow() + self.probability

    def calcProbTillNow(self):
        """Return the cached cumulative score to avoid redundant calculations."""
        return self.cached_prob

    def change_probability(self,new_probability):
        """Swap this node's own score and adjust the cached cumulative score to match."""
        self.cached_prob = self.cached_prob - self.probability
        self.probability = new_probability
        self.cached_prob += self.probability

    def assign_parent_index(self,parent_index):
        """Record the index of the parent within its layer."""
        self.parent_index = parent_index


def findProbability(InitialToken, FinalTokens, model,tokenizer):
    """Score the transition from ``InitialToken`` to each node in ``FinalTokens``.

    The context of ``InitialToken`` is fed to ``generate_token_and_probability``
    with ``top_k=500``. For every token id in that result the largest score
    seen is kept.

    Returns:
        list: One score per entry of ``FinalTokens`` (looked up by its
        ``token_id``), or ``-math.inf`` when the id is not in the top 500.
    """
    prompt = InitialToken.build_Context()
    candidates = generate_token_and_probability(model, tokenizer, [prompt], top_k=500)

    best_score = {}
    for _, tok_id, _tok_text, score in candidates[0]:
        key = tok_id.item()
        # Keep the highest score if an id shows up more than once.
        if key not in best_score or score > best_score[key]:
            best_score[key] = score

    return [best_score.get(target.token_id, -math.inf) for target in FinalTokens]


def VITERBI_Lists(state_transition_probmat, initial_state_prob):
    """Run Viterbi over additive scores stored in nested lists.

    Args:
        state_transition_probmat: One entry per time step; each entry holds,
            for every state of the new layer, a sequence of transition scores
            indexed by the state of the previous layer.
        initial_state_prob: Scores of the first layer's states.

    Returns:
        tuple: ``(best_path, viterbi_mat, best_path_prob)`` — the chosen state
        index per layer, the per-layer best scores, and the score of the best
        path. Ties go to the lowest index.
    """
    trellis = [[float(score) for score in initial_state_prob]]
    pointers = []

    for step_scores in state_transition_probmat:
        previous = trellis[-1]
        layer = []
        layer_pointers = []
        for incoming in step_scores:
            candidates = [previous[k] + incoming[k] for k in range(len(previous))]
            top = max(candidates)
            layer.append(top)
            layer_pointers.append(candidates.index(top))
        trellis.append(layer)
        pointers.append(layer_pointers)

    best_path_prob = max(trellis[-1])
    state = trellis[-1].index(best_path_prob)

    # Walk the back-pointers from the last layer to the first.
    path = [state]
    for layer_pointers in reversed(pointers):
        state = layer_pointers[state]
        path.append(state)
    path.reverse()

    return path, trellis, best_path_prob
def decodePath(best_path,unique_tokens_list,root_string,tokenizer):
    """Turn a Viterbi path into text appended to ``root_string``.

    Args:
        best_path: Chosen state index for each layer.
        unique_tokens_list: Layers of nodes; node ``best_path[i]`` of layer
            ``i`` supplies the ``token_id`` for step ``i``.
        root_string: Prompt text placed in front of the decoded tokens.
        tokenizer: Object whose ``decode`` turns the id list into text.

    Returns:
        str: ``root_string`` followed by the decoded token sequence.
    """
    resultant_token_ids = [
        unique_tokens_list[layer][state].token_id
        for layer, state in enumerate(best_path)
    ]
    print("Generated Token Ids: ", resultant_token_ids)
    return root_string + tokenizer.decode(resultant_token_ids, skip_special_tokens=True)

def generate_token_and_probability(model, tokenizer, batch_prompts, probabilityMatrix=None,uniqueTokensList=None,max_length=1, top_k=3):
    """Greedy-generate one step per prompt and report the top-k next tokens.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer. Its pad token is set to EOS as a side
            effect; its padding side is restored before returning.
        batch_prompts (list of str): Prompts to continue.
        probabilityMatrix: Unused. Optional so that call sites which do not
            pass it keep working.
        uniqueTokensList: Unused. Optional for the same reason.
        max_length (int): Number of tokens to generate beyond the prompt.
            Only the first generated step is scored.
        top_k (int): Number of candidates returned per prompt.

    Returns:
        list: One list per prompt holding ``top_k`` tuples
        ``(generated_token, token_id, token_text, log_prob)``, where
        ``generated_token`` is the greedy first token for that prompt,
        ``token_id`` is a 0-d tensor and ``log_prob`` a float.
    """
    tokenizer.pad_token = tokenizer.eos_token

    # Decoder-only models continue from the last position, so a batch must be
    # padded on the left; with right padding, shorter prompts are continued
    # from a pad token. Restore the caller's setting afterwards.
    previous_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        tokenized_result = tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True)
    finally:
        tokenizer.padding_side = previous_padding_side

    input_ids = tokenized_result["input_ids"].to(model.device)
    attn_mask = tokenized_result["attention_mask"].to(model.device)
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attn_mask,
        max_length=input_ids.size(-1) + max_length,
        do_sample=False,  # Greedy decoding
        output_scores=True,
        return_dict_in_generate=True,
        pad_token_id=tokenizer.pad_token_id,  # explicit, avoids the open-end generation warning
    )

    sequences, scores = outputs.sequences, outputs.scores
    prompt_length = input_ids.size(-1)
    predictions = []
    for i in range(len(batch_prompts)):
        # First token generated after the (padded) prompt.
        generated_token_id = sequences[i][prompt_length:].tolist()[0]
        generated_token = tokenizer.decode(generated_token_id, skip_special_tokens=True)

        # scores[0] holds the scores of the first generated step for the whole batch.
        log_probs = torch.nn.functional.log_softmax(scores[0][i], dim=-1)
        topk_logprobs, topk_ids = log_probs.topk(top_k)
        # batch_decode over a 1-D id tensor decodes every id on its own.
        topk_tokens = tokenizer.batch_decode(topk_ids, skip_special_tokens=True)

        predictions.append([
            (generated_token, token_id, tok, lp.item())
            for token_id, tok, lp in zip(topk_ids, topk_tokens, topk_logprobs)
        ])

    return predictions


def check_bad_predictions(text):
    """Return True when ``text`` looks like a degenerate prediction.

    A text is flagged when it contains two or more consecutive occurrences of
    one of ``= ! ? , ; | ~ & -`` or any non-ASCII character.
    """
    bad_patterns = [r'={2,}', r'!{2,}', r'\?{2,}', r',{2,}', r';{2,}', r'\|{2,}', r'~{2,}', r'&{2,}', r'-{2,}']

    has_repeated_punctuation = any(re.search(pattern, text) for pattern in bad_patterns)
    # str.isascii() is True exactly when every code point is <= 127.
    return has_repeated_punctuation or not text.isascii()

def generateIntermediates(root,model,tokenizer,numTokens = 3, loop_runner = 4): 
  """Expand ``root`` into layers of candidate tokens and score the transitions between layers.

  The first layer holds the top ``numTokens`` continuations of ``root``. Each
  of the ``loop_runner - 2`` following iterations asks for the top
  ``numTokens`` continuations of every node in the current layer, keeps one
  node per distinct decoded token text, scores every (previous node, new
  node) pair with ``findProbability`` and re-parents each new node to the
  previous node with the best combined score.

  Args:
      root: Prompt text.
      model, tokenizer: Passed through to ``generate_token_and_probability``,
          ``findProbability`` and ``SearchTree``.
      numTokens: Number of candidates requested per context.
      loop_runner: Upper bound of ``range(2, loop_runner)`` for the layer loop.

  Returns:
      tuple: ``(probabilityMatrix, initialStateProbability, content, uniqueTokenLength)``.
      ``probabilityMatrix`` has one entry per loop iteration; each entry has
      one list per new node holding that node's score from every
      previous-layer node. ``initialStateProbability`` holds the first-layer
      scores, ``content`` the nodes of every layer, ``uniqueTokenLength`` the
      node count of every layer.

  NOTE(review): scores are combined by addition throughout, so they are
  presumably log-probabilities — confirm against the producer.
  """
  root_token_id = tokenizer.encode(root)
  print("root_token_id: ", root_token_id)
  # Root node: carries the whole encoded prompt and a score of 0.
  sentence = SearchTree(root,0,token_id =root_token_id,model = model, tokenizer = tokenizer)
  # prob_list, overlap, most_common, flops_counter and cached_probs below are
  # initialised but not used again in this function.
  context = []
  prob_list = []
  num_tokens = numTokens
  content = []
  probability = []

  tokens_50K = generate_token_and_probability(model, tokenizer, [root], top_k=numTokens) # top-numTokens candidates for the root prompt
  children = []
  overlap = []
  most_common = []
  #unique_elements = []   # to store unique elements at each iteration
  unique_tokens = set()
  probabilityMatrix = []
  uniqueTokensList = []
  new_content = []
  uniqueTokenLength = []

  flops_counter = {}
  cached_probs = {}
  # More than batch_size contexts are predicted in two calls; the second call
  # takes the last holdout_number contexts.
  batch_size = 75
  holdout_number = 15
  # ---- First layer: one node per top candidate of the root prompt ----
  for i in range(num_tokens):
    _,token_id,context,prob = tokens_50K[0][i]  # each entry is unpacked as (ignored, token id, token text, score)
    context = context.strip()  #This is not the correct solution. I am doing this rather than only leaving one strip command in search tree because I am appending to unique tokens before I am assigning this to search tree. 
    # context2 = context.strip()
    # bad_prediction_checker = check_bad_predictions(context2)
    print("initial loop")
    print(tokens_50K[0][i])
    # if context2:
    unique_tokens.add(context)
    probability.append(prob)  
    # From here on ``context`` is the node, not the token text.
    context = SearchTree(context,prob,token_id = token_id.item(),model = model,tokenizer = tokenizer,parent =sentence,parent_index = 0)
    new_content.append(context)
    context.create_child()
    uniqueTokensList.append(context)
    children.append(context)

  content.append(new_content)
  # uniqueTokensList[:previousUniqueLength] is always the previous layer.
  previousUniqueLength = num_tokens
  #unique_elements.append(unique_tokens)
  initialStateProbability = probability 
  uniqueTokenLength.append(num_tokens)
  # ---- Following layers ----
  for i in range(2,loop_runner):
    unique_tokens = set()
    probability = []
    new_content = []
    total_predictions = []
    previousSetLength = 0
    # One prompt per node of the current layer.
    batch_sentences = [child.build_Context() for child in uniqueTokensList]
    print("batch_sentences: ", batch_sentences)
    if len(batch_sentences)>batch_size:
        # Two calls: everything but the last holdout_number prompts, then the rest.
        batch_sentences2 = batch_sentences[0:-holdout_number]
        batch_predictions =  generate_token_and_probability(model, tokenizer, batch_sentences2, top_k=numTokens)
        total_predictions = []
        total_predictions.extend(batch_predictions)
        batch_predictions1 = generate_token_and_probability(model, tokenizer, batch_sentences[-holdout_number:], top_k=numTokens)
        total_predictions.extend(batch_predictions1)
    else:
        total_predictions = generate_token_and_probability(model, tokenizer, batch_sentences, top_k=numTokens)

    # range(len(...)) is evaluated once, so j only visits the current layer
    # even though new nodes are appended to uniqueTokensList inside the loop.
    for j in range(len(uniqueTokensList)):
      print("j: ", j)
      for s in range(num_tokens):
        print("s: ", s)
        print("total_predictions[j][s]: ", total_predictions[j][s])
        _,token_id,context,prob = total_predictions[j][s]
        context2 = context.strip()  # not used below
        #bad_predictions_checker = check_bad_predictions(context2)
        # if context2:
        unique_tokens.add(context)   # de-duplication key is the unstripped token text
        context = SearchTree(context,prob,token_id = token_id.item(),model = model,tokenizer = tokenizer,parent = uniqueTokensList[j])   #probably redundant: Because I should only create SearchTree of unique tokens
        # context.create_child() Removed this 2/19/2025
        # The set only grows for a token text not seen in this layer yet, so
        # just the first node per distinct text is kept.
        if (len(unique_tokens)>previousSetLength):
          previousSetLength = len(unique_tokens)
          uniqueTokensList.append(context)
          new_content.append(context)


    #unique_elements.append(unique_tokens) # append the unique tokens list at each iteration to unique_elements list
    content.append(new_content) # for storing tokens which will pass to the decode_path function.

   
    # comb_prob: scores for every (previous node, new node) pair, flattened
    # with the previous node as the outer index.
    comb_prob = []
    for prevToken in uniqueTokensList[:previousUniqueLength]:
      comb_prob.append(findProbability(prevToken,uniqueTokensList[previousUniqueLength:], model,tokenizer))
    comb_prob = list(itertools.chain(*comb_prob)) # flattening the list

    for tokenumber,newToken in enumerate(uniqueTokensList[previousUniqueLength:]):
      # probs: this new node's score from each previous node.
      probs = [comb_prob[a*len(uniqueTokensList[previousUniqueLength:]) + tokenumber] for a in range(len(uniqueTokensList[:previousUniqueLength]))]
      # probs2: the same scores plus each previous node's cumulative score.
      probs2 = [probs[i] + uniqueTokensList[:previousUniqueLength][i].calcProbTillNow() for i in range(len(probs))]
      #   print("parent_prob Up Till now: ",[uniqueTokensList[:previousUniqueLength][i].calcProbTillNow() for i in range(len(probs))])
      #   print("combined probs: ", probs2)
      #   print("actual_probs: ", [math.exp(probs2[i]) for i in range(len(probs2))])
      if not probs2:
        continue
      else:
        max_value = max(probs2)
        max_index = probs2.index(max_value)
        # Re-parent to the previous node with the best combined score.
        newToken.replace_parent(uniqueTokensList[:previousUniqueLength][max_index])
        # NOTE(review): max_value already includes the parent's cumulative
        # score, and replace_parent/change_probability combine it with the
        # parent's cached score again — confirm this is intended.
        newToken.change_probability(max_value) # just added this 3/27/2025
        newToken.assign_parent_index(max_index)
      probability.append(probs)
    probabilityMatrix.append(probability)
    # flops_counter[i-1] = model.get_batch_prediction_count()
    #model.reset_batch_prediction_count()


    uniqueTokenLength.append(len(uniqueTokensList[previousUniqueLength:]))

    # Drop the previous layer: the new nodes become the current layer.
    previousUniqueLength = len(uniqueTokensList[previousUniqueLength:])
    uniqueTokensList = uniqueTokensList[len(uniqueTokensList)-previousUniqueLength:]

  return probabilityMatrix, initialStateProbability, content,uniqueTokenLength #, flops_counter
def runViterbiTransformerPipeline(rootSentence, numTokens = 3, loop_runner=3):
    """Continue ``rootSentence`` with the Viterbi-style decoder on GPT-2.

    Loads a fresh "gpt2" model and tokenizer, builds the candidate layers with
    ``generateIntermediates`` (called with ``loop_runner + 1``), picks the best
    path with ``VITERBI_Lists`` and decodes it.

    Returns:
        tuple: ``(decoded_text, best_path_score)``.
    """
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    transitions, initial_scores, layers, _layer_sizes = generateIntermediates(
        rootSentence,
        model,
        tokenizer,
        numTokens=numTokens,
        loop_runner=loop_runner + 1,
    )
    path, _trellis, path_score = VITERBI_Lists(transitions, initial_scores)
    return decodePath(path, layers, rootSentence, tokenizer), path_score
def runTransformerPipeline(rootSentence,loop_runner = 3):
  """Greedily extend ``rootSentence`` by ``loop_runner`` tokens using ``LMHeadModel("gpt2")``.

  Each step appends the first prediction returned for the current text.
  Punctuation tokens and tokens containing an apostrophe are attached
  directly; every other token is preceded by a space.

  Returns:
      tuple: ``(final_text, product_of_the_chosen_probabilities)``.
  """
  lm = LMHeadModel("gpt2")
  running_prob = 1
  text = rootSentence
  attached_tokens = ['.',':',',','?','!',';']
  for _ in range(loop_runner):
    best = lm.get_batch_predictions([text])[0][0]
    token = best[0]
    running_prob = running_prob*best[1]
    if token in attached_tokens or "'" in token:
      separator = ''
    else:
      separator = ' '
    text = text + separator + token
  return text,running_prob

def gather_log_probabilities(logits: torch.Tensor, labels: torch.LongTensor) -> torch.Tensor:
    """Return the log-softmax score of each label.

    ``logits`` is cast to float32, normalised over its last dimension, and the
    entry selected by ``labels`` is picked along that dimension. The result
    has the shape of ``labels``.
    """
    index = labels.unsqueeze(-1)
    normalised = torch.log_softmax(logits.float(), dim=-1)
    return normalised.gather(-1, index).squeeze(-1)



In [80]:
# Compare the Viterbi-style decoder with Hugging Face greedy decoding on one prompt.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Viterbi-style decoder; the returned score is passed through math.exp before printing.
Mydecoder_text,decoder_prob = runViterbiTransformerPipeline("I enjoy walking in the park.\n\n", loop_runner = 1)
print("Mydecoder_text: ",Mydecoder_text)
print(math.exp(decoder_prob))
# -- Greedy decoding
tokenized_result = tokenizer("I enjoy walking in the park.\n\n",return_tensors = "pt")
input_ids = tokenized_result["input_ids"]
# Alternative prompt tried earlier: "= Valkyria Chronicles III = \n \n The"
print("input ids: ", input_ids)
attn_mask = tokenized_result['attention_mask']
# max_length is the prompt length plus two.
greedy_ids = model.generate(
    input_ids=input_ids,
    max_length= len(input_ids[0])+2,
    do_sample=False,  # Greedy
    attention_mask = attn_mask
)
print("greedy_ids: ",greedy_ids)
# greedy_text stays a notebook global and is read again by a later cell.
greedy_text = tokenizer.decode(greedy_ids[0], skip_special_tokens=True)
print("greedy_text: ", greedy_text)


root_token_id:  [40, 2883, 6155, 287, 262, 3952, 13, 628]
initial loop
('\n', tensor(198), '\n', -0.0003575639275368303)
initial loop
('\n', tensor(13), '.', -9.920859336853027)
initial loop
('\n', tensor(357), ' (', -10.000357627868652)
Generated Token Ids:  [198]
Mydecoder_text:  I enjoy walking in the park.



0.9996424999908258
input ids:  tensor([[  40, 2883, 6155,  287,  262, 3952,   13,  628]])
greedy_ids:  tensor([[  40, 2883, 6155,  287,  262, 3952,   13,  628,  198,   40]])
greedy_text:  I enjoy walking in the park.


I


In [91]:
# Top-3 next-token candidates for a batch of prompts of different lengths.
batch_sentences = ['I enjoy walking in the park, but','I enjoy walking in the park, and', 'I enjoy walking in the park. I', 'I enjoy walking in the park and seeing', 'I enjoy walking in the park and it', 'I enjoy walking in the park. It', 'I enjoy walking in the park.\n', 'I enjoy walking in the streets of New', 'I enjoy walking in the streets of the', 'I enjoy walking in the streets of London']
batch_predictions =  generate_token_and_probability(model, tokenizer, batch_sentences, top_k=3)

for k in range(len(batch_sentences)):
    print(batch_predictions[k])

# Check whether the shorter of two prompts ends in the pad token after batching.
batch_sentences2 = ["Hi How are you", "I am good"]
tokenizer.pad_token= tokenizer.eos_token

tokenized_result = tokenizer(batch_sentences2, return_tensors="pt",padding = True,truncation = True)
# Compare the token id with the pad token id. The earlier comparison against
# tokenizer.decode(1) compared an id with a string and could never be true.
s = tokenized_result["input_ids"][1][-1].item() == tokenizer.pad_token_id
if s:
    print("YES")

print(tokenizer.decode(50256))


[(' I', tensor(314), ' I', -1.1460292339324951), (' I', tensor(340), ' it', -2.455843687057495), (' I', tensor(618), ' when', -3.269465208053589)]
[(' I', tensor(314), ' I', -1.2076544761657715), (' I', tensor(340), ' it', -2.942868709564209), (' I', tensor(262), ' the', -3.3185324668884277)]
[(' love', tensor(1842), ' love', -2.1047072410583496), (' love', tensor(588), ' like', -2.1849684715270996), (' love', tensor(1101), "'m", -2.600236415863037)]
[(' the', tensor(262), ' the', -1.2142137289047241), (' the', tensor(661), ' people', -2.660304546356201), (' the', tensor(477), ' all', -2.892085552215576)]
[("'s", tensor(338), "'s", -0.7176357507705688), ("'s", tensor(318), ' is', -2.272681713104248), ("'s", tensor(1838), ' makes', -2.7028956413269043)]
[("'s", tensor(338), "'s", -0.49828243255615234), ("'s", tensor(318), ' is', -2.229475975036621), ("'s", tensor(1838), ' makes', -3.3147878646850586)]
[('\n', tensor(198), '\n', -0.008988158777356148), ('\n', tensor(40), 'I', -6.90969181

In [None]:
# Print the (token, probability) predictions LMHeadModel returns for one prompt
# (get_batch_predictions is called with its default top_k).
model2 = LMHeadModel("gpt2")
tokens = model2.get_batch_predictions(["I enjoy walking in the park.\n"])
print(tokens)

[[("'s", 0.6075733304023743), ('is', 0.10758479684591293), ('makes', 0.03634175658226013), ('feels', 0.02430432289838791), ('gives', 0.020894328132271767), ('has', 0.01991804502904415), ('was', 0.014500156976282597), ('reminds', 0.010581542737782001), ('really', 0.010477031581103802), ('helps', 0.007916023954749107), ('can', 0.006842233706265688), ('doesn', 0.0053792246617376804), ('keeps', 0.004913671407848597), ('takes', 0.004836901556700468), ('gets', 0.004592712037265301), ('seems', 0.004339112900197506), ('allows', 0.004170210566371679), ('just', 0.003746775444597006), ('would', 0.0037268472369760275), ('means', 0.0032763087656348944), ('does', 0.003250489942729473), ('brings', 0.003102917456999421), ('will', 0.0028180910740047693), ('looks', 0.0025168289430439472), ('provides', 0.002448026556521654), ('always', 0.002390843816101551), ('also', 0.002261810004711151), ('isn', 0.0019119289936497808), ('adds', 0.0015579728642478585), ('definitely', 0.0012738914228975773), ('offers', 0

In [91]:
# Inspect the candidates for a single WikiText-style prompt.
answer=generate_token_and_probability(model,tokenizer,[" Unlike its two predecessors , Valkyria Chronicles III was not released in the west . According to Sega , this was due to"])
print(answer)
# answer[0] is the candidate list of the only prompt; print its entries at index 1 and 2.
print(answer[0][1])
print(answer[0][2])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[[(' the', ' the', -1.3555811643600464), (' the', ' a', -1.9958933591842651), (' the', ' an', -3.4174938201904297)]]
(' the', ' a', -1.9958933591842651)
(' the', ' an', -3.4174938201904297)


In [222]:
# Build prompts from the first 1000 WikiText-103 training lines: every
# non-empty line is cut after its 20th word (shorter lines are kept whole).
wikitext = load_dataset("wikitext", "wikitext-103-v1")
prompts = []

for text in wikitext["train"]["text"][:1000]:
    if not text:
        continue
    # Use regex to find word boundaries
    matches = list(re.finditer(r'\b\w+\b', text))
    if len(matches) < 20:
        prompts.append(text)
        continue
    # Cut right after the end of the 20th word
    prompts.append(text[:matches[19].end()])
print(prompts)
print(len(prompts))

[' = Valkyria Chronicles III = \n', ' Senjō no Valkyria 3 : <unk> Chronicles ( Japanese : 戦場のヴァルキュリア3 , lit . Valkyria of the Battlefield 3 ) , commonly referred to as Valkyria Chronicles', ' The game began development in 2010 , carrying over a large portion of the work done on Valkyria Chronicles II . While', ' It met with positive sales in Japan , and was praised by both Japanese and western critics . After release , it received', ' = = Gameplay = = \n', ' As with previous <unk> Chronicles games , Valkyria Chronicles III is a tactical role @-@ playing game where players take control of', " The game 's battle system , the <unk> system , is carried over directly from <unk> Chronicles . During missions , players select each", ' Troops are divided into five classes : Scouts , <unk> , Engineers , Lancers and Armored Soldier . Troopers can switch classes by changing their', ' = = Plot = = \n', ' The game takes place during the Second Europan War . Gallian Army Squad 422 , also known as " T

In [143]:
# Top-5 next-token candidates for one prompt.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# No `.to(model.device)` here: it read `model` while rebinding it, which is a
# NameError on a fresh kernel. The other cells build their input tensors
# without moving them to a device, so the model is left where
# from_pretrained puts it.
model = AutoModelForCausalLM.from_pretrained(model_name)

prompts = ["I enjoy walking in the"]
results = generate_token_and_probability(model, tokenizer, prompts, max_length=3, top_k=5)

for res in results:
    print(res)


[(' park', ' park', -1.8385964632034302), (' park', ' woods', -2.2997779846191406), (' park', ' streets', -3.1739540100097656), (' park', ' dark', -3.468158721923828), (' park', ' door', -3.519336700439453)]


In [None]:
# Model and tokenizer shared by generate_texts, compute_perplexity and the main loop below.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# model2 = LMHeadModel(model_name)
model.eval()  # put model in inference mode

# If using GPU (e.g., on Colab), you could also do:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)



# ---------------------------------------------------------
# Function to generate text using different decoders
# ---------------------------------------------------------
def generate_texts(model, tokenizer, prompt, max_length=40):
    """Generate a continuation of ``prompt`` with each decoding strategy.

    Runs, in this order: greedy, beam search (3 beams), top-k sampling (k=50)
    and nucleus sampling (p=0.9) via ``model.generate``, then
    ``runViterbiTransformerPipeline`` and ``runTransformerPipeline`` with
    ``loop_runner=5``.

    Returns:
        dict: Generated text under the keys ``greedy``, ``beam``, ``topk``,
        ``topp``, ``ourDecoder`` and ``ourGreedy``.
    """
    encoded = tokenizer(prompt,return_tensors = "pt")
    # If on GPU, move these tensors to the model's device first.
    shared_args = {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded['attention_mask'],
        "max_length": max_length,
    }

    # The order matters: the two sampling strategies consume random numbers.
    strategies = (
        ("greedy", {"do_sample": False}),
        ("beam", {"num_beams": 3, "early_stopping": True}),
        ("topk", {"do_sample": True, "top_k": 50}),
        ("topp", {"do_sample": True, "top_p": 0.9}),
    )

    texts = {}
    for label, strategy_args in strategies:
        generated_ids = model.generate(**shared_args, **strategy_args)
        texts[label] = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # The custom decoders return (text, score); only the text is reported.
    texts["ourDecoder"] = runViterbiTransformerPipeline(prompt, loop_runner = 5)[0]
    texts["ourGreedy"] = runTransformerPipeline(prompt,loop_runner = 5)[0]
    return texts



# ---------------------------------------------------------
# Function to compute perplexity of a string
# ---------------------------------------------------------
def compute_perplexity(model, tokenizer, text):
    """Compute perplexity of `text` under `model`.

    The text is tokenized and passed to the model as both input and labels;
    the perplexity is ``exp`` of the loss the model reports.
    """
    token_ids = tokenizer(text, return_tensors="pt").input_ids
    # If on GPU, move token_ids to the model's device first.

    with torch.no_grad():
        loss = model(token_ids, labels=token_ids).loss.item()

    return math.exp(loss)

# ---------------------------------------------------------
# Main loop: generate text, then compute perplexities
# ---------------------------------------------------------
all_results = []

#prompts = ["I enjoy walking in the"]
# Evaluate the first prompt only. `prompts[0]` is a string, so iterating it
# walked over single characters; widen the slice to evaluate more prompts.
for prompt in prompts[:1]:

    input_ids = tokenizer(prompt, return_tensors = "pt")["input_ids"]
    length = len(input_ids[0])

    # Generate text via different decoding methods (five tokens beyond the prompt)
    gen_texts = generate_texts(model, tokenizer, prompt,max_length = length+5)

    # Compute perplexities of the generated texts
    results_for_prompt = {"prompt": prompt}
    for method, text in gen_texts.items():
        ppl = compute_perplexity(model, tokenizer, text)
        results_for_prompt[f'{method}_text'] = text
        results_for_prompt[f'{method}_ppl'] = ppl

    # Store results
    all_results.append(results_for_prompt)

# ---------------------------------------------------------
# Printing the Results
# Keys inside the f-strings use single quotes: reusing the enclosing double
# quote is a syntax error before Python 3.12.
for res in all_results:
    print(f"Prompt: {res['prompt']}")
    print(f"  Greedy PPL: {res['greedy_ppl']:.2f}")
    print(f"  Beam   PPL: {res['beam_ppl']:.2f}")
    print(f"  Top-k  PPL: {res['topk_ppl']:.2f}")
    print(f"  Top-p  PPL: {res['topp_ppl']:.2f}")
    print(f" Viterbi PPL: {res['ourDecoder_ppl']:.2f}")
    print(f"OurGreedy  PPL: {res['ourGreedy_ppl']:.2f}")
    print(f"Greedy answer: {res['greedy_text']}")

    print(f"ourGreedy answer: {res['ourGreedy_text']}")
    print(f"ourDecoder answer: {res['ourDecoder_text']}")
    print(f"Beam answer: {res['beam_text']}")


# ---------------------------------------------------------
# Save results to CSV
# ---------------------------------------------------------
# results_df = pd.DataFrame(all_results)
# timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# output_file = f"C:/Users/jivesh/Desktop/SeniorThesis/decoder_comparison_{timestamp}.csv"
# results_df.to_csv(output_file, index=False)
# print(f"Results saved to {output_file}")

In [217]:
import pandas as pd

# Load the results CSV. output_file must already be defined in the session.
# NOTE(review): the only assignment to output_file visible nearby is inside
# the commented-out "Save results to CSV" section -- confirm where it is set.
df = pd.read_csv(output_file)

# Count the rows where ourDecoder_ppl is strictly lower than beam_ppl and
# strictly lower than greedy_ppl. The messages below now say "lower" so that
# they describe the `<` comparison that is actually computed.
count_beam = (df["ourDecoder_ppl"] < df["beam_ppl"]).sum()
count_greedy = (df["ourDecoder_ppl"] < df["greedy_ppl"]).sum()

print("Number of times ourDecoder_ppl is lower than beam_ppl ", count_beam)
print("Number of times ourDecoder_ppl is lower than greedy_ppl ", count_greedy)


Number of times ourDecoder_ppl is greater than beam_ppl  2
Number of times ourDecoder_ppl is greater than greedy_ppl  2


In [None]:
# Perplexity of one text under the model; greedy_text, model and tokenizer
# are expected to exist already in the notebook session.
perplexity = compute_perplexity(model,tokenizer,greedy_text)
print(perplexity)

58.50249398366958


In [38]:
prompt = prompts[0]
# Split the prompt into runs of word characters and single punctuation marks.
# The pattern is written with double quotes so the apostrophe is really part
# of the character class; the previous spelling (two adjacent single-quoted
# literals) silently dropped it.
result = re.findall(r"\w+|[.,!?;']", prompt)
print(result)
# Compare the tokenizer's token count with the regex word/punctuation count.
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]
print(len(input_ids[0]))
print(len(result))


['Valkyria', 'Chronicles', 'III']
9
3


In [47]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import itertools

class SearchTree:
    """Node of a search tree over generated text pieces.

    Each node stores its own piece of text (``context``), the probability
    attached to that piece, a link to its parent node, the parent's index
    and a list of children.
    """

    def __init__(self, context, probability, parent=None, parent_index=None):
        self.context, self.probability = context, probability
        self.parent, self.parent_index = parent, parent_index
        # Only replace_parent() adds entries here; the constructor does not
        # register the new node with its parent.
        self.children = []

    def create_child(self):
        """Return this node unchanged."""
        return self

    def _path_from_root(self):
        """Return the nodes from the root down to this node, inclusive."""
        path = [self]
        while path[-1].parent is not None:
            path.append(path[-1].parent)
        path.reverse()
        return path

    def build_Context(self):
        """Concatenate ``context`` from the root down to this node."""
        path = self._path_from_root()
        text = path[0].context
        for node in path[1:]:
            text = text + node.context
        return text

    def calcProbTillNow(self):
        """Multiply ``probability`` along the path from the root to this node."""
        path = self._path_from_root()
        product = path[0].probability
        for node in path[1:]:
            product = node.probability * product
        return product

    def replace_parent(self, new_parent):
        """Point this node at ``new_parent`` and add it to that node's children."""
        self.parent = new_parent
        if new_parent is None:
            return
        new_parent.children.append(self)

    def assign_parent_index(self, index):
        """Store ``index`` as this node's parent index."""
        self.parent_index = index

def findProbability(prevToken, newTokens, model_tokenizer_tuple):
    """Score each candidate continuation of ``prevToken``'s context.

    For every node in ``newTokens`` the sentence
    ``prevToken.build_Context() + newToken.context`` is tokenized, and the
    model's probability of that sentence's final token, conditioned on all
    tokens before it, is returned.

    Args:
        prevToken: Node providing the prefix via ``build_Context()``.
        newTokens: Nodes whose ``context`` is appended to the prefix.
        model_tokenizer_tuple: ``(model, tokenizer)`` pair. The tokenizer is
            called with ``padding=True``; the index arithmetic below assumes
            it pads on the right.

    Returns:
        A list of floats, one per entry of ``newTokens`` (empty when
        ``newTokens`` is empty). A sentence that tokenizes to a single token
        has no conditioning context and gets ``0.0``.
    """
    model, tokenizer = model_tokenizer_tuple

    # Nothing to score; also avoids handing the tokenizer an empty batch.
    if not newTokens:
        return []

    prefix = prevToken.build_Context()
    batch_sentences = [prefix + newToken.context for newToken in newTokens]

    # Tokenize inputs
    inputs = tokenizer(batch_sentences, return_tensors="pt", padding=True)
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    # Pass through the model
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    rows = torch.arange(input_ids.shape[0], device=input_ids.device)

    # Index of the last real (non-padding) token in each row.
    last_token_indices = attention_mask.sum(dim=1) - 1

    # Logits at position t describe the token at position t + 1, so the
    # distribution for the last token lives one position earlier. Reading the
    # logits at the last position itself would score whatever comes *after*
    # the candidate instead of the candidate.
    context_indices = (last_token_indices - 1).clamp(min=0)
    logits = outputs.logits[rows, context_indices]
    probs_tensor = torch.nn.functional.softmax(logits, dim=-1)

    # Use the token id actually present in the encoded sentence so that the
    # id and the logits position always refer to the same tokenization.
    target_ids = input_ids[rows, last_token_indices]
    target_probs = probs_tensor[rows, target_ids]

    # A row with fewer than two tokens has nothing to condition on.
    has_context = last_token_indices > 0
    target_probs = torch.where(
        has_context, target_probs, torch.zeros_like(target_probs)
    )

    return target_probs.tolist()

def generateIntermediates(root, numTokens=3, loop_runner=4, model_name="gpt2"):
    """Build a layered trellis of candidate continuations for ``root``.

    The first layer holds the ``numTokens`` most probable next tokens after
    ``root``. Each later layer is formed from the first ``numTokens``
    predictions of every node in the previous layer, de-duplicated by token
    text. Every new node is then re-attached to the previous-layer node that
    maximises ``findProbability(...) * calcProbTillNow()``.

    NOTE(review): token texts are stripped of surrounding whitespace before
    they are stored, and ``SearchTree.build_Context`` joins contexts with no
    separator, so the sentences sent to the model may have words run
    together. Confirm this is intended.

    Args:
        root: Starting text.
        numTokens: Number of candidates taken per node.
        loop_runner: The expansion loop runs for ``range(2, loop_runner)``,
            so ``loop_runner - 2`` layers are added after the first one.
        model_name: Checkpoint loaded with ``from_pretrained``; defaults to
            the previously hard-coded ``"gpt2"``.

    Returns:
        A tuple ``(probabilityMatrix, initialStateProbability, content,
        uniqueTokenLength, flops_counter)`` where

        * ``probabilityMatrix`` has one entry per expansion step; each entry
          lists, for every new node, its ``findProbability`` value against
          each previous-layer node;
        * ``initialStateProbability`` lists the first-layer probabilities;
        * ``content`` lists the token texts of every layer;
        * ``uniqueTokenLength`` lists the size of every layer;
        * ``flops_counter`` maps ``i - 1`` to ``i`` per step (a placeholder,
          no real FLOP counting is done).
    """
    # The model and tokenizer are loaded on every call.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Ensure padding token is set
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

    # Model and tokenizer tuple for probability calculations
    model_tokenizer = (model, tokenizer)

    sentence = SearchTree(root, 1)
    content = []
    probabilityMatrix = []
    uniqueTokenLength = []
    flops_counter = {}

    # ---- First layer: top continuations of the root text -------------------
    input_ids = tokenizer(sentence.context, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        outputs = model(input_ids)

    probs = torch.nn.functional.softmax(outputs.logits[:, -1, :], dim=-1)
    top_probs, top_indices = torch.topk(probs, k=numTokens + 3)

    previous_layer = []
    initialStateProbability = []
    for rank in range(numTokens):
        token_id = top_indices[0, rank].item()
        token_text = tokenizer.decode([token_id]).strip()
        token_prob = top_probs[0, rank].item()

        initialStateProbability.append(token_prob)
        previous_layer.append(
            SearchTree(token_text, token_prob, sentence, parent_index=0)
        )

    content.append([node.context for node in previous_layer])
    uniqueTokenLength.append(numTokens)

    # ---- Later layers ------------------------------------------------------
    for i in range(2, loop_runner):
        # get_top_k_predictions handles one sentence at a time, so a single
        # call returns the same predictions, in the same order, as splitting
        # the list into sub-batches would.
        batch_sentences = [node.build_Context() for node in previous_layer]
        total_predictions = get_top_k_predictions(
            batch_sentences, model, tokenizer, numTokens + 2
        )

        # Keep the first occurrence of every distinct token text.
        seen_texts = set()
        new_layer = []
        for parent, predictions in zip(previous_layer, total_predictions):
            for token_text, prob in predictions[:numTokens]:
                if token_text in seen_texts:
                    continue
                seen_texts.add(token_text)
                new_layer.append(SearchTree(token_text, prob, parent))

        content.append([node.context for node in new_layer])

        # transition_rows[a][b]: score of new node b after previous node a.
        # Skipped for an empty layer so findProbability never sees an empty
        # batch.
        if new_layer:
            transition_rows = [
                findProbability(prev_node, new_layer, model_tokenizer)
                for prev_node in previous_layer
            ]
        else:
            transition_rows = []

        # Previous-layer nodes are not re-parented below, so their path
        # probabilities can be computed once per step.
        path_probs = [prev_node.calcProbTillNow() for prev_node in previous_layer]

        # Re-attach every new node to its best-scoring predecessor.
        probability = []
        for column, newToken in enumerate(new_layer):
            transition = [row[column] for row in transition_rows]
            scores = [p * path for p, path in zip(transition, path_probs)]

            best_index = scores.index(max(scores))
            newToken.replace_parent(previous_layer[best_index])
            newToken.assign_parent_index(best_index)

            probability.append(transition)

        probabilityMatrix.append(probability)
        flops_counter[i - 1] = i  # Just a placeholder since we're not tracking actual FLOPS

        uniqueTokenLength.append(len(new_layer))
        previous_layer = new_layer

    return probabilityMatrix, initialStateProbability, content, uniqueTokenLength, flops_counter

def get_top_k_predictions(sentences, model, tokenizer, top_k=100):
    """Return the most probable next tokens for each sentence.

    Each sentence is tokenized and run through the model on its own. The
    next-token distribution is taken from the last position, and its
    ``top_k`` highest-probability tokens are decoded and stripped of
    surrounding whitespace. Tokens that are empty after stripping are
    dropped, so a sentence may yield fewer than ``top_k`` entries.

    Args:
        sentences: Iterable of input strings.
        model: Causal language model; must expose ``device`` and return an
            object with ``logits`` when called on the input ids.
        tokenizer: Matching tokenizer providing ``__call__`` and ``decode``.
        top_k: Maximum number of candidates per sentence. Values larger than
            the vocabulary size are clamped instead of raising.

    Returns:
        One list per sentence of ``(token_text, probability)`` tuples, in
        descending order of probability.
    """
    predictions = []

    for sentence in sentences:
        input_ids = tokenizer(sentence, return_tensors="pt")["input_ids"].to(model.device)

        with torch.no_grad():
            outputs = model(input_ids)

        # Distribution over the token that follows the sentence.
        probs = torch.nn.functional.softmax(outputs.logits[:, -1, :], dim=-1)

        # torch.topk raises when k exceeds the dimension size, so clamp it.
        k = min(top_k, probs.shape[-1])
        top_probs, top_indices = torch.topk(probs, k=k)

        sentence_predictions = []
        for token_id, prob in zip(top_indices[0].tolist(), top_probs[0].tolist()):
            token = tokenizer.decode([token_id]).strip()
            # A pure-whitespace token (including "\n") is empty after strip().
            if token:
                sentence_predictions.append((token, prob))

        predictions.append(sentence_predictions)

    return predictions

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_with_probabilities(text, max_new_tokens=50, top_k=10):
    """Extend ``text`` greedily and record each chosen token's probability.

    A fresh "gpt2" model and tokenizer are loaded on every call. At each of
    the ``max_new_tokens`` steps the top ``top_k`` next-token candidates are
    computed, the most probable one is appended to the running sequence, and
    its decoded text and probability are recorded.

    Returns:
        ``(generated_tokens, token_probabilities)``: two parallel lists with
        the decoded token strings and their probabilities.
    """
    checkpoint = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    model.eval()

    # Fall back to the EOS token when no padding token is defined.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    sequence = tokenizer.encode(text, return_tensors="pt")

    generated_tokens, token_probabilities = [], []

    for _step in range(max_new_tokens):
        # The whole sequence is re-run each step; only the last position's
        # logits are used.
        with torch.no_grad():
            last_logits = model(sequence).logits[:, -1, :]

        distribution = torch.nn.functional.softmax(last_logits, dim=-1)
        best_probs, best_ids = torch.topk(distribution, top_k)

        # Entry 0 of the top-k result is the highest-probability token.
        chosen = best_ids[0, 0].reshape(1, 1)

        generated_tokens.append(tokenizer.decode(chosen[0]))
        token_probabilities.append(best_probs[0, 0].item())

        sequence = torch.cat([sequence, chosen], dim=1)

    return generated_tokens, token_probabilities

# Example usage: greedily extend a short prompt with the function's default
# settings.
text = "I enjoy walking in the park"
tokens, probs = generate_with_probabilities(text)

# Print each generated token next to the probability recorded for it
for token, prob in zip(tokens, probs):
    print(f"Token: {token}, Probability: {prob:.4f}")

# Rebuild the full text by appending the decoded tokens to the prompt
full_text = text + "".join(tokens)
print(f"\nFull text: {full_text}")