## Ensemble learning on transformer-based models from Hugging Face

In [2]:
# typings
from typing import List, Tuple

import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

In [None]:
# Checkpoint identifier passed to `from_pretrained` when loading the tokenizer and model.
model_name = "microsoft/Phi-3-mini-4k-instruct"

In [None]:
# NOTE(review): `weight` is not read anywhere in the visible cells — presumably
# this model's weight within the ensemble; confirm against later cells.
weight = 1.0
# NOTE(review): trust_remote_code=True lets the checkpoint repository run its own
# Python code at load time — confirm this is still required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

In [None]:
# Example prompt; tokenized with return_tensors="pt" so the result can be
# unpacked straight into the model call (`model(**prompt_tokenized)`).
prompt = "What is the capital of France?"
prompt_tokenized = tokenizer(prompt, return_tensors="pt")

### Get the top-k tokens

In [None]:
def get_top_k(input, k: int = 10) -> List[Tuple[str, float]]:
    """
    Get the top-k next-token probabilities from the model output.

    Calls the module-level ``model`` with ``input`` and ranks the vocabulary by
    the probability assigned at the last position of the first sequence in the
    batch. Token ids are mapped to strings with the module-level ``tokenizer``.

    :param input: Mapping of keyword arguments for the model call (it is
        unpacked as ``model(**input)``), e.g. the tokenizer output created
        with ``return_tensors="pt"``. The name shadows the builtin but is kept
        so existing keyword callers keep working.
    :param k: The number of top tokens to retrieve. Values larger than the
        vocabulary size are clamped to it; values <= 0 yield an empty list.
    :return: A list of ``(token, probability)`` tuples, ordered from the most
        to the least probable token. Probabilities are plain Python floats.
    """
    if k <= 0:
        return []

    # Inference only: no gradients are needed to read the logits.
    with torch.no_grad():
        logits = model(**input).logits

    # Normalise only the slice that is actually used (first sequence, last
    # position) instead of running softmax over every position.
    last_token_probabilities = F.softmax(logits[0, -1, :], dim=-1)

    # torch.topk returns values already sorted in descending order; tensors do
    # not support the negative-step slicing that argsort()[-k:][::-1] needs.
    k = min(k, last_token_probabilities.shape[-1])
    top_k_probs, top_k_indices = torch.topk(last_token_probabilities, k)

    # Hand the tokenizer plain ints rather than a tensor of ids.
    top_k_tokens = tokenizer.convert_ids_to_tokens(top_k_indices.tolist())

    return list(zip(top_k_tokens, top_k_probs.tolist()))

### Get Computed Probabilities

In [None]:
# Print every top-k candidate for the prompt together with its probability.
# Each entry from get_top_k is a (token, probability) pair.
for token, prob in get_top_k(prompt_tokenized):
    print(f"Token: {token}, Probability: {prob:.4f}")