In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
model_name = "epfl-llm/meditron-7b"  
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


In [4]:
def generate_cot_response(prompt):
  """
  Generates a Chain-of-Thought (CoT) response using the Meditron model.

  Args:
    prompt: The input question or task.

  Returns:
    The generated CoT response.
  """
  print("step one")
  # 1. Prompt the model for a CoT explanation:
  cot_prompt = f"Let's think step-by-step. {prompt}"
  input_ids = tokenizer(cot_prompt, return_tensors="pt").input_ids
  print("step two")
  # 2. Generate the CoT response:
  generated_ids = model.generate(
      input_ids=input_ids,
      max_length=256,  # Adjust as needed
      num_beams=5,     # Adjust for beam search
      no_repeat_ngram_size=2,
      early_stopping=True
  )
  print("step 3")
  # 3. Decode the generated response:
  cot_response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

  return cot_response

# Example usage:
question = "What are standard treatments against ovarian cancer?"
cot_response = generate_cot_response(question)
print(cot_response) 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


step one
step two
step 3
Let's think step-by-step. What are standard treatments against ovarian cancer?
- Surgery is the first line of treatment. It is used to remove as much of the tumor as possible. The surgeon removes the uterus, the fallopian tubes, and the ovary on the affected side. If the cancer has spread to the lymph nodes in the pelvis, they are also removed. This is called a total hysterectomy with bilateral salpingo-oophrectomy (THBSO). This surgery can be done laparoscopically or through a large abdominal incision (laparotomy). In some cases, it is not possible to completely remove the disease. In this case, a biopsy is performed to confirm the diagnosis and to determine the type of cancer.

## - 
Chemotherapy is a treatment that uses drugs to kill cancer cells. Chemotherapeutic agents are given intravenously (through a vein) or orally (by mouth). The most commonly used chemotherapies are carboplatin, cisplatin, paclitaxel,


In [6]:
import torch
import torch.nn.functional as F
def get_softmax_values(prompt):
  """
  Generates a Chain-of-Thought (CoT) response using the Meditron model 
  and returns the softmax values of the logits.

  Args:
    prompt: The input question or task.

  Returns:
    A tuple containing:
      - The generated CoT response.
      - A tensor containing the softmax values of the logits.
  """

  # 1. Prepare input:
  cot_prompt = f"Let's think step-by-step. {prompt}"
  input_ids = tokenizer(cot_prompt, return_tensors="pt").input_ids

  # 2. Generate the CoT response:
  outputs = model(input_ids)
  logits = outputs.logits 

  # 3. Apply softmax:
  softmax_probs = F.softmax(logits, dim=-1)

  # 4. Decode the generated response:
  generated_ids = torch.argmax(logits, dim=-1)
  cot_response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

  return cot_response, softmax_probs

# Example usage:
question = "What is the capital of France?"
cot_response, softmax_probs = get_softmax_values(question)
print(cot_response) 
print(softmax_probs) 

-s get about byby-step about
 is the first of the?

tensor([[[1.5687e-09, 8.4315e-10, 1.8103e-07,  ..., 5.1785e-06,
          1.8418e-06, 1.1702e-05],
         [7.2899e-09, 3.9186e-09, 2.1544e-07,  ..., 3.1517e-08,
          3.2411e-07, 3.5569e-08],
         [2.5702e-09, 2.3227e-08, 9.9168e-10,  ..., 5.5458e-08,
          5.3238e-08, 7.4496e-08],
         ...,
         [6.0340e-09, 3.9582e-09, 1.1753e-06,  ..., 2.7216e-07,
          1.6272e-07, 1.3918e-07],
         [2.3242e-09, 1.9306e-08, 1.9580e-08,  ..., 2.8468e-07,
          7.0467e-07, 3.1991e-08],
         [3.2856e-09, 1.4302e-08, 7.5604e-07,  ..., 8.9140e-07,
          2.4390e-06, 4.6813e-07]]], grad_fn=<SoftmaxBackward0>)


In [8]:

softmax_probs.shape

torch.Size([1, 18, 32017])

In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F



def get_tokens_with_probs(prompt, top_k=5):
  """
  Generates a Chain-of-Thought (CoT) response using the Meditron model 
  and returns the top-k most probable tokens at each position.

  Args:
    prompt: The input question or task.
    top_k: The number of top probable tokens to return.

  Returns:
    A list of tuples, where each tuple contains:
      - The generated token.
      - The probability of the generated token.
  """

  # 1. Prepare input:
  cot_prompt = f"Let's think step-by-step. {prompt}"
  input_ids = tokenizer(cot_prompt, return_tensors="pt").input_ids

  # 2. Generate the CoT response:
  outputs = model(input_ids)
  logits = outputs.logits 

  # 3. Apply softmax:
  softmax_probs = F.softmax(logits, dim=-1)

  # 4. Get top-k probabilities and corresponding tokens:
  top_k_probs, top_k_indices = torch.topk(softmax_probs, k=top_k, dim=-1)

  # 5. Decode the top-k tokens:
  top_k_tokens = tokenizer.convert_ids_to_tokens(top_k_indices[0].tolist())

  # 6. Combine tokens and probabilities:
  token_prob_pairs = []
  for i in range(top_k):
    token_prob_pairs.append((top_k_tokens[i], top_k_probs[0][i].item()))

  return token_prob_pairs

# Example usage:
question = "What is the capital of France?"
token_prob_pairs = get_tokens_with_probs(question)

for token, prob in token_prob_pairs:
  print(f"Token: {token}, Probability: {prob:.4f}") 

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'

In [28]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F


def get_tokens_with_probs(prompt, top_k=5):
  """
  Generates a Chain-of-Thought (CoT) response using the Meditron model 
  and returns the top-k most probable tokens at each position.

  Args:
    prompt: The input question or task.
    top_k: The number of top probable tokens to return.

  Returns:
    A list of tuples, where each tuple contains:
      - The generated token.
      - The probability of the generated token.
  """

  # 1. Prepare input:
  cot_prompt = f"Let's think step-by-step. {prompt}"
  input_ids = tokenizer(cot_prompt, return_tensors="pt").input_ids

  # 2. Generate the CoT response:
  outputs = model(input_ids)
  logits = outputs.logits 

  # 3. Apply softmax:
  softmax_probs = F.softmax(logits, dim=-1)
  generated_ids = torch.argmax(logits, dim=-1)
  cot_response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

  cot_response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
  # 4. Get top-k probabilities and corresponding tokens:
  top_k_probs, top_k_indices = torch.topk(softmax_probs, k=top_k, dim=-1)
  top_k_tokens = []
  for i in range(top_k_indices.size(0)):  # Iterate over batch dimension
      decoded_tokens = []
      for token in top_k_indices[i].tolist():
          decoded_tokens.append(tokenizer.convert_ids_to_tokens(token))
      top_k_tokens.append(decoded_tokens) 

  # 6. Combine tokens and probabilities:
  token_prob_pairs = []
  for i in range(top_k):
    token_prob_pairs.append((top_k_tokens[0][i], top_k_probs[:,i]))  # Access the first sequence in the batch
 
  return token_prob_pairs, cot_response


# Example usage:
question = "What is the capital of France?"
get_tokens_with_probs(question)


([(['▁', '▁O', '▁S', '▁C', '▁B'],
   tensor([[0.3874, 0.0321, 0.0215, 0.0204, 0.0185]], grad_fn=<SelectBackward0>)),
  (['-', 'ter', "'", 'roz', 'ters'],
   tensor([[0.3440, 0.3117, 0.0415, 0.0392, 0.0271]], grad_fn=<SelectBackward0>)),
  (['s', '▁s', 'S', 'er', '▁'],
   tensor([[9.9246e-01, 1.9348e-03, 7.2038e-04, 1.8667e-04, 1.7605e-04]],
          grad_fn=<SelectBackward0>)),
  (['▁get', '▁talk', '▁Get', '▁T', '▁face'],
   tensor([[0.0815, 0.0699, 0.0475, 0.0443, 0.0305]], grad_fn=<SelectBackward0>)),
  (['▁about', '▁of', '▁for', '▁this', '▁outside'],
   tensor([[0.5842, 0.0869, 0.0216, 0.0160, 0.0150]], grad_fn=<SelectBackward0>))],
 '-s get about byby-step about\n is the first of the?\n')

In [25]:
tokenizer.convert_ids_to_tokens(1900)

'▁best'