**Official implementation of paper "Token-based Decision Criteria Are Suboptimal in In-context Learning" (NAACL 2025)**:

### Source code for the Analysis in Sec. 5.2.

This notebook contains the source code for part of the analysis in Sec. 5.2 of the paper "Token-based Decision Criteria Are Suboptimal in In-context Learning" (NAACL 2025).

It mainly calculates the inter-category distance, i.e., the averaged Euclidean distance between the per-label centroids of the last hidden states.

Used in experiments of:

1. The blue curve in Fig. 12.

Author: Hakaze Cho, yfzhao@jaist.ac.jp

In [1]:
# Configs
# Hugging Face model identifier passed to `from_pretrained` for both the tokenizer and the model.
huggingface_model_name = "facebook/opt-2.7b"
# Access token forwarded as `token=` to `from_pretrained`. "API_TOKEN" is a placeholder:
# replace it with your own token, and avoid committing a real token with the notebook.
huggingface_token = "API_TOKEN"
# True: load the model with the 4-bit NF4 BitsAndBytes config; False: load it without a quantization config and move it to CUDA.
quantization = False

# Key into `dataset_name_to_class`.
dataset_name = "SemEvalR" # Options: "SemEvalR", "SemEvalL", "poem_sentiment", "TEE", "TEH", "TES", "FP", "AGNews", "MR", "hate_speech"

# Second argument of `prompting.default_prompting` — presumably the number of in-context demonstrations per prompt; confirm in util.prompting.
k = 4

In [5]:
# Import libraries and necessary definitions

import sys
sys.path.append("hidden_calibration_released") # Replace with the path from the working directory to the root of this project. If the working directory is already the root of the project, this line is not needed.

import util.prompting as prompting
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import util.dataset_loader as dataset_loader
import numpy as np
from tqdm import tqdm
from scipy import spatial
import matplotlib.pyplot as plt

def softmax(x):
    """Return the softmax of `x`, normalized over all of its elements.

    The maximum is subtracted before exponentiation. This leaves the result
    mathematically unchanged but keeps `np.exp` from overflowing to `inf`
    (and the quotient from becoming `nan`) when the inputs are large.

    Args:
        x: Array-like of real numbers (e.g. logits).

    Returns:
        A NumPy array with the same shape as `x` whose entries sum to 1.
        An empty input yields an empty array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max would raise on an empty array; keep returning an empty result.
        return np.exp(x)
    exp_shifted = np.exp(x - np.max(x))
    return exp_shifted / np.sum(exp_shifted)

# Lookup table from the short dataset name used in the config cell to the
# corresponding loader class in `util.dataset_loader`.
dataset_name_to_class = dict((
    ("SemEvalR", dataset_loader.SemEval2014_Restaurants),
    ("SemEvalL", dataset_loader.SemEval2014_Laptops),
    ("poem_sentiment", dataset_loader.poem_sentiment),
    ("TEE", dataset_loader.tweet_eval_emotion),
    ("TEH", dataset_loader.tweet_eval_hate),
    ("TES", dataset_loader.tweet_eval_sentiment),
    ("FP", dataset_loader.financial_phrasebank),
    ("AGNews", dataset_loader.agnews),
    ("MR", dataset_loader.rooten_tomato),
    ("hate_speech", dataset_loader.hate_speech18),
))

# Matplotlib appearance: start from the default style, then prefer Times New Roman for text.
plt.style.use('default')
# NOTE(review): this sets the font family to 'Times New Roman', but the next line
# reassigns rcParams['font.family'] to 'serif', so this call appears to be superseded.
plt.rc('font',family='Times New Roman')
plt.rcParams['font.family'] = 'serif'
# Put Times New Roman first in the serif list so the 'serif' family tries it first.
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']

In [None]:
# Fetch the tokenizer and the causal language model from Hugging Face

torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(
    huggingface_model_name,
    token=huggingface_token,
)

if not quantization:
    # No quantization config: load the weights as-is and move the model to CUDA explicitly.
    model = AutoModelForCausalLM.from_pretrained(
        huggingface_model_name,
        token=huggingface_token,
    ).cuda()
else:
    # 4-bit NF4 loading with double quantization and bfloat16 compute dtype.
    model = AutoModelForCausalLM.from_pretrained(
        huggingface_model_name,
        token=huggingface_token,
        quantization_config=BitsAndBytesConfig(
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            load_in_4bit=True,
        ),
    )

In [7]:
# Load the testing data
# Only the default testing division of the selected dataset is loaded in this notebook.

test_data = dataset_name_to_class[dataset_name]().default_testing_division()
# AGNews only: apply `cut_by_length(226)` — presumably limits sample length to keep
# prompts short; confirm the exact semantics in util.dataset_loader.
if dataset_name == "AGNews":
    test_data.cut_by_length(226)

**Calculate the hidden states of the text examples.**

In [None]:
# Run inference on the testing data and collect, for every sample,
# its label index and the last hidden state of the prompt.

label_indexed_by_test_samples = []
last_hidden_state_indexed_by_test_samples = []

# `torch.no_grad()` only disables autograd while it is entered as a context manager;
# calling it as a bare statement has no effect and every forward pass would build a graph.
with torch.no_grad():
    for i in tqdm(range(test_data.get_max())):
        torch.cuda.empty_cache()
        # prpt[0] is the prompt text fed to the model; prpt[1] is looked up in the label space.
        prpt = prompting.default_prompting(test_data, k, query_index=i)
        tknzd_data = tokenizer(prpt[0], return_tensors="pt").input_ids.cuda()
        result = model(tknzd_data, output_hidden_states = True)
        # Last entry of `hidden_states`, last sequence in the batch, last token position.
        hidden_state = result.hidden_states[-1][-1][-1].detach().cpu().numpy()
        # Resolve the label before appending so both lists always stay the same length.
        label_index = test_data.label_space.index(prpt[1])
        last_hidden_state_indexed_by_test_samples.append(hidden_state)
        label_indexed_by_test_samples.append(label_index)

In [None]:
# Group the hidden states by label index, then average each group into a centroid

# One empty bucket per entry of the label space.
hidden_state_list_indexed_by_labels = [[] for _ in test_data.label_space]

for sample_label, sample_state in zip(label_indexed_by_test_samples, last_hidden_state_indexed_by_test_samples):
    hidden_state_list_indexed_by_labels[sample_label].append(sample_state)

# Element-wise mean over the samples of each label.
hidden_state_centroid_indexed_by_labels = [
    np.mean(states_of_label, axis=0)
    for states_of_label in hidden_state_list_indexed_by_labels
]

In [11]:
# Pairwise Euclidean distances between the label centroids

# Full square matrix as a nested list: entry [a][b] is the distance between the centroids of labels a and b.
distance_in_pair = [
    [
        spatial.distance.euclidean(row_centroid, col_centroid)
        for col_centroid in hidden_state_centroid_indexed_by_labels
    ]
    for row_centroid in hidden_state_centroid_indexed_by_labels
]

# Flattened off-diagonal entries in row-major order (each unordered pair appears twice).
distance_list = [
    pair_distance
    for row_index, row in enumerate(distance_in_pair)
    for col_index, pair_distance in enumerate(row)
    if row_index != col_index
]

**Result output**

In [None]:
# Report the full pairwise distance matrix: a nested list whose entry [i][j] is the
# Euclidean distance between the centroids of labels i and j.
print("Result report on " + dataset_name + " dataset, metric: Euclidean distance between centroids of each label\n")

print(distance_in_pair)

In [None]:
# Report the mean of the off-diagonal centroid distances. `distance_list` holds every
# ordered pair with i != j, so each unordered pair is counted twice; the mean is unaffected.
print("Result report on " + dataset_name + " dataset, metric: averaged Euclidean distance among centroids of every label\n")

print(np.mean(distance_list))