**Official implementation of paper "Revisiting In-context Learning Inference Circuit in Large Language Models" (ICLR 2025)**:

### Induction Head Counting

This experiment counts the number of induction heads and correct induction heads in each layer, and also calculates the accuracy based on various induction settings. The results correspond to Fig. 6 (Left, Middle).

Author: Hakaze Cho, yfzhao@jaist.ac.jp, 2024/08

Organized, commented, and modified by: Hakaze Cho, 2025/01/29

**Part I: Import, Define, and Load Everything**

What you should do:
1. [Cell 1] Change the argument of `os.chdir` to the path from your working directory to the directory containing the README.md file.
2. [Cell 2] Define your experiment parameters.
3. Run Cells 1 - 4.


In [None]:
# Cell 1: Import libraries and change the working directory.

## Change the working directory
import os
try:
    # Set this to the path from your working directory to the directory containing the README.md file.
    os.chdir("ICL_Inference_Dynamics_Released")
except OSError:
    # Best effort: os.chdir raises an OSError subclass (e.g. FileNotFoundError) when the
    # relative path cannot be resolved. Only OSError is caught so that KeyboardInterrupt
    # and unrelated errors are not silently swallowed.
    print("Already in the correct directory or the directory does not exist.")

## Import libraries
from util import load_model_and_data, inference, induction_evaluation
import StaICC
import matplotlib.pyplot as plt
import pickle
import numpy as np
from tqdm import tqdm as tqdm

## Global matplotlib settings shared by all plots in this notebook.
plt.style.use('default')
# Set the font family to 'Cambria Math' first; the next line then switches it to the
# generic 'serif' family, exactly as the two consecutive assignments are ordered here.
plt.rcParams['font.family'] = 'Cambria Math'
plt.rc('font', family='serif')
# Put 'Cambria Math' at the front of the serif candidate list.
plt.rcParams['font.serif'] = ['Cambria Math', *plt.rcParams['font.serif']]

In [2]:
# Cell 2: Model and Hugging Face token configuration.

## Hugging Face name of the model that is tested as the LM for ICL.
## Recommended: "meta-llama/Meta-Llama-3-8B", "tiiuae/falcon-7b", "meta-llama/Meta-Llama-3-70B", "tiiuae/falcon-40b"
ICL_model_name = "tiiuae/falcon-7b"

## Whether to use the quantized version of the model: every model that is not in the
## list below is loaded quantized.
## Recommended: keep the default.
quantized = ICL_model_name not in ["meta-llama/Meta-Llama-3-8B", "EleutherAI/pythia-6.9b", "tiiuae/falcon-7b"]

## Hugging Face token used to access the model. You need to set this if you use a Llama model.
huggingface_token = "your token here"


# Experiment parameters

## Number of demonstrations. Recommended: 0, 1, 2, 4, 8, 12.
k = 4

## Index of the dataset taken from the StaICC library. Alternatives: 0, 1, 2, 3, 4, 5.
## See the README.md for more information.
dataset_index = 2

## Force ICL_model to be reloaded even if it already exists among the variables.
## Recommended: False.
model_forced_reload = False

## Force the experiment to be redone even if intermediate results have already been saved.
## Recommended: False.
experiment_forced_redo = False

## Constant factor of the threshold above which a head is counted as an induction head
## (e.g., the "5" in 5/n_t).
induction_head_threthold_times = 5

In [None]:
# Cell 3: Load the data and build the test inputs.

## Build the StaICC benchmark from the demonstration number k configured in Cell 2.
bench = StaICC.Normal(k)

## Build the prompts and queries from the selected dataset of the benchmark.
prompts, queries = load_model_and_data.load_data_from_StaICC_experimentor(
    bench[dataset_index],
    "label_words",
)

In [None]:
# Cell 4: Load the model.

## Pick the namespace in which an already loaded `ICL_model` is looked up.
if "ICL_model" in vars():
    vars_dict = vars()
else:
    vars_dict = locals()

## Load the model and tokenizer only when no model is present yet or a reload is forced.
if "ICL_model" not in vars_dict or model_forced_reload:
    ICL_model, ICL_tknz = load_model_and_data.load_ICL_model(
        ICL_model_name,
        huggingface_token = huggingface_token,
        quantized = quantized,
    )
    loaded = True

**Part II: Run the Experiment**

What you should do:

1. Run Cells 5 - 7.

In [None]:
# Cell 5: Run the inference to get the hidden states and save the intermediate results.
# If the intermediate results are found in the path `experiment_matrial` (built below), they are
# loaded instead of being recomputed, unless `experiment_forced_redo` (Cell 2) is set to True.

data_file_name = "experiment_matrial/" + ICL_model_name.replace('/', '_') + ",induc_Hidd_att" + ',' + str(k) + ',' + str(dataset_index + 1) + ".pickle"
if os.path.exists(data_file_name) and not experiment_forced_redo:
    # NOTE: unpickling can execute arbitrary code; only load cache files written by this notebook.
    with open(data_file_name, 'rb') as f:
        ICL_hidden_states = pickle.load(f)
        print("loaded")
else:
    # Create the cache directory before the (potentially long) inference, so that a missing or
    # unwritable directory does not make the dump below fail after the results are computed.
    os.makedirs(os.path.dirname(data_file_name), exist_ok = True)
    ICL_hidden_states = inference.step3_get_fl_feature_and_lastftol_attention(ICL_model, ICL_tknz, prompts, bench[dataset_index])
    with open(data_file_name, 'wb') as f:
        pickle.dump(ICL_hidden_states, f)

In [6]:
# Cell 6: For every layer, count how many attention heads exceed the induction threshold on each
# sample, and calculate the per-layer statistics.

mean_max_magnitude = []   # per layer: mean over samples of the largest induction magnitude
mean_head_count = []      # per layer: mean over samples of the number of heads above the threshold

for layer_idx in range(len(ICL_hidden_states[1][0])):
    max_per_sample = []
    count_per_sample = []
    for sample_idx in range(len(ICL_hidden_states[1])):
        # Threshold computed from the prompt of this sample.
        threshold = induction_evaluation.get_theresold_magnitude_from_prompt(ICL_tknz, prompts[sample_idx], induction_head_threthold_times, 1)
        # One induction magnitude per attention head of this layer (presumably; see util.induction_evaluation).
        head_magnitudes = induction_evaluation.get_induction_magnitude_for_single_layer(ICL_hidden_states[1], bench[dataset_index], sample_idx, layer_idx)
        max_per_sample.append(max(head_magnitudes))
        # A head is counted when its magnitude is strictly above the threshold.
        count_per_sample.append(sum(1 for magnitude in head_magnitudes if magnitude > threshold))
    mean_max_magnitude.append(np.mean(max_per_sample))
    mean_head_count.append(np.mean(count_per_sample))

In [7]:
# Cell 7: For every layer, count how many attention heads exceed the **correct** induction
# threshold on each sample, and calculate the per-layer statistics.

mean_max_correct_magnitude = []       # per layer: mean over samples of the largest correctness value
mean_correct_head_count = []          # per layer: mean over samples of the number of heads above the threshold
correct_induction_head_statics = []   # per layer: for each head, the number of samples on which it was counted

for layer_idx in range(len(ICL_hidden_states[1][0])):
    max_per_sample = []
    count_per_sample = []
    hits_per_head = [0] * len(ICL_hidden_states[1][0][0])
    for sample_idx in range(len(ICL_hidden_states[1])):
        # Threshold computed from the prompt of this sample and the size of the label space.
        threshold = induction_evaluation.get_theresold_correctness_from_prompt(ICL_tknz, prompts[sample_idx], induction_head_threthold_times, len(bench[dataset_index].get_label_space()), 1)
        # One correctness value per attention head of this layer (presumably; see util.induction_evaluation).
        head_scores = induction_evaluation.get_induction_correctness_for_single_layer(ICL_hidden_states[1], bench[dataset_index], sample_idx, layer_idx)
        max_per_sample.append(max(head_scores))
        # Heads whose value is strictly above the threshold on this sample.
        passing_heads = [head_idx for head_idx, score in enumerate(head_scores) if score > threshold]
        for head_idx in passing_heads:
            hits_per_head[head_idx] += 1
        count_per_sample.append(len(passing_heads))
    correct_induction_head_statics.append(hits_per_head)
    mean_max_correct_magnitude.append(np.mean(max_per_sample))
    mean_correct_head_count.append(np.mean(count_per_sample))

**Part III: Plot and Save the Result**

What you should do:

1. Run Cells 8 - 10. You can define your own file name and directory for saving the results in Cell 10.

In [None]:
# Cell 8: Draw a heatmap of how many times each attention head was counted as a correct induction head.

# Rows are transformer blocks and columns are heads, following the layout built in Cell 7.
heatmap = plt.imshow(correct_induction_head_statics, cmap = 'Greens', vmin = 0)
# Flip the y axis so that the first block is drawn at the bottom.
plt.gca().invert_yaxis()
plt.colorbar(heatmap, shrink=0.5)
plt.xlabel('Head #', fontsize = 12)
plt.ylabel('Transformer Block', fontsize = 12)
plt.title(
    f"Correct Induction Head Counted\n Dataset {dataset_index + 1} with k = {k}\n model: {ICL_model_name}",
    fontsize = 12,
)

In [None]:
# Cell 9: Plot a figure like Fig. 6 (Left): induction and correct induction head counts per block.

plt.figure(figsize=(4, 3))
# Transformer blocks are numbered from 1 on the x axis.
induction_blocks = range(1, len(mean_head_count) + 1)
correct_blocks = range(1, len(mean_correct_head_count) + 1)
plt.plot(induction_blocks, mean_head_count, color = "#257ab6", label = "Induction")
plt.plot(correct_blocks, mean_correct_head_count, color = "green", label = "Correct Induction")
plt.xlim(-1, len(mean_correct_head_count) + 1)
plt.xlabel("Transformer Block Number", fontsize = 12)
plt.ylabel("Induction Head #", fontsize = 12)
plt.title(
    f"Induction Head Counted\n Dataset {dataset_index + 1} with k = {k}\n model: {ICL_model_name}",
    fontsize = 12,
)

# Re-apply the current y limits explicitly.
ax = plt.gca()
ax.set_ylim(ax.get_ylim())

# Take the current ticks, make the second one 1 (the first block number),
# and drop the first and the last tick.
tick_positions = ax.get_xticks()
tick_positions[1] = 1
plt.xticks(tick_positions[1:-1])

plt.legend(loc = 1, prop={'size': 9})

In [10]:
# Cell 10: Save the induction head and correct induction head results.
# Result file organization (each file holds a list of two per-layer lists):
# (mean_max_magnitude: list[layer_index] = mean over samples of the max induction magnitude,
#  mean_head_count: list[layer_index] = mean induction head count)
# (mean_max_correct_magnitude: list[layer_index] = mean over samples of the max correct induction magnitude,
#  mean_correct_head_count: list[layer_index] = mean correct induction head count)

data_file_name = "data/" + ICL_model_name.replace('/', '_') + ",induction_magnitude" + ',' + str(k) + ',' + str(dataset_index + 1) + ".pickle"
# Create the output directory if it is missing, so the results are not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(data_file_name), exist_ok = True)
with open(data_file_name, 'wb') as f:
    pickle.dump([mean_max_magnitude, mean_head_count], f)

data_file_name = "data/" + ICL_model_name.replace('/', '_') + ",correct_induction_magnitude" + ',' + str(k) + ',' + str(dataset_index + 1) + ".pickle"
# Repeated on purpose: the two output paths can be edited independently.
os.makedirs(os.path.dirname(data_file_name), exist_ok = True)
with open(data_file_name, 'wb') as f:
    pickle.dump([mean_max_correct_magnitude, mean_correct_head_count], f)

**Part IV: Calculate the data for Fig. 6 (Middle), and plot / save**

What you should do:

1. Run Cells 11 - 13. You can define your own file name and directory for saving the results in Cell 13.

In [None]:
# Cell 11: Calculate the per-layer mean accuracy based on (1) the full space similarity,
# (2) the best head, and (3) the average over heads.

mean_accuracy_based_on_full_space_similarity = []
mean_accuracy_based_on_besthead = []
mean_accuracy_based_on_averagehead = []

# (1) Full space similarity, computed from ICL_hidden_states[0].
for layers in tqdm(range(len(ICL_hidden_states[1][0]))):
    temp = []
    for sample in range(len(ICL_hidden_states[1])):
        temp.append(induction_evaluation.get_induction_likelihood_full_space_similarity(ICL_hidden_states[0], bench[dataset_index], sample, layers))
    mean_accuracy_based_on_full_space_similarity.append(np.mean(temp))

# (2) and (3) Head-based likelihoods, computed from ICL_hidden_states[1].
for layers in tqdm(range(len(ICL_hidden_states[1][0]))):
    temp = []
    averaged_temp = []
    for sample in range(len(ICL_hidden_states[1])):
        # Evaluate the heads once per (sample, layer) and reuse the result for both statistics,
        # instead of calling the evaluation twice with identical arguments.
        head_likelihoods = induction_evaluation.get_induction_likelihood_head(ICL_hidden_states[1], bench[dataset_index], sample, layers)
        temp.append(np.max(head_likelihoods))
        averaged_temp.append(np.mean(head_likelihoods))
    mean_accuracy_based_on_besthead.append(np.mean(temp))
    mean_accuracy_based_on_averagehead.append(np.mean(averaged_temp))

In [None]:
# Cell 12: Plot a figure like Fig. 6 (Middle): label assignment accuracy per transformer block.

plt.figure(figsize=(4, 3))
# Transformer blocks are numbered from 1 on the x axis; all three curves share these positions.
block_numbers = range(1, len(mean_accuracy_based_on_full_space_similarity) + 1)
plt.plot(block_numbers, mean_accuracy_based_on_full_space_similarity, color = "#ff7f0e", label = "Vanilla Attention")
plt.plot(block_numbers, mean_accuracy_based_on_besthead, color = "green", label = "Best Ind. Head")
plt.plot(block_numbers, mean_accuracy_based_on_averagehead, color = "#257ab6", label = "Head Average")
plt.xlabel("Transformer Block Number", fontsize = 12)
plt.ylabel("Correct Label Assignment", fontsize = 12)
# Dashed reference line at 1 / (size of the label space), labelled "Random".
plt.axhline(1/len(bench[dataset_index].get_label_space()), color = "black", linestyle = "--", linewidth = 1, label = "Random")

# Keep the current upper y limit and fix the lower one to 0.17.
ax = plt.gca()
_, upper_ylim = ax.get_ylim()
plt.ylim((0.17, upper_ylim))
# Take the current ticks, make the second one 1 (the first block number),
# and drop the first and the last tick.
tick_positions = ax.get_xticks()
tick_positions[1] = 1
plt.xticks(tick_positions[1:-1])
plt.xlim(0, len(mean_accuracy_based_on_full_space_similarity) + 1)
plt.legend(loc = 4, prop={'size': 9}, ncol=2)

In [19]:
# Cell 13: Save the accuracy results of the different assignments.
# Result file organization: a list of three per-layer lists
# [full space similarity accuracy, best head accuracy, head average accuracy].

import os
import pickle

data_file_name = "data/" + ICL_model_name.replace('/', '_')+ ",induction_likelihood" + ',' + str(k) + ',' + str(dataset_index + 1) + ".pickle"
# Create the output directory if it is missing, so the results are not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(data_file_name), exist_ok = True)
with open(data_file_name, 'wb') as f:
    pickle.dump([mean_accuracy_based_on_full_space_similarity, mean_accuracy_based_on_besthead, mean_accuracy_based_on_averagehead], f)