In [1]:
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import json
from scipy.optimize import linear_sum_assignment
import numpy as np

# Experiment phase identifier.
# NOTE(review): presumably selects which phase's interpretation outputs are
# analysed by the later cells — confirm against the input paths used there.
phase = 1

def load_model(model_name="Qwen/Qwen3-Embedding-8B", device="cuda"):
    """Load the embedding tokenizer and model.

    Args:
        model_name: Hugging Face model identifier used for BOTH the tokenizer
            and the model, so the two can never get out of sync.
        device: Device the model weights are moved to after loading.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer_emb = AutoTokenizer.from_pretrained(model_name)
    model_emb = AutoModel.from_pretrained(model_name).to(device)
    return tokenizer_emb, model_emb

# Load once at notebook level: get_embeddings() below reads `tokenizer_emb`
# and `model_emb` as globals rather than taking them as arguments.
tokenizer_emb, model_emb = load_model()
def get_embeddings(texts):
    """Embed a batch of texts with the notebook-level embedding model.

    The texts are tokenized as one padded batch (truncation enabled), run
    through ``model_emb`` without gradient tracking, and each text is reduced
    to a single vector by averaging the final-layer hidden states of its
    *real* tokens. Padding positions are excluded through the attention mask,
    so a text's embedding does not depend on how long its batch neighbours are.

    NOTE(review): the Qwen3-Embedding model card pools the last token instead
    of averaging; mean pooling is kept here so the pooling strategy itself is
    unchanged — confirm which one is intended.

    Args:
        texts: A string or list of strings accepted by ``tokenizer_emb``.

    Returns:
        A CPU ``numpy.ndarray`` with one float32 row per input text.
    """
    # Send the batch to wherever the model actually lives instead of assuming
    # a fixed device name.
    inputs = tokenizer_emb(
        texts, padding=True, truncation=True, return_tensors="pt"
    ).to(model_emb.device)

    with torch.no_grad():
        hidden = model_emb(**inputs).last_hidden_state  # (batch, seq, hidden)
        # 1 for real tokens, 0 for padding; broadcast over the hidden axis.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        # clamp guards against a division by zero for an all-padding row.
        token_counts = mask.sum(dim=1).clamp(min=1)
        emb = (hidden * mask).sum(dim=1) / token_counts

    # NumPy has no bfloat16, so normalise to float32 before converting
    # (a no-op when the model already runs in float32).
    return emb.float().cpu().numpy()


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.07s/it]


In [2]:
def set_similarity_hungarian(similarity_matrix):
    """Score how well two sets match, using an optimal one-to-one assignment.

    The Hungarian algorithm (``scipy.optimize.linear_sum_assignment``) picks
    the pairing of rows to columns that maximises the summed similarity. The
    score is that sum divided by the number of matched pairs, i.e. the mean
    similarity over the optimal matching.

    For a rectangular matrix only ``min(n_rows, n_cols)`` pairs can be
    matched, so the normaliser is the number of matched pairs rather than the
    row count. This makes the score identical for a matrix and its transpose;
    unmatched items of the larger set do not lower the score.

    Args:
        similarity_matrix: 2-D array-like of pairwise similarities
            (rows = items of one set, columns = items of the other).

    Returns:
        The mean similarity of the optimally matched pairs as a Python float,
        or ``0.0`` when either set is empty.
    """
    sim = np.asarray(similarity_matrix, dtype=float)

    # Nothing to match: avoid a 0/0 division and report "no similarity".
    if sim.size == 0:
        return 0.0

    # maximize=True is equivalent to minimising the negated matrix.
    row_indices, col_indices = linear_sum_assignment(sim, maximize=True)
    optimal_sum = sim[row_indices, col_indices].sum()

    # Each matched pair contributes at most 1.0 (cosine similarity), so the
    # best achievable sum equals the number of matched pairs.
    max_possible = len(row_indices) * 1.0

    return float(optimal_sum / max_possible)

In [3]:
# Compare, per checkpoint, the themes found by the LoRA interpretation with
# the themes found by the direction interpretation.
# Re-initialised here so re-running this cell does not append duplicates.
results = []

for ckpt in range(4, 21):  # checkpoints 4..20 inclusive
    # Input locations follow the notebook-level `phase` setting instead of a
    # hard-coded "phase1", so changing `phase` switches both inputs together.
    dir_path = f"../outputs_interp/direction/phase{phase}/ckpt_{ckpt}.json"
    lora_path = f"../outputs_interp/phase{phase}/ckpt_{ckpt}.json"

    # JSON is UTF-8 by specification; be explicit so theme text is not
    # decoded with a platform-dependent default encoding.
    with open(dir_path, "r", encoding="utf-8") as f:
        interp_direction = json.load(f)

    with open(lora_path, "r", encoding="utf-8") as f:
        interp_lora = json.load(f)

    # Each file is a list of records; only the "theme" text is compared.
    arr_direction = [item["theme"] for item in interp_direction]
    arr_lora = [item["theme"] for item in interp_lora]

    emb_lora = get_embeddings(arr_lora)
    emb_direction = get_embeddings(arr_direction)

    # Rows index LoRA themes, columns index direction themes.
    sim_matrix = cosine_similarity(emb_lora, emb_direction)

    # Overall set-level agreement via optimal one-to-one matching.
    similarity_score = set_similarity_hungarian(sim_matrix)

    # Most and least similar individual (LoRA theme, direction theme) pairs.
    max_idx = np.unravel_index(sim_matrix.argmax(), sim_matrix.shape)
    min_idx = np.unravel_index(sim_matrix.argmin(), sim_matrix.shape)

    max_score = sim_matrix[max_idx]
    min_score = sim_matrix[min_idx]

    # Cast NumPy scalars to float so the record is JSON-serialisable.
    results.append({
        "ckpt": ckpt,
        "similarity_score": float(similarity_score),
        "max_score": float(max_score),
        "max_lora": arr_lora[max_idx[0]],
        "max_direction": arr_direction[max_idx[1]],
        "min_score": float(min_score),
        "min_lora": arr_lora[min_idx[0]],
        "min_direction": arr_direction[min_idx[1]],
    })


In [5]:
# Rank checkpoints from highest to lowest set-similarity (in place), then
# persist the ranked list as indented JSON.
results.sort(reverse=True, key=lambda entry: entry["similarity_score"])

output_file = "../direction_rank.json"
with open(output_file, "w") as f:
    f.write(json.dumps(results, indent=4))