In [None]:
# Smoke-test cell: prints a fixed greeting and nothing else.
print("hello")

In [None]:
# =================================================================
# Full Implementation: Internal Divergence Curiosity (IDC) for LLMs
# Goal: implement and validate a reward model that uses the "semantic
#       distance" between internal features to avoid déjà vu and to
#       encourage diffusive exploration
# =================================================================

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.decomposition import PCA
from transformers import AutoModelForCausalLM, AutoTokenizer

# ==========================================
# 1. Configuration & model loading
# ==========================================
# NOTE: if memory is tight, point MODEL_NAME at
# "Qwen/Qwen2.5-0.5B-Instruct" or "gpt2" instead.
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

print("Loading Model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    # Half precision on GPU, full precision on CPU.
    torch_dtype=(torch.float16 if DEVICE == "cuda" else torch.float32),
)
# Inference only: switch the module to evaluation mode.
model.eval()
print("Model Loaded.")

# ==========================================
# 2. Building the common subspace
#    (PCA run that defines what counts as "ordinary")
# ==========================================
REMOVE_TOP_K = 20  # number of leading principal directions to remove (common component)

# Calibration data: sentences from a range of genres that define "ordinary".
calibration_texts = [
    "The history of science is vast.",
    "Hello, how are you today?",
    "Python is a programming language.",
    "The weather is nice today.",
    "I like to eat apples and bananas.",
    "Deep learning revolutionized AI.",
    "Once upon a time in a land far away.",
    "Please summarize the following text.",
    "1 + 1 equals 2.",
    "Thank you very much for your help.",
]

print("\n--- Step 1: Collecting Hidden States for PCA ---")

# Collect the last-layer hidden state of every token of every sentence.
collected_hiddens = []
with torch.no_grad():
    for text in tqdm(calibration_texts):
        inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
        outputs = model(**inputs, output_hidden_states=True)
        # Drop the batch axis: (Batch, Seq, Dim) -> (Seq, Dim).
        h = outputs.hidden_states[-1].squeeze(0)
        # Keep the copy on the CPU so it can be handed to NumPy later.
        collected_hiddens.append(h.cpu())

# One row per token, as float32 NumPy data for scikit-learn.
X_train = torch.cat(collected_hiddens, dim=0).float().numpy()
print(f"Training PCA on {X_train.shape[0]} tokens...")

# Centre the data explicitly; the mean is reused by the reward model.
mean_vec = X_train.mean(axis=0)
X_centered = X_train - mean_vec

# Fit the PCA whose leading components span the common subspace.
pca = PCA(n_components=REMOVE_TOP_K).fit(X_centered)

# Store basis and mean as tensors so they can be passed to the IDC model.
basis_tensor = torch.tensor(pca.components_, dtype=model.dtype, device=DEVICE)  # [K, Dim]
mean_tensor = torch.tensor(mean_vec, dtype=model.dtype, device=DEVICE)          # [Dim]

print(">>> Common Subspace Created.")
print(f"    Explained Variance Ratio (Top {REMOVE_TOP_K}): {np.sum(pca.explained_variance_ratio_)*100:.2f}%")


# ==========================================
# 3. IDC (Internal Divergence Curiosity) model definition
# ==========================================
class IDCRewardModel:
    """Novelty reward from cosine dissimilarity to an episodic memory.

    Each hidden state is centred with ``mean``, its projection onto the rows
    of ``basis`` (the "common subspace") is removed, and the remaining
    residual is compared with every residual stored in a FIFO memory.
    The reward is ``1 - max cosine similarity``: near 0 for a direction that
    has already been seen, up to 2.0 for an opposite direction.
    """

    def __init__(self, basis, mean, device, memory_capacity=1024):
        """
        Args:
            basis: Tensor of shape [K, Dim] whose rows span the common
                subspace (PCA components). The projection used here is only
                an orthogonal projection if the rows are orthonormal.
            mean: Tensor of shape [Dim] subtracted before projecting
                (PCA mean).
            device: Device on which the memory buffer is allocated.
            memory_capacity: Maximum number of past vectors to remember.
                A value of 0 or less keeps the memory permanently empty.
        """
        self.device = device
        self.basis = basis
        self.mean = mean
        self.dtype = basis.dtype

        # Episodic memory (FIFO buffer) of shape [Memory_Size, Dim].
        self.memory_capacity = memory_capacity
        self.clear_memory()

    def get_semantic_vector(self, h):
        """Return the residual of ``h`` after removing the common subspace.

        Args:
            h: Hidden state(s) whose last dimension is Dim.

        Returns:
            Tensor with the same shape as ``h`` in ``self.dtype``: the
            centred input minus its reconstruction from ``basis``.
        """
        h_centered = h.to(self.dtype) - self.mean

        # Coordinates in the common subspace, then the reconstruction from
        # those coordinates.
        z_common = h_centered @ self.basis.T
        h_common = z_common @ self.basis

        # What is left is the "semantic" residual.
        return h_centered - h_common

    def compute_reward_and_update(self, h_batch):
        """Score a batch against the memory, then append it to the memory.

        Vectors inside one batch are compared only with what was stored
        before this call, never with each other.

        Args:
            h_batch: Hidden states of shape [Batch, Dim]; a single [Dim]
                vector is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape [Batch] in ``self.dtype`` holding
            ``1 - max cosine similarity`` per row, or all ones when the
            memory is empty (nothing seen yet = maximal surprise).
        """
        if h_batch.dim() == 1:
            h_batch = h_batch.unsqueeze(0)

        current_vectors = self.get_semantic_vector(h_batch)  # [Batch, Dim]

        if self.memory_buffer.shape[0] == 0:
            # Same dtype/device as the similarity branch so callers always
            # receive a consistent tensor type.
            rewards = current_vectors.new_ones(current_vectors.shape[0])
        else:
            # Cosine similarity via unit-normalised rows: [Batch, Memory_Size].
            current_norm = F.normalize(current_vectors, p=2, dim=1)
            memory_norm = F.normalize(self.memory_buffer, p=2, dim=1)
            sim_matrix = torch.mm(current_norm, memory_norm.T)

            # The closest memory entry determines how familiar a row is.
            max_sim, _ = torch.max(sim_matrix, dim=1)
            rewards = 1.0 - max_sim

        # Remember the (unnormalised) residuals of this batch.
        self.memory_buffer = torch.cat(
            [self.memory_buffer, current_vectors.detach()], dim=0
        )

        # FIFO eviction. Slicing by the overflow count (instead of
        # ``[-capacity:]``) also works for capacity 0, where ``[-0:]`` would
        # keep the whole buffer.
        overflow = self.memory_buffer.shape[0] - self.memory_capacity
        if overflow > 0:
            self.memory_buffer = self.memory_buffer[overflow:]

        return rewards

    def clear_memory(self):
        """Reset the memory to an empty [0, Dim] buffer (per-episode reset)."""
        self.memory_buffer = torch.empty(
            0, self.basis.shape[1], device=self.device, dtype=self.dtype
        )


# ==========================================
# 4. Verification: repetitive vs. diverse text (reward over time)
# ==========================================
print("\n--- Step 2: Running Verification ---")

# Reward model built from the PCA basis and mean.
idc_model = IDCRewardModel(basis_tensor, mean_tensor, DEVICE, memory_capacity=512)

# Two inputs: one sentence repeated four times, and one that changes topic.
test_cases = [
    dict(
        label="Repetitive (Dejà Vu)",
        text="The cat sat on the mat. The cat sat on the mat. The cat sat on the mat. The cat sat on the mat.",
    ),
    dict(
        label="Diverse (Diffusive)",
        text="The cat sat on the mat. Suddenly, a spaceship landed in the garden. Aliens came out and started dancing tango.",
    ),
]

plt.figure(figsize=(12, 6))

for case in test_cases:
    label, text = case["label"], case["text"]
    print(f"Processing: {label}")

    # Every text is its own episode, so start from an empty memory.
    idc_model.clear_memory()

    # Single forward pass over the whole text.
    inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    # Last-layer hidden states with the batch axis dropped: [Seq, Dim].
    h_seq = outputs.hidden_states[-1].squeeze(0)

    # Decoded tokens, one string per input id.
    tokens = [tokenizer.decode(t) for t in inputs.input_ids[0]]

    # Replay the sequence one token at a time, as if each token had just
    # been generated: score it against the memory, then store it.
    step_rewards = []
    for h_t in h_seq:
        r_t = idc_model.compute_reward_and_update(h_t.unsqueeze(0))  # input is [1, Dim]
        step_rewards.append(r_t.item())

    # One curve per test case, with the mean reward in the legend.
    plt.plot(step_rewards, marker='o', label=f"{label} (Avg: {np.mean(step_rewards):.3f})")

# Figure decoration.
plt.title("IDC Reward Transition: Repetitive vs Diverse")
plt.xlabel("Token Step")
plt.ylabel("IDC Reward (1 - Max Similarity)")
plt.axhline(0, color='black', linewidth=0.5)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()

# Printed guidance on how to read the two curves.
print("\n=== 考察 ===")
print("Repetitive: グラフが右肩下がり（または低空飛行）になれば成功。過去の自分（記憶）と似ているため報酬が減衰する。")
print("Diverse   : グラフが高い値を維持、または新しい単語のタイミングでスパイクすれば成功。常に新しい意味空間へ移動している。")