In [None]:
# Prints a fixed greeting; nothing else in the notebook depends on this cell.
print('hello')

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

# データ読み取り

In [None]:
# Relative path of the CSV that is loaded into `df_full` by the next cell.
csv_path = "data/df_full.csv"

In [None]:
# Load the full table. Later cells read its "prompt", "creativity_score",
# "creativity", "originality", "coherence", "eos_dim_*" and "gen_dim_*" columns.
df_full = pd.read_csv(csv_path)

In [None]:
# Show the column index of `df_full` as the cell output.
df_full.columns

In [None]:
# Hugging Face identifier shared by the tokenizer and the causal LM.
model_name = "Qwen/Qwen2.5-7B-Instruct"

# Loading options: bfloat16 weights, device placement delegated to
# `device_map="auto"`.
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

# 解析用配列を抜き出す

In [None]:
# Keep only the rows that carry a creativity_score, with a fresh 0..N-1 index.
df_stories = df_full.dropna(subset=["creativity_score"]).reset_index(drop=True)
print("有効サンプル数:", len(df_stories))


def _columns_with_prefix(frame, prefix):
    """Return the column names of `frame` starting with `prefix`, in frame order."""
    return [name for name in frame.columns if name.startswith(prefix)]


# Column names of the EOS embedding and of the generated-part mean embedding.
eos_cols = _columns_with_prefix(df_stories, "eos_dim_")
gen_cols = _columns_with_prefix(df_stories, "gen_dim_")

print("EOS 次元数:", len(eos_cols))
print("GEN 次元数:", len(gen_cols))

# Feature matrices: one row per story, one column per embedding dimension.
X_eos = df_stories[eos_cols].to_numpy(dtype=np.float32)
X_gen = df_stories[gen_cols].to_numpy(dtype=np.float32)

# Targets (the individual score columns), all as float32 vectors.
y_scores = df_stories["creativity_score"].to_numpy(dtype=np.float32)
creativity = df_stories["creativity"].to_numpy(dtype=np.float32)
originality = df_stories["originality"].to_numpy(dtype=np.float32)
coherence = df_stories["coherence"].to_numpy(dtype=np.float32)

for array_label, array in (("X_eos", X_eos), ("X_gen", X_gen), ("y_scores", y_scores)):
    print(f"{array_label} shape:", array.shape)

# EOS 埋め込みで creative_direction を作成

In [None]:
def _unit_mean_difference(features, scores, upper_q=0.7, lower_q=0.3):
    """Unit vector pointing from the low-score mean to the high-score mean.

    Rows whose score is >= the `upper_q` quantile form the high group and rows
    whose score is <= the `lower_q` quantile form the low group.

    Returns:
        (direction, high_rows, low_rows) where `direction` is the L2-normalised
        difference of the two group means.
    """
    upper_cut = np.quantile(scores, upper_q)
    lower_cut = np.quantile(scores, lower_q)
    high_rows = features[scores >= upper_cut]
    low_rows = features[scores <= lower_cut]
    gap = high_rows.mean(axis=0) - low_rows.mean(axis=0)
    return gap / np.linalg.norm(gap), high_rows, low_rows


# 1) + 2) Direction estimated from every labelled story (top vs bottom 30%).
creative_direction_eos, X_high, X_low = _unit_mean_difference(X_eos, y_scores)

print("X_high (EOS) shape:", X_high.shape)
print("X_low  (EOS) shape:", X_low.shape)

# 3) In-sample check: projection onto the direction vs. the score.
proj_eos = X_eos @ creative_direction_eos
corr_full = np.corrcoef(proj_eos, y_scores)[0, 1]
print("=== EOS direction: full データでの創造性スコアとの相関 ===")
print("corr(full):", corr_full)

# 4) Generalisation check: fit the direction on the train split only and
#    correlate the projection with the score on the held-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X_eos, y_scores, test_size=0.3, random_state=42
)
cd_eos, Xh, Xl = _unit_mean_difference(X_train, y_train)

print("train High (EOS) shape:", Xh.shape)
print("train Low  (EOS) shape:", Xl.shape)

proj_test = X_test @ cd_eos
corr_test = np.corrcoef(proj_test, y_test)[0, 1]

print("=== EOS direction: test データでの創造性スコアとの相関 ===")
print("corr(test):", corr_test)

# 生成部分平均埋め込みで creative_direction & 相関

In [None]:
# --- 1) Direction from all labelled stories (generated-part mean embedding) ---

# Score cut-offs for the high (top 30%) and low (bottom 30%) groups.
high_thresh_gen = np.quantile(y_scores, 0.7)
low_thresh_gen = np.quantile(y_scores, 0.3)
mask_high_gen, mask_low_gen = y_scores >= high_thresh_gen, y_scores <= low_thresh_gen

X_high_gen, X_low_gen = X_gen[mask_high_gen], X_gen[mask_low_gen]
print("X_high (GEN) shape:", X_high_gen.shape)
print("X_low  (GEN) shape:", X_low_gen.shape)

# Difference of the group means, scaled to unit length.
mu_high_gen, mu_low_gen = X_high_gen.mean(axis=0), X_low_gen.mean(axis=0)
creative_direction_gen = mu_high_gen - mu_low_gen
creative_direction_gen /= np.linalg.norm(creative_direction_gen)

# In-sample correlation between the projection and the score.
proj_gen = np.dot(X_gen, creative_direction_gen)
corr_full_gen = np.corrcoef(proj_gen, y_scores)[0, 1]
print("=== GEN-mean direction: full データでの創造性スコアとの相関 ===")
print("corr(full):", corr_full_gen)

# --- 2) Same procedure, direction fitted on the train split only ---
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
    X_gen, y_scores, test_size=0.3, random_state=42
)

high_th_g = np.quantile(y_train_g, 0.7)
low_th_g = np.quantile(y_train_g, 0.3)
mask_train_high_g, mask_train_low_g = y_train_g >= high_th_g, y_train_g <= low_th_g

Xh_g, Xl_g = X_train_g[mask_train_high_g], X_train_g[mask_train_low_g]
mu_h_g, mu_l_g = Xh_g.mean(axis=0), Xl_g.mean(axis=0)

cd_gen = mu_h_g - mu_l_g
cd_gen /= np.linalg.norm(cd_gen)

# Held-out correlation.
proj_test_g = np.dot(X_test_g, cd_gen)
corr_test_g = np.corrcoef(proj_test_g, y_test_g)[0, 1]

print("=== GEN-mean direction: test データでの創造性スコアとの相関 ===")
print("corr(test):", corr_test_g)

# Projection × 各スコアの相関（EOS direction）

In [None]:
def corr(a, b):
    """Return the Pearson correlation coefficient between `a` and `b`.

    Both arguments are passed to `np.corrcoef`; the off-diagonal entry of the
    resulting 2x2 matrix is returned (as a NumPy scalar).
    """
    corr_matrix = np.corrcoef(a, b)
    return corr_matrix[0, 1]

# Safety net: keep only the rows where the projection and every score are finite
# (non-finite values are not expected, this is just insurance).
mask_valid = np.logical_and.reduce([
    np.isfinite(proj_eos),
    np.isfinite(creativity),
    np.isfinite(originality),
    np.isfinite(coherence),
    np.isfinite(y_scores),
])

proj_valid, creativity_score_valid = proj_eos[mask_valid], y_scores[mask_valid]
creativity_valid, originality_valid, coherence_valid = (
    scores[mask_valid] for scores in (creativity, originality, coherence)
)

print("===== Projection (EOS direction) × 各スコアの相関 =====")
print(f"creativity          : {corr(proj_valid, creativity_valid):.4f}")
print(f"originality         : {corr(proj_valid, originality_valid):.4f}")
print(f"coherence           : {corr(proj_valid, coherence_valid):.4f}")
print(f"creativity_score(*) : {corr(proj_valid, creativity_score_valid):.4f}")

# スコアの基本統計量 & 分布ヒストグラム

In [None]:
# Score columns summarised and plotted below.
score_columns = ["creativity", "originality", "coherence", "creativity_score"]

print(df_stories[score_columns].describe())

# One 10-bin histogram per score column, each on its own figure.
for score_name in score_columns:
    fig, ax = plt.subplots()
    df_stories[score_name].hist(bins=10, ax=ax)
    ax.set_title(score_name)
    ax.set_xlabel(score_name)
    ax.set_ylabel("count")
    plt.show()

# 各層の「生成部分平均 hidden」を全部集める

In [None]:
prompts_for_layers = df_full["prompt"].tolist()          # prompts, in df_full row order
y_scores = df_full["creativity_score"].to_numpy(dtype=np.float32)  # labels, same order


def _gen_mean_per_layer(step_hidden_states):
    """Average the hidden states of the generated tokens, separately per layer.

    `step_hidden_states` is `out.hidden_states` from `model.generate(...,
    output_hidden_states=True, return_dict_in_generate=True)`: one entry per
    decoding step, each a tuple with one tensor per layer output of shape
    [batch=1, step_seq_len, hidden_dim].

    Step 0 covers the prompt. With the KV cache enabled every later step only
    contains the single token that was fed back in, so looking at the last
    step alone (as this cell used to do) yields one token, not the generated
    span. The last position of step k (k >= 1) is the hidden state of the
    k-th generated token whether or not the cache is used, so those positions
    are gathered across steps and averaged. The final sampled token is never
    fed back into the model and therefore has no hidden state.

    If only one step exists (a single new token), the last prompt position is
    used, mirroring the previous `max(1, ...)` fallback.

    Returns:
        np.ndarray of shape (num_layer_outputs, hidden_dim), float32.
    """
    gen_steps = step_hidden_states[1:]
    if len(gen_steps) == 0:
        gen_steps = step_hidden_states[:1]

    layer_means = []
    for layer_idx in range(len(step_hidden_states[0])):
        token_states = torch.stack(
            [step[layer_idx][0, -1, :] for step in gen_steps], dim=0
        )  # [num_generated_tokens, hidden_dim]
        # Average in float32: a bf16 mean over hundreds of tokens loses precision.
        layer_means.append(token_states.to(torch.float32).mean(dim=0).cpu().numpy())
    return np.stack(layer_means, axis=0)


# Sampling is stochastic; fix the seed so a re-run collects the same features.
torch.manual_seed(42)

# NOTE(review): every prompt is re-sampled here, so these hidden states belong
# to freshly generated text, presumably not to the stories that were scored in
# `creativity_score` — confirm this pairing is intended.
all_gen_layers = []  # becomes [N, num_layer_outputs, hidden_dim] after stacking

for prompt in tqdm(prompts_for_layers, desc="Collect per-layer gen-mean features"):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=300,
            temperature=1.0,
            do_sample=True,
            output_hidden_states=True,
            return_dict_in_generate=True,
        )

    all_gen_layers.append(_gen_mean_per_layer(out.hidden_states))

# (N, num_layer_outputs, hidden_dim)
all_gen_layers = np.stack(all_gen_layers, axis=0)

print("all_gen_layers shape:", all_gen_layers.shape)
# e.g. (N, 29, 3584) for a 28-layer model: per the Transformers docs the tuple
# holds the embedding output plus one entry per decoder layer.

In [None]:
N, L, D = all_gen_layers.shape

# Flatten to (N, L*D): layer-major, so column j holds layer j // D, dim j % D.
flat = all_gen_layers.reshape(N, L * D)

# Column names follow the same layer-major order as the reshape above.
columns = [f"layer{layer}_dim{dim}" for layer in range(L) for dim in range(D)]

df_layers = pd.DataFrame(flat, columns=columns)

# Single source of truth for the output location, so the log line always
# reports the path that was actually written.
flat_csv_path = "data/all_gen_layers_flat.csv"
df_layers.to_csv(flat_csv_path, index=False)
print(f"Saved to {flat_csv_path}")

In [None]:
# import pandas as pd
# import numpy as np
# import re

# # === CSV 読み込み ===
# df = pd.read_csv("data/all_gen_layers_flat.csv")

# # === 層と次元の数を自動判定 ===
# layer_dim_pattern = r"layer(\d+)_dim(\d+)"
# layers = []
# dims = []

# for col in df.columns:
#     m = re.match(layer_dim_pattern, col)
#     if m:
#         layers.append(int(m.group(1)))
#         dims.append(int(m.group(2)))

# L = max(layers) + 1
# D = max(dims) + 1
# N = len(df)

# print(f"Detected shape: N={N}, L={L}, D={D}")

# # === numpy 配列に変換 ===
# arr = df.to_numpy(dtype=np.float32)

# # === (N, L, D) に reshape ===
# all_gen_layers = arr.reshape(N, L, D)

# print("all_gen_layers shape:", all_gen_layers.shape)
# print("復元完了！")

# 各層ごとに creative_direction と相関を計算する

In [None]:
N, L, D = all_gen_layers.shape  # N = samples, L = layer outputs, D = hidden_dim

# `y_scores` was rebuilt from df_full, which (unlike df_stories) has not had
# missing scores dropped. Ignore non-finite labels so a single NaN does not
# turn both thresholds, and with them every layer's result, into NaN.
# With no NaN present this is identical to np.quantile / the full arrays.
labelled = np.isfinite(y_scores)

# High / low groups: top and bottom 30% of the labelled scores.
high_thresh = np.nanquantile(y_scores, 0.7)
low_thresh = np.nanquantile(y_scores, 0.3)

mask_high = labelled & (y_scores >= high_thresh)
mask_low = labelled & (y_scores <= low_thresh)

results = []

for layer_idx in range(L):
    X_layer = all_gen_layers[:, layer_idx, :]  # [N, hidden_dim]

    X_high = X_layer[mask_high]
    X_low = X_layer[mask_low]

    # No samples in one of the groups: the direction is undefined.
    if len(X_high) == 0 or len(X_low) == 0:
        results.append({"layer_idx": layer_idx, "corr_full": np.nan})
        continue

    # Difference of the high / low group means.
    mu_high = X_high.mean(axis=0)
    mu_low = X_low.mean(axis=0)

    w = mu_high - mu_low
    norm = np.linalg.norm(w)
    if norm < 1e-8:
        # Degenerate direction (both means coincide): report NaN.
        results.append({"layer_idx": layer_idx, "corr_full": np.nan})
        continue
    w = w / norm

    # Projection of every sample onto the direction.
    proj = X_layer @ w

    # Correlation with creativity_score. Named `layer_corr` so that the helper
    # function `corr` defined in an earlier cell is not overwritten.
    layer_corr = np.corrcoef(proj[labelled], y_scores[labelled])[0, 1]

    results.append({
        "layer_idx": layer_idx,
        "corr_full": layer_corr,
    })

df_layers = pd.DataFrame(results)
print("各層ごとの creative_direction × creativity_score の相関:")
display(df_layers.sort_values("corr_full", ascending=False))

In [None]:
# Drop layers whose correlation is NaN and plot the remaining ones. The
# filtered values used to be computed but never used. `plt` comes from the
# import cell at the top of the notebook.
layers_with_corr = df_layers.dropna(subset=["corr_full"])

fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(layers_with_corr["layer_idx"], layers_with_corr["corr_full"])
ax.set_xlabel("layer_idx")
ax.set_ylabel("corr_full")
ax.set_title("corr_full by layer")
ax.axhline(0, linestyle="--")  # zero-correlation reference line
plt.show()

# 一番相関が高い層で「どの次元が効いているか」を見る

In [None]:
# 相関が最大の layer_idx を取る
if not df_layers["corr_full"].notna().any():
    raise ValueError("corr_full is NaN for every layer; cannot select a best layer")

# `idxmax` returns an index *label*, so the row must be fetched with `.loc`.
# `.iloc` only happened to work because df_layers has a default RangeIndex.
best_row = df_layers.loc[df_layers["corr_full"].idxmax()]
best_layer_idx = int(best_row["layer_idx"])
print("相関最大の layer_idx:", best_layer_idx, " corr:", best_row["corr_full"])

X_layer = all_gen_layers[:, best_layer_idx, :]  # [N, hidden_dim]

# Rebuild the high / low groups (top and bottom 30%). Non-finite labels are
# ignored; with no NaN this equals np.quantile on the full vector.
labelled = np.isfinite(y_scores)
high_thresh = np.nanquantile(y_scores, 0.7)
low_thresh = np.nanquantile(y_scores, 0.3)

mask_high = labelled & (y_scores >= high_thresh)
mask_low = labelled & (y_scores <= low_thresh)

X_high = X_layer[mask_high]
X_low = X_layer[mask_low]

mu_high = X_high.mean(axis=0)
mu_low = X_low.mean(axis=0)

# Unit-length mean-difference direction (epsilon avoids division by zero).
w = mu_high - mu_low
w = w / (np.linalg.norm(w) + 1e-8)

# Show the K dimensions with the largest absolute weight.
K = 20
top_idx = np.argsort(np.abs(w))[::-1][:K]

print(f"\nlayer {best_layer_idx} の creative_direction で寄与の大きい次元 Top-{K}:")
for rank, dim_idx in enumerate(top_idx, start=1):
    print(f"#{rank:2d}  dim={dim_idx:4d}  weight={w[dim_idx]: .6f}")

# f(x)の定義

In [None]:
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# =========================================================
# 0. 前提：all_gen_layers, y_scores がすでにあるとする
#    all_gen_layers: (N, num_layers_plus1, hidden_dim)
#    y_scores      : (N,)
# =========================================================

print("all_gen_layers shape:", all_gen_layers.shape)
print("y_scores shape      :", y_scores.shape)

N, num_layers_plus1, hidden_dim = all_gen_layers.shape

# Step 1: choose the layer to probe. The middle layer is used as an example;
# set layer_idx = num_layers_plus1 - 1 to probe the last entry instead.
layer_idx = num_layers_plus1 // 2
print(f"Using layer index: {layer_idx}")

# Targets as float32 and the (N, hidden_dim) feature slice of the chosen layer.
y = y_scores.astype(np.float32)
X_layer = all_gen_layers[:, layer_idx]

print("X_layer shape:", X_layer.shape)

# =========================================================
# 2. train / test に分割
# =========================================================
X_train, X_test, y_train, y_test = train_test_split(
    X_layer, y, test_size=0.2, random_state=42
)

# Step 3: standardise the features (helps Lasso stability). Statistics come
# from the train split only and are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# Step 4: L1-regularised linear regression; LassoCV picks alpha by 5-fold CV.
# Used here as a simplified k-sparse linear probe.
lasso_options = {
    "alphas": None,       # let LassoCV build the alpha grid
    "cv": 5,
    "random_state": 42,
    "n_jobs": -1,
}
lasso = LassoCV(**lasso_options).fit(X_train_std, y_train)

print("Lasso best alpha:", lasso.alpha_)
for split_label, split_X, split_y in (
    ("Train R^2:", X_train_std, y_train),
    ("Test  R^2:", X_test_std, y_test),
):
    print(split_label, lasso.score(split_X, split_y))

# =========================================================
# 5. どのニューロン（次元）が重要かを見る
# =========================================================
# Fitted Lasso weights, one per hidden dimension of the probed layer.
coef = lasso.coef_
abs_coef = np.abs(coef)

# Dimensions that kept a non-zero weight.
nonzero_idx = np.flatnonzero(abs_coef > 0)
print("Number of non-zero neurons:", nonzero_idx.size)

# The k largest-magnitude weights are treated as the "creativity neurons".
k = 50  # adjust freely
topk_idx = np.argsort(-abs_coef)[:k]

print(f"Top-{k} important neuron indices (in this layer):")
print(topk_idx)

print("Their coefficients (w):")
print(coef[topk_idx])

# =========================================================
# 6. 内部報酬として使うときの関数イメージ
# =========================================================
def creativity_internal_reward(h_layer: np.ndarray) -> float:
    """Score one hidden-state vector with the fitted scaler + Lasso probe.

    Args:
        h_layer: vector of shape (hidden_dim,), the hidden state taken at the
            layer selected by `layer_idx`.

    Returns:
        The Lasso prediction for the standardised vector as a Python float,
        used as an internal creativity reward (an approximation of the
        GPT-4 creativity score the probe was fitted on).
    """
    # The scaler expects a 2-D (1, hidden_dim) input.
    standardized = scaler.transform(h_layer.reshape(1, -1))
    prediction = lasso.predict(standardized)
    return float(prediction[0])

# Example: score the first held-out sample (raw, unstandardised features)
# and compare it with its true label.
sample_h, sample_y = X_test[0], y_test[0]
sample_reward = creativity_internal_reward(sample_h)
print("Sample true score (GPT-4):", sample_y)
print("Sample internal reward   :", sample_reward)

# 報酬関数の具体的な検討案

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import LassoCV

# ==========================================
# 0. 前処理：基本情報の取得
# ==========================================

# df_full と all_gen_layers が既にある前提
# df_full: pandas.DataFrame
# all_gen_layers: np.ndarray, shape (N, num_layers, hidden_dim)

assert "creativity_score" in df_full.columns, "df_full に 'creativity_score' カラムが必要です"

# Feature tensor dimensions and the target vector (one score per df_full row).
N, num_layers, hidden_dim = all_gen_layers.shape
y = df_full["creativity_score"].to_numpy(dtype=np.float32)
print("N, num_layers, hidden_dim:", *all_gen_layers.shape)

# 相関を計算する小ヘルパー
def corr(a, b):
    """Return the Pearson correlation between `a` and `b` as a Python float."""
    corr_matrix = np.corrcoef(a, b)
    return float(corr_matrix[0, 1])

# ==========================================
# 1. Method1: 層ごとの creative_direction を重み付き合成した報酬
#    r_int1[i] = Σ_l w_l * <h_i^(l), d^(l)>
# ==========================================

# Per-layer outputs: direction d^(l), projections of every sample, correlation.
creative_dirs = np.zeros((num_layers, hidden_dim), dtype=np.float32)
layer_proj = np.zeros((N, num_layers), dtype=np.float32)
layer_corrs = np.zeros(num_layers, dtype=np.float32)

# The high / low groups depend only on y, so they are shared by all layers.
high_th = np.quantile(y, 0.7)
low_th = np.quantile(y, 0.3)
mask_high, mask_low = y >= high_th, y <= low_th

for l in range(num_layers):
    X_l = all_gen_layers[:, l, :]   # (N, hidden_dim)

    # Mean-difference direction between the two groups, scaled to unit length
    # (epsilon guards against a zero norm).
    mu_high, mu_low = X_l[mask_high].mean(axis=0), X_l[mask_low].mean(axis=0)
    d_l = mu_high - mu_low
    norm = np.linalg.norm(d_l) + 1e-8
    d_l = d_l / norm
    creative_dirs[l] = d_l

    # proj_i = <h_i^(l), d_l> for every sample i, and its correlation with y.
    proj_l = X_l @ d_l
    layer_proj[:, l] = proj_l
    layer_corrs[l] = corr(proj_l, y)

print("=== Method1: 層ごとの相関 ===")
for l, corr_value in enumerate(layer_corrs):
    print(f"layer {l:2d}: corr = {corr_value:.4f}")

# Layer weights: keep only positively correlated layers and normalise to sum 1.
# If no layer is positive, fall back to normalised absolute correlations.
w = np.maximum(layer_corrs, 0.0)
if w.sum() > 0:
    w = w / w.sum()
else:
    w = np.abs(layer_corrs)
    w = w / (w.sum() + 1e-8)

print("Method1: 重み w（先頭10個）:", w[:10])

# Aggregated reward per sample: weighted sum of the per-layer projections.
reward_dir_agg = (layer_proj * w[None, :]).sum(axis=1)  # (N,)

df_full["reward_dir_agg"] = reward_dir_agg
print("Method1: corr(reward_dir_agg, creativity_score) =",
      corr(reward_dir_agg, y))

# ==========================================
# 2. Method2: 全層フラット + PLSRegression による
#    グローバル創造性方向（dense）
#    r_int2[i] = PLS(X_flat[i])（標準化済み）
# ==========================================

# (N, num_layers * hidden_dim) にフラット化
# All layers concatenated per sample: (N, num_layers * hidden_dim).
X_flat = all_gen_layers.reshape(N, num_layers * hidden_dim)

# Standardise features and target (statistics from the full data set).
scaler_X = StandardScaler().fit(X_flat)
X_scaled = scaler_X.transform(X_flat)

y_2d = y.reshape(-1, 1)
scaler_y = StandardScaler().fit(y_2d)
y_scaled = scaler_y.transform(y_2d).ravel()  # zero mean, unit variance

# Single-component PLS regression, fitted and evaluated on the same data.
pls = PLSRegression(n_components=1)
pls.fit(X_scaled, y_scaled)
y_pred_pls = pls.predict(X_scaled).ravel()  # (N,)

df_full["reward_pls_global"] = y_pred_pls

print("Method2: PLSRegression")
print("  corr(y_pred_pls, creativity_score)        =", corr(y_pred_pls, y))
print("  corr(y_pred_pls, y_scaled) (形のチェック) =", corr(y_pred_pls, y_scaled))

# ==========================================
# 3. Method3: 全層フラット + LassoCV による
#    グローバル sparse linear probe
#    r_int3[i] = Lasso(X_scaled[i])
# ==========================================

# Sparse linear probe over the flattened, standardised features; alpha is
# chosen by 5-fold CV. Fitted and evaluated on the same data.
lasso_options = {
    "alphas": None,     # automatic alpha grid
    "cv": 5,
    "random_state": 42,
    "n_jobs": -1,
}
lasso = LassoCV(**lasso_options)
lasso.fit(X_scaled, y_scaled)

y_pred_lasso = lasso.predict(X_scaled)  # (N,)
df_full["reward_lasso_global"] = y_pred_lasso

# Number of features that kept a non-zero weight.
num_nonzero = np.count_nonzero(lasso.coef_ != 0)

print("Method3: LassoCV")
print("  best alpha:", lasso.alpha_)
print("  non-zero features:", num_nonzero, "/", lasso.coef_.shape[0])
print("  corr(y_pred_lasso, creativity_score)        =", corr(y_pred_lasso, y))
print("  corr(y_pred_lasso, y_scaled) (形のチェック) =", corr(y_pred_lasso, y_scaled))

# ==========================================
# 4. 結果のざっくりサマリ
# ==========================================
print("\n=== Summary ===")
# One line per method: correlation of the stored reward column with y.
for summary_label, reward_column in (
    ("Method1 (layer-wise dir agg)   corr =", "reward_dir_agg"),
    ("Method2 (PLS global direction) corr =", "reward_pls_global"),
    ("Method3 (Lasso sparse global)  corr =", "reward_lasso_global"),
):
    print(summary_label, corr(df_full[reward_column], y))

# 必要なら df_full.to_csv("df_with_internal_rewards.csv", index=False) などで保存

# Lasso RM

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# ==========================================
# 0. 前提チェック
# ==========================================

# Preconditions: the label column exists and the features are a NumPy array.
assert isinstance(all_gen_layers, np.ndarray), "all_gen_layers は np.ndarray である必要があります"
assert "creativity_score" in df_full.columns, "df_full に 'creativity_score' カラムが必要です"

# Target vector, shape (N,).
y = df_full["creativity_score"].to_numpy(dtype=np.float32)

N, num_layers, hidden_dim = all_gen_layers.shape
print(f"N={N}, num_layers={num_layers}, hidden_dim={hidden_dim}")

# ヘルパー：相関
def corr(a, b):
    """Pearson correlation of `a` and `b`, converted to a built-in float."""
    pearson = np.corrcoef(a, b)[0, 1]
    return float(pearson)

# ==========================================
# 1. 特徴量フラット化
#    all_gen_layers: (N, L, D) → X: (N, L*D)
# ==========================================

# Flatten (N, L, D) -> (N, L*D); layer-major column order.
X = all_gen_layers.reshape(N, num_layers * hidden_dim)
print("X shape:", X.shape)

# Split row *positions* rather than the arrays, so that predictions can be
# written back to the matching rows afterwards.
TEST_SIZE = 0.2
RANDOM_STATE = 42

indices = np.arange(N)
idx_train, idx_test = train_test_split(
    indices,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

X_train, y_train = X[idx_train], y[idx_train]
X_test, y_test = X[idx_test], y[idx_test]

print("Train size:", X_train.shape[0], " Test size:", X_test.shape[0])

# ==========================================
# 3. 標準化（X, y ともに train のみで fit）
# ==========================================

# --- Standardisation: X and y statistics are estimated on the train split ---
scaler_X = StandardScaler().fit(X_train)
X_train_std = scaler_X.transform(X_train)
X_test_std = scaler_X.transform(X_test)

scaler_y = StandardScaler()
y_train_std = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()

# --- LassoCV (sparse global reward model); alpha chosen by 5-fold CV ---
lasso = LassoCV(
    alphas=None,      # automatic alpha grid
    cv=5,
    random_state=RANDOM_STATE,
    n_jobs=-1,
    max_iter=5000,
)
lasso.fit(X_train_std, y_train_std)

print("\n=== LassoCV fit done ===")
print("best alpha:", lasso.alpha_)


def _to_original_scale(pred_std):
    """Map predictions from the standardised target space back to score units."""
    return scaler_y.inverse_transform(pred_std.reshape(-1, 1)).ravel()


# --- Predictions in standardised space and on the original score scale ---
y_train_pred_std = lasso.predict(X_train_std)
y_test_pred_std = lasso.predict(X_test_std)

y_train_pred = _to_original_scale(y_train_pred_std)
y_test_pred = _to_original_scale(y_test_pred_std)

# ==========================================
# 6. 評価指標の計算
# ==========================================

# Correlation, R^2, MAE and RMSE for both splits, on the original score scale.
# Key order determines the print order below.
metrics = {
    "corr_train": corr(y_train_pred, y_train),
    "corr_test": corr(y_test_pred, y_test),
    "r2_train": r2_score(y_train, y_train_pred),
    "r2_test": r2_score(y_test, y_test_pred),
    "mae_train": mean_absolute_error(y_train, y_train_pred),
    "mae_test": mean_absolute_error(y_test, y_test_pred),
    "rmse_train": np.sqrt(mean_squared_error(y_train, y_train_pred)),
    "rmse_test": np.sqrt(mean_squared_error(y_test, y_test_pred)),
}

print("\n=== Evaluation (Lasso global RM) ===")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name:>10}: {metric_value:.4f}")

# ==========================================
# 7. 予測を df_full に格納（rm_lasso_pred 列）
#    インデックスを対応させる
# ==========================================

df_full = df_full.copy()  # work on a copy so the previous df_full object is untouched

# idx_train / idx_test are row *positions* (they come from np.arange(N)), so
# the predictions are placed positionally in a NumPy buffer and assigned as a
# whole column. `df_full.loc[idx, ...]` treats them as index *labels*, which is
# only correct while df_full keeps a default RangeIndex.
rm_lasso_pred_full = np.full(N, np.nan, dtype=np.float64)
rm_lasso_pred_full[idx_train] = y_train_pred
rm_lasso_pred_full[idx_test] = y_test_pred

# Raises if len(df_full) != N, i.e. if features and rows are out of sync.
df_full["rm_lasso_pred"] = rm_lasso_pred_full

print("\nSample of df_full[['creativity_score', 'rm_lasso_pred']]:")
print(df_full[["creativity_score", "rm_lasso_pred"]].head())

# ==========================================
# 8. スパース性の解析（どの層のどの次元が効いているか）
# ==========================================

coef = lasso.coef_  # shape: (num_layers * hidden_dim,)

nonzero_idx = np.where(coef != 0)[0]
num_nonzero = nonzero_idx.shape[0]

print("\n=== Sparsity info ===")
print("non-zero features:", num_nonzero, "/", coef.size)

# Number of non-zero weights per layer. The flat index is layer-major, so
# `flat // hidden_dim` is the layer. np.unique returns the layers sorted.
# (The per-feature `dim` previously computed in this loop was never used.)
layers_hit, hits_per_layer = np.unique(nonzero_idx // hidden_dim, return_counts=True)
layer_counts = {int(layer): int(count) for layer, count in zip(layers_hit, hits_per_layer)}

print("non-zero coeff per layer (layer: count):")
for l in sorted(layer_counts.keys()):
    print(f"  layer {l:2d}: {layer_counts[l]}")

# Optional: the top_k features with the largest absolute weight.
top_k = 20
abs_coef = np.abs(coef)
top_idx = np.argsort(-abs_coef)[:top_k]

print(f"\nTop-{top_k} important neurons (global index → (layer, dim, weight)):")
for rank, k in enumerate(top_idx, start=1):
    # Decode the flat index back into (layer, dimension-within-layer).
    layer, dim = divmod(k, hidden_dim)
    w_k = coef[k]
    print(f"#{rank:2d} flat={k:6d}  layer={layer:2d}  dim={dim:4d}  weight={w_k:+.5f}")

# ==========================================
# 9. 報酬として使いやすいように、標準化済みスコアも持っておく
#    （PPO でそのまま reward にしてもよい形）
# ==========================================

# y_pred_std_full を df_full と整合する形で格納したい場合
# Standardised predictions placed back at their original row positions.
y_pred_std_full = np.empty(N, dtype=np.float32)
for row_positions, preds_std in (
    (idx_train, y_train_pred_std),
    (idx_test, y_test_pred_std),
):
    y_pred_std_full[row_positions] = preds_std

df_full["rm_lasso_pred_std"] = y_pred_std_full

for heading, column in (
    ("\nSummary of rm_lasso_pred (original scale):", "rm_lasso_pred"),
    ("\nSummary of rm_lasso_pred_std (z-score like):", "rm_lasso_pred_std"),
):
    print(heading)
    print(df_full[column].describe())

# 必要なら保存
# df_full.to_csv("df_with_lasso_rm.csv", index=False)

# ==========================================
# 10. 後で PPO などから呼べるような状態を一つにまとめる例
# ==========================================

# Everything needed to re-apply the reward model without sklearn objects:
# feature/target standardisation statistics plus the linear weights.
lasso_rm_state = dict(
    num_layers=num_layers,
    hidden_dim=hidden_dim,
    scaler_X_mean=scaler_X.mean_.astype(np.float32),
    scaler_X_scale=scaler_X.scale_.astype(np.float32),
    scaler_y_mean=float(scaler_y.mean_[0]),
    scaler_y_scale=float(scaler_y.scale_[0]),
    coef=coef.astype(np.float32),
    intercept=float(lasso.intercept_),
)

# 例: joblib で保存したければ
# import joblib
# joblib.dump(lasso_rm_state, "lasso_creativity_rm_state.joblib")

In [None]:
# Serialise the reward-model state dict to disk with torch.save.
# NOTE(review): the dict holds NumPy arrays, so loading it with
# `torch.load(..., weights_only=True)` (the default in recent PyTorch
# releases) presumably needs weights_only=False or allow-listed NumPy
# globals — confirm against the consumer of this file.
torch.save(lasso_rm_state, "lasso_creativity_rm_state.pt")