In [None]:
import sys
import os
import torch
import json
import pickle
import numpy as np
from typing import Any
import builtins
from unittest.mock import MagicMock, patch
from torch import nn
from scipy.sparse import load_npz

# Add src to sys.path
current_dir = os.getcwd()
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.append(src_path)

# Imports from src
from model import (
    UserItemMemory,
    DynamicDPELLM4RecBaseModel,
    DynamicCollaborativeGPTwithItemRecommendHead,
    MSEDynamicDPELLM4RecBaseModel,
    EmbeddingMapper
)
from tokenizer import DynamicBPETokenizerBatch
from transformers import GPT2Config, AutoModel, AutoModelForSequenceClassification

In [None]:
# Configuration and paths.
root_dir = current_dir
dataset = "Beauty"
# Checkpoint to inspect. The default is the original author's local file; set the
# MI4REC_REC_MODEL_PATH environment variable to use another checkpoint without
# editing the notebook.
rec_model_path = os.environ.get(
    "MI4REC_REC_MODEL_PATH",
    r"C:\Users\vulam\Downloads\Master_papers\code\cold_item\MI4Rec\model\Beauty\rec\rec_model_0.1_300_0.2_stella_0.001.pth",
)
data_root = os.path.join(root_dir, "dataset", dataset)
model_name = "gpt2"  # Assumed base model

# Parameters inferred from the checkpoint filename / user request.
# NOTE(review): if rec_model_path is overridden, these values are NOT re-derived;
# update them by hand so they match the checkpoint being loaded.
lambda_V = 0.1
num_meta = 300
cold_start = 0.2
item_logits_infer = "stella"
lr = 0.001
device = "cuda" if torch.cuda.is_available() else "cpu"

# Fail loudly (but do not abort) here rather than deep inside torch.load later.
if not os.path.exists(rec_model_path):
    print(f"WARNING: checkpoint not found at {rec_model_path}")

print(f"Device: {device}")

In [None]:
# 1. Read the warm training matrix; this cell only needs its shape.
train_mat_path = os.path.join(data_root, "warm_train_matrix.npz")
train_mat_path = (
    train_mat_path
    if os.path.exists(train_mat_path)
    # Alternative layout: the file sits in a sub-directory named "0.8" (1 - 0.2 = 0.8).
    else os.path.join(data_root, "0.8", "warm_train_matrix.npz")
)

print(f"Loading train matrix from: {train_mat_path}")
train_mat = load_npz(train_mat_path)

# Rows of the matrix are users, columns are items.
num_users = train_mat.shape[0]
num_items = train_mat.shape[1]
print(f"Num Users: {num_users}, Num Items: {num_items}")

In [None]:
# 2. Peek into the checkpoint to recover the source-embedding table dimensions.
print(f"Loading checkpoint state dict from {rec_model_path}...")
state_dict = torch.load(rec_model_path, map_location=device)

# The shape of 'base_model.item_src_embs.weight' tells us how large the dummy
# embedding array handed to the model constructor must be.
item_src_embs_weight = state_dict.get('base_model.item_src_embs.weight')
if item_src_embs_weight is None:
    print("WARNING: item_src_embs.weight not found in checkpoint!")
    # Fallback guess: 1024-dimensional embeddings, one row per training item.
    stella_num_items, stella_emb_dim = num_items, 1024
else:
    stella_num_items, stella_emb_dim = item_src_embs_weight.shape
    print(f"Found item_src_embs in checkpoint: shape=({stella_num_items}, {stella_emb_dim})")

In [None]:
# 3. Mock pickle.load to bypass missing file '/shared/user/embs/...'
# The model init calls: pickle.load(open(..., 'rb'))
# We patch 'open' and 'pickle.load' so that only that call is intercepted; every
# other call is forwarded to the real implementation.

# Capture the real functions idempotently. If this cell is re-run while the
# mocks are still installed, builtins.open is the mock itself; storing it as
# "original" would make every pass-through call recurse forever. Each mock
# therefore carries the real function in `_original`, which is preferred here.
original_open = getattr(builtins.open, "_original", builtins.open)
original_pickle_load = getattr(pickle.load, "_original", pickle.load)


class MockFile:
    """Placeholder file object returned in place of the missing embeddings pickle.

    It carries no data; `mocked_pickle_load` recognises it by type and returns
    dummy embeddings instead of reading from it.
    """

    def __enter__(self): return self
    def __exit__(self, *args): pass
    def read(self): return b""
    def close(self): pass  # lets callers that explicitly close the handle succeed


def mocked_open(file, mode='r', *args, **kwargs):
    """Return a MockFile for the item-review-embeddings pickle, else defer to the real open."""
    if "item_review_embeddings.pkl" in str(file):
        print(f"Intercepted open for: {file}")
        return MockFile()
    return original_open(file, mode, *args, **kwargs)


def mocked_pickle_load(file_obj, *args, **kwargs):
    """Return dummy embeddings for a MockFile, else defer to the real pickle.load.

    The dummy array has shape (stella_num_items, stella_emb_dim), both read from
    notebook globals at call time, and dtype float32. Its values are random.
    """
    if isinstance(file_obj, MockFile):
        print("Intercepted pickle.load, returning dummy embeddings")
        # Return dummy numpy array with correct shape
        return np.random.rand(stella_num_items, stella_emb_dim).astype(np.float32)
    return original_pickle_load(file_obj, *args, **kwargs)


# Tag the mocks with the real functions so a later re-run can recover them.
mocked_open._original = original_open
mocked_pickle_load._original = original_pickle_load

# Activate mocks
builtins.open = mocked_open
pickle.load = mocked_pickle_load

In [None]:
# 4. Initialize model components

# GPT-2 configuration, extended with the dataset's user and item counts.
config = GPT2Config.from_pretrained('gpt2')
config.num_users, config.num_items = num_users, num_items
config.n_embd = config.hidden_size

# Backbone LLM (GPT-2). Prefer a local copy under <root_dir>/model/pretrained/gpt2;
# otherwise fall back to the hub id 'gpt2' (needs internet access or a cached model).
local_gpt2 = os.path.join(root_dir, "model", "pretrained", "gpt2")
llm_model_path = local_gpt2 if os.path.exists(local_gpt2) else 'gpt2'

print(f"Loading GPT2 from: {llm_model_path}")
LLMmodel = AutoModel.from_pretrained(llm_model_path)

# An empty memory object is passed; the model constructor takes one as an argument.
memory = UserItemMemory()

# Stand-in for the BERT meta-logits classifier, so BERT does not have to be loaded.
# Per the original author's notes on model.py (not visible in this notebook): the
# constructor unconditionally runs `meta_logits_classifier.to(device)` and stores
# the result, but does not otherwise use the classifier when item_logits_infer is
# 'stella'. The mock therefore only needs `.to(...)` to return itself.
dummy_classifier = MagicMock()
dummy_classifier.to.return_value = dummy_classifier

# Build the models while the open/pickle mocks are active. The try/finally
# guarantees the real builtins.open and pickle.load are reinstated even when a
# constructor raises; otherwise the whole kernel would stay patched.
print("Initializing MSEDynamicDPELLM4RecBaseModel...")
try:
    base_model = MSEDynamicDPELLM4RecBaseModel(
        config,
        LLMmodel,
        memory,
        meta_logits_tokenizer=None,
        meta_logits_classifier=dummy_classifier,
        device=device,
        num_item_meta=num_meta,
        item_logits_infer=item_logits_infer,
        dataset_name=dataset
    )

    # Wrap in Recommend Head
    print("Initializing DynamicCollaborativeGPTwithItemRecommendHead...")
    rec_model = DynamicCollaborativeGPTwithItemRecommendHead(config, base_model, device=device)
finally:
    # Restore original open/pickle
    builtins.open = original_open
    pickle.load = original_pickle_load
    print("Mocks removed.")

In [None]:
# 5. Load Weights
print("Loading state dict into model...")
# strict=False tolerates key mismatches, so inspect the returned report instead
# of discarding it: a missing key means that parameter keeps its initial value
# (for the source item embeddings, that is the random dummy array).
load_result = rec_model.load_state_dict(state_dict, strict=False)
missing_keys = list(getattr(load_result, "missing_keys", []))
unexpected_keys = list(getattr(load_result, "unexpected_keys", []))
if missing_keys:
    print(f"WARNING: {len(missing_keys)} model parameter(s) not found in checkpoint: {missing_keys}")
if unexpected_keys:
    print(f"WARNING: {len(unexpected_keys)} checkpoint entry(ies) not used by the model: {unexpected_keys}")
rec_model.eval()
print("Model loaded successfully!")

In [None]:
# 6. Extract Components
# - user embedding
# - item embedding (base_model.item_src_embs)
# - token-meta embedding (base_model.meta_item_embeddings)
# - MLP to compute logits (base_model.item_emb_mapper)

user_embeddings = rec_model.base_model.user_embeddings.weight.data
item_embeddings = rec_model.base_model.item_src_embs.weight.data
token_meta_embeddings = rec_model.base_model.meta_item_embeddings.weight.data
mlp_model = rec_model.base_model.item_emb_mapper

print("\n--- Extracted Components ---")
print(f"User Embeddings Shape: {user_embeddings.shape}")
print(f"Item Embeddings (Source/Stella) Shape: {item_embeddings.shape}")
print(f"Token-Meta Embeddings Shape: {token_meta_embeddings.shape}")
print(f"MLP Model: {mlp_model}")

# Verify MLP Computation
# logits = mlp(item_embedding) -> scaled to meta size
# weighted_emb = probs(logits) @ meta_embeddings

with torch.no_grad():
    test_item_emb = item_embeddings[0].unsqueeze(0)
    logits = mlp_model(test_item_emb)
    print(f"Test MLP Logits Shape: {logits.shape}")  # Should be (1, num_meta)

# Actually check the shape instead of only printing it: the later
# `probs @ token_meta_embeddings` product needs one logit per meta embedding row.
assert logits.shape[-1] == token_meta_embeddings.shape[0], (
    f"MLP produced {logits.shape[-1]} logits per item but there are "
    f"{token_meta_embeddings.shape[0]} meta embeddings"
)


## Content-Based Evaluation

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import scipy.sparse as sp

# File Paths defined by User
TEST_MATRIX_PATH = r"C:\Users\vulam\Downloads\Master_papers\code\cold_item\MI4Rec\dataset\beauty\cold_item_test_matrix.npz"
ITEM_EMBS_PATH = r"C:\Users\vulam\Downloads\Master_papers\code\cold_item\MI4Rec\dataset\beauty\Beauty_item_review_embeddings_full.pkl"
WARM_TRAIN_MATRIX_PATH = r"C:\Users\vulam\Downloads\Master_papers\code\cold_item\MI4Rec\dataset\beauty\warm_train_matrix.npz"

print("--- Loading Evaluation Datasets ---")

# load_npz returns the matrix in whatever sparse format it was saved in. The
# evaluation cells slice rows and read `.indices` as item (column) ids, which is
# only valid for CSR, so both matrices are converted up front.

# 1. Load Ground Truth (Cold Item Test Matrix)
print(f"Loading Test Matrix: {TEST_MATRIX_PATH}")
test_matrix = load_npz(TEST_MATRIX_PATH).tocsr()
print(f"Test Matrix Shape (Users x Items): {test_matrix.shape}")

# 2. Load Item Embeddings
print(f"Loading Item Embeddings: {ITEM_EMBS_PATH}")
with open(ITEM_EMBS_PATH, "rb") as f:
    # These are likely numpy arrays of shape (NumItems, EmbDim) — TODO confirm.
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    all_item_embeddings = pickle.load(f)
print(f"All Item Embeddings Shape: {all_item_embeddings.shape}")

# 3. Load User History (Warm Train Matrix)
print(f"Loading Warm Train Matrix: {WARM_TRAIN_MATRIX_PATH}")
warm_train_matrix = load_npz(WARM_TRAIN_MATRIX_PATH).tocsr()
print(f"Warm Train Matrix Shape: {warm_train_matrix.shape}")
num_users_train, num_items_train = warm_train_matrix.shape

# --- Evaluation Logic ---

def calculate_recall_at_k(predictions, ground_truth, k=20):
    """Mean Recall@k over the users that have at least one ground-truth item.

    Args:
        predictions: list of lists or 2D array; row i holds the ranked item
            indices recommended to user i. Only the first `k` entries are used.
        ground_truth: list of lists (item indices per user) or a scipy sparse
            matrix of shape (users, items) whose stored entries mark relevant
            items. Any sparse format is accepted.
        k: cut-off rank.

    Returns:
        The average over evaluated users of |top-k ∩ relevant| / |relevant|.
        Users without ground-truth items are skipped; if no user can be
        evaluated the result is 0.0 instead of NaN.
    """
    gt_is_sparse = sp.issparse(ground_truth)
    if gt_is_sparse:
        # Row access via indptr/indices is only meaningful in CSR (COO cannot be
        # indexed at all; CSC `.indices` would be row ids, not item ids).
        ground_truth = ground_truth.tocsr()

    recalls = []
    for user_idx in range(len(predictions)):
        if gt_is_sparse:
            row_start = ground_truth.indptr[user_idx]
            row_end = ground_truth.indptr[user_idx + 1]
            true_items = set(ground_truth.indices[row_start:row_end])
        else:
            true_items = set(ground_truth[user_idx])

        if not true_items:
            continue

        pred_items = set(predictions[user_idx][:k])
        recalls.append(len(pred_items & true_items) / len(true_items))

    # np.mean([]) would emit a RuntimeWarning and return NaN.
    return np.mean(recalls) if recalls else 0.0

print("\n--- Computing User Embeddings and Evaluating ---")

# Every row of test_matrix is evaluated. Row i of test_matrix and row i of
# warm_train_matrix are assumed to describe the same user — TODO confirm against
# the script that produced the split.

batch_size = 100
num_test_users = test_matrix.shape[0]
k = 20

all_recalls = []

# Pre-normalize item embeddings so that cosine similarity becomes a dot product:
# Cosine Sim(A, B) = Dot(Norm(A), Norm(B))
item_norms = np.linalg.norm(all_item_embeddings, axis=1, keepdims=True)
item_norms[item_norms == 0] = 1  # zero vectors stay zero without a 0/0 warning
norm_item_embeddings = np.nan_to_num(all_item_embeddings / item_norms)

# np.argpartition requires kth < number of columns; clamp so a catalogue with
# k or fewer items does not raise.
top_k_kth = min(k, norm_item_embeddings.shape[0] - 1)

for start_idx in range(0, num_test_users, batch_size):
    end_idx = min(start_idx + batch_size, num_test_users)
    user_indices = np.arange(start_idx, end_idx)

    # Sparse (BatchSize, NumItems) slice of the training interactions.
    user_history_batch = warm_train_matrix[user_indices]

    # User embedding = average of the embeddings of the items in the history:
    # (BatchSize, NumItems) @ (NumItems, EmbDim) -> (BatchSize, EmbDim), then
    # divide each row by the sum of that user's history row.
    batch_user_sums = user_history_batch @ all_item_embeddings
    user_counts = np.array(user_history_batch.sum(axis=1)).flatten()
    user_counts[user_counts == 0] = 1  # Avoid division by zero
    batch_user_embs = batch_user_sums / user_counts[:, np.newaxis]

    # Normalize user embeddings (users with an empty history keep a zero vector).
    user_norms = np.linalg.norm(batch_user_embs, axis=1, keepdims=True)
    user_norms[user_norms == 0] = 1
    batch_user_embs_norm = np.nan_to_num(batch_user_embs / user_norms)

    # (BatchSize, EmbDim) @ (EmbDim, NumItems) -> (BatchSize, NumItems)
    scores = batch_user_embs_norm @ norm_item_embeddings.T

    # NOTE(review): items already present in the training history are not
    # masked out; all items are ranked and hits are counted against test items.

    # Unordered top-k via argpartition; order inside the top k is irrelevant
    # because recall only uses set intersection.
    top_k_indices = np.argpartition(-scores, top_k_kth, axis=1)[:, :k]

    # Evaluate Batch
    batch_recalls = []
    test_interactions_batch = test_matrix[user_indices]

    for i in range(len(user_indices)):
        pred_indices = top_k_indices[i]
        true_indices = test_interactions_batch[i].indices

        # Users without test interactions are skipped.
        if len(true_indices) > 0:
            hits = np.intersect1d(pred_indices, true_indices).size
            batch_recalls.append(hits / len(true_indices))

    all_recalls.extend(batch_recalls)

if not all_recalls:
    print("WARNING: no user has test interactions; reporting recall 0.0")
final_recall_20 = np.mean(all_recalls) if all_recalls else 0.0
print(f"\nFinal Content-Based Recall@20: {final_recall_20:.4f}")

## Refined Content-Based Evaluation (MLP + Meta-Item Embeddings)

In [None]:
print("--- Computing Refined Item Embeddings ---")

# Step 1: raw item embeddings as a float32 tensor on the target device.
raw_item_embs_tensor = torch.tensor(all_item_embeddings, dtype=torch.float32, device=device)

# Step 2: run them through the model pipeline with the mapper in eval mode.
mlp_model.eval()

with torch.no_grad():
    # 2a. Mapper output: one row of logits per item.
    item_logits = mlp_model(raw_item_embs_tensor)
    print(f"Item Logits Shape: {item_logits.shape}")

    # 2b. Softmax over the last axis turns the logits into mixing weights.
    # (Per the original author's note this mirrors model.py's get_probs with
    # prob_norm='softmax'; model.py is not visible from this notebook.)
    item_probs = item_logits.softmax(dim=-1)

    # 2c. Mix the meta-item embeddings with those weights:
    # (NumItems, NumMeta) @ (NumMeta, EmbDim) -> (NumItems, EmbDim)
    if not isinstance(token_meta_embeddings, torch.Tensor):
        token_meta_embeddings = torch.tensor(token_meta_embeddings).to(device)

    refined_item_embeddings = item_probs @ token_meta_embeddings
    print(f"Refined Item Embeddings Shape: {refined_item_embeddings.shape}")

# The evaluation loop works on NumPy arrays.
refined_item_embeddings_np = refined_item_embeddings.cpu().numpy()

print("--- Evaluating with Refined Embeddings ---")

# Same evaluation as the raw content-based run, with refined_item_embeddings_np
# used both to build user embeddings and as the candidate item matrix.

all_recalls_refined = []

# Unit-normalize the refined item embeddings (zero rows stay zero, no 0/0 warning).
refined_item_norms = np.linalg.norm(refined_item_embeddings_np, axis=1, keepdims=True)
refined_item_norms[refined_item_norms == 0] = 1
norm_refined_item_embeddings = np.nan_to_num(refined_item_embeddings_np / refined_item_norms)

# np.argpartition requires kth < number of columns; clamp for tiny catalogues.
top_k_kth = min(k, norm_refined_item_embeddings.shape[0] - 1)

for start_idx in range(0, num_test_users, batch_size):
    end_idx = min(start_idx + batch_size, num_test_users)
    user_indices = np.arange(start_idx, end_idx)

    # Input user history is still the same warm interactions.
    user_history_batch = warm_train_matrix[user_indices]

    # User embedding = history-weighted average of REFINED item embeddings.
    batch_user_sums = user_history_batch @ refined_item_embeddings_np

    user_counts = np.array(user_history_batch.sum(axis=1)).flatten()
    user_counts[user_counts == 0] = 1  # Avoid division by zero

    batch_user_embs = batch_user_sums / user_counts[:, np.newaxis]

    # Unit-normalize (users with an empty history keep a zero vector).
    user_norms = np.linalg.norm(batch_user_embs, axis=1, keepdims=True)
    user_norms[user_norms == 0] = 1
    batch_user_embs_norm = np.nan_to_num(batch_user_embs / user_norms)

    # Scores: Users @ RefinedItems.T
    scores = batch_user_embs_norm @ norm_refined_item_embeddings.T

    # Unordered top-k; sufficient because recall uses set intersection.
    top_k_indices = np.argpartition(-scores, top_k_kth, axis=1)[:, :k]

    # Evaluate against the SAME ground truth (test_matrix).
    batch_recalls = []
    test_interactions_batch = test_matrix[user_indices]

    for i in range(len(user_indices)):
        pred_indices = top_k_indices[i]
        true_indices = test_interactions_batch[i].indices

        # Users without test interactions are skipped.
        if len(true_indices) > 0:
            hits = np.intersect1d(pred_indices, true_indices).size
            batch_recalls.append(hits / len(true_indices))

    all_recalls_refined.extend(batch_recalls)

if not all_recalls_refined:
    print("WARNING: no user has test interactions; reporting recall 0.0")
final_recall_20_refined = np.mean(all_recalls_refined) if all_recalls_refined else 0.0
print(f"\nFinal Refined Content-Based Recall@20: {final_recall_20_refined:.4f}")

## Evaluate Warm Test Data using Model Embeddings

In [None]:
WARM_TEST_MATRIX_PATH = r"C:\Users\vulam\Downloads\Master_papers\code\cold_item\MI4Rec\dataset\beauty\warm_test_matrix.npz"

print("--- Evaluating Warm Test Data with Model Learned Embeddings ---")

# 1. Load Warm Test Matrix
print(f"Loading Warm Test Matrix: {WARM_TEST_MATRIX_PATH}")
# Convert to CSR: the loop below slices rows and reads `.indices` as item ids,
# which is only valid in CSR, while load_npz returns the saved format.
warm_test_matrix = load_npz(WARM_TEST_MATRIX_PATH).tocsr()
print(f"Warm Test Matrix Shape: {warm_test_matrix.shape}")

# 2. Use Model Embeddings directly
# user_embeddings: user embedding table taken from the loaded model (Tensor)
# refined_item_embeddings_np: refined item embeddings from the previous cell (NumPy)

if isinstance(user_embeddings, torch.Tensor):
    user_embeddings_np = user_embeddings.cpu().numpy()
else:
    user_embeddings_np = user_embeddings

# Unit-normalize both sides so the dot product is the cosine similarity.
# Zero rows are kept as zero vectors without triggering 0/0 warnings.
user_norms = np.linalg.norm(user_embeddings_np, axis=1, keepdims=True)
user_norms[user_norms == 0] = 1
user_embeddings_norm = np.nan_to_num(user_embeddings_np / user_norms)

refined_item_norms = np.linalg.norm(refined_item_embeddings_np, axis=1, keepdims=True)
refined_item_norms[refined_item_norms == 0] = 1
norm_refined_item_embeddings = np.nan_to_num(refined_item_embeddings_np / refined_item_norms)

all_warm_recalls = []
# NOTE: this rebinds the notebook-level `num_users` that was first set from the
# training matrix shape.
num_users = warm_test_matrix.shape[0]

# np.argpartition requires kth < number of columns; clamp for tiny catalogues.
top_k_kth = min(k, norm_refined_item_embeddings.shape[0] - 1)

for start_idx in range(0, num_users, batch_size):
    end_idx = min(start_idx + batch_size, num_users)
    user_indices = np.arange(start_idx, end_idx)

    # Batch User Embeddings
    batch_user_embs = user_embeddings_norm[user_indices]

    # Calculate Scores
    scores = batch_user_embs @ norm_refined_item_embeddings.T

    # NOTE(review): items each user interacted with in training are not masked
    # out before ranking, so they can occupy top-k slots — confirm this is intended.

    # Unordered top-k; sufficient because recall uses set intersection.
    top_k_indices = np.argpartition(-scores, top_k_kth, axis=1)[:, :k]

    # Evaluation Match
    warm_test_batch = warm_test_matrix[user_indices]

    batch_recalls = []
    for i in range(len(user_indices)):
        pred_indices = top_k_indices[i]
        true_indices = warm_test_batch[i].indices

        # Users without warm test interactions are skipped.
        if len(true_indices) > 0:
            hits = np.intersect1d(pred_indices, true_indices).size
            batch_recalls.append(hits / len(true_indices))

    all_warm_recalls.extend(batch_recalls)

if not all_warm_recalls:
    print("WARNING: no user has warm test interactions; reporting recall 0.0")
final_warm_recall_20 = np.mean(all_warm_recalls) if all_warm_recalls else 0.0
print(f"\nFinal Warm Test Recall@20 (Model Embeddings): {final_warm_recall_20:.4f}")