In [1]:
# SeqRecon-AD-v2: 无监督序列重构异常检测（新完整模型）
# 相对原 Full：finetune embedding + 无 self-clean，异常分数不做账户内 z-score 标准化

import math
import json
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score, precision_score, recall_score

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using device:', device)

Using device: cuda


In [2]:
# 1. 数据加载与预处理（与 SeqRecon-AD 一致）

card_item = pd.read_csv('card_item.csv')
card_feats = pd.read_csv('card_feats.csv', usecols=['label','card_id','name','身份证号','age'])
dataset = pd.concat([card_item, card_feats], axis=1)

import ast
if isinstance(dataset['明细项目名称'].iloc[0], str):
    dataset['明细项目名称'] = dataset['明细项目名称'].apply(ast.literal_eval)

print('数据规模:', len(dataset))

with open('item2id.json', 'r', encoding='utf-8') as f:
    item2id = json.load(f)
id2item = {v: k for k, v in item2id.items()}
num_items = len(item2id)

def map_items_to_ids(items, item2id):
    return [item2id[item] for item in items if item in item2id]

dataset['明细项目ID'] = dataset['明细项目名称'].apply(lambda x: map_items_to_ids(x, item2id))

train_df, temp_df = train_test_split(dataset, test_size=0.2, random_state=42)
val_df, test_df   = train_test_split(temp_df, test_size=0.5, random_state=42)
print(f"train={len(train_df)}, val={len(val_df)}, test={len(test_df)}")

数据规模: 8917
train=7133, val=892, test=892


In [3]:
# 2. 位置感知 Transformer（无监督 next-item）；v2：embedding 可微调，异常分数不 z-score

class RelativePositionalEncoding(nn.Module):
    def __init__(self, num_heads, max_len=512):
        super().__init__()
        self.rel_pos_table = nn.Parameter(torch.randn(2 * max_len - 1, num_heads))
        self.max_len = max_len

    def forward(self, q_len, k_len):
        range_q = torch.arange(q_len)[:, None]
        range_k = torch.arange(k_len)[None, :]
        distance_mat = range_k - range_q
        distance_mat = distance_mat.clamp(-self.max_len + 1, self.max_len - 1)
        distance_mat += self.max_len - 1
        rel_bias = self.rel_pos_table[distance_mat].permute(2, 0, 1)
        return rel_bias


class RelativeMultiheadAttention(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.2):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.q_proj = nn.Linear(embed_dim, embed_dim)
        self.k_proj = nn.Linear(embed_dim, embed_dim)
        self.v_proj = nn.Linear(embed_dim, embed_dim)
        self.out_proj = nn.Linear(embed_dim, embed_dim)

    def forward(self, query, key, value, attn_mask=None, key_padding_mask=None, pos_bias=None):
        B, L, D = query.shape
        H, d = self.num_heads, D // self.num_heads
        q = self.q_proj(query).view(B, L, H, d).transpose(1, 2)
        k = self.k_proj(key).view(B, L, H, d).transpose(1, 2)
        v = self.v_proj(value).view(B, L, H, d).transpose(1, 2)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d)
        if pos_bias is not None:
            scores = scores + pos_bias.unsqueeze(0)
        if attn_mask is not None:
            scores = scores + attn_mask.unsqueeze(0).unsqueeze(0)
        if key_padding_mask is not None:
            scores = scores.masked_fill(key_padding_mask.unsqueeze(1).unsqueeze(2), float('-inf'))
        attn_weights = F.dropout(torch.softmax(scores, dim=-1), p=self.dropout, training=self.training)
        out = torch.matmul(attn_weights, v).transpose(1, 2).contiguous().view(B, L, D)
        return self.out_proj(out)


class CustomTransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dropout=0.2):
        super().__init__()
        self.self_attn = RelativeMultiheadAttention(d_model, nhead, dropout=dropout)
        self.linear1 = nn.Linear(d_model, d_model * 4)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_model * 4, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src, src_mask=None, src_key_padding_mask=None, pos_bias=None):
        src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                              key_padding_mask=src_key_padding_mask, pos_bias=pos_bias)
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src = src + self.dropout2(self.linear2(F.relu(self.linear1(src))))
        return self.norm2(src)


class TransformerAnomalyDetectorV2(nn.Module):
    """无监督 v2：finetune embedding + 无 self-clean；异常分数不做账户内 z-score（use_score_norm=False），Top-K 聚合。"""
    def __init__(self, embedding_matrix, d_model=512, nhead=4, num_layers=6, dropout=0.2, pad_idx=0, freeze_embedding=False):
        super().__init__()
        self.d_model = d_model
        self.pad_idx = pad_idx
        self.use_score_norm = False
        self.use_topk_agg = True

        num_items, embedding_dim = embedding_matrix.size()
        emb = (embedding_matrix - embedding_matrix.mean()) / (embedding_matrix.std() + 1e-8)
        self.embedding = nn.Embedding(num_items, embedding_dim)
        self.embedding.weight = nn.Parameter(emb.clone().detach())
        self.embedding.weight.requires_grad = not freeze_embedding

        self.embed_proj = nn.Linear(embedding_dim, d_model)
        self.pos_encoder = RelativePositionalEncoding(num_heads=nhead, max_len=512)
        self.layers = nn.ModuleList([
            CustomTransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
            for _ in range(num_layers)
        ])
        self.final_norm = nn.LayerNorm(d_model)
        self.predictor = nn.Linear(d_model, num_items)
        self._init_weights()

    def _init_weights(self):
        initrange = 0.1
        self.embed_proj.bias.data.zero_()
        self.embed_proj.weight.data.uniform_(-initrange, initrange)
        nn.init.xavier_uniform_(self.predictor.weight)
        self.predictor.bias.data.zero_()

    def generate_mask(self, seq_len, device):
        m = (torch.triu(torch.ones(seq_len, seq_len)) == 1).transpose(0, 1)
        m = m.float().masked_fill(m == 0, float('-inf')).masked_fill(m == 1, 0.0)
        return m.to(device)

    def forward(self, src, src_mask=None):
        B, L = src.size()
        src_emb = self.embedding(src)
        src_emb = self.embed_proj(src_emb) * math.sqrt(self.d_model)
        src_emb = F.layer_norm(src_emb, src_emb.shape[-1:])
        pos_bias = self.pos_encoder(L, L)
        pad_mask = (src == self.pad_idx)
        out = src_emb
        for layer in self.layers:
            out = layer(out, src_mask=src_mask, src_key_padding_mask=pad_mask, pos_bias=pos_bias)
        return self.predictor(self.final_norm(out))

    def compute_loss(self, src, tgt, mask=None):
        seq_len = src.size(1)
        causal_mask = self.generate_mask(seq_len, src.device)
        predictions = self.forward(src, src_mask=causal_mask)[:, :-1, :].contiguous()
        tgt = tgt[:, 1:].contiguous()
        if mask is not None:
            mask = mask[:, 1:].contiguous()
            loss = F.cross_entropy(predictions.view(-1, predictions.size(-1)), tgt.view(-1), reduction='none')
            loss = loss[mask.view(-1) == 1].mean()
        else:
            loss = F.cross_entropy(predictions.view(-1, predictions.size(-1)), tgt.view(-1), ignore_index=self.pad_idx)
        if torch.isnan(loss) or torch.isinf(loss):
            return torch.tensor(0.0, requires_grad=True).to(loss.device)
        return loss

    def compute_anomaly_score(self, sequences, mask=None, topk_ratio=0.2, return_token_level=False):
        with torch.no_grad():
            seq_len = sequences.size(1)
            causal_mask = self.generate_mask(seq_len, sequences.device)
            predictions = self.forward(sequences, src_mask=causal_mask)[:, :-1, :].contiguous()
            targets = sequences[:, 1:].contiguous()
            per_position_loss = F.cross_entropy(
                predictions.view(-1, predictions.size(-1)), targets.view(-1), reduction='none'
            ).view_as(targets)
            if mask is not None:
                mask_cut = mask[:, 1:].contiguous()
                per_position_loss = per_position_loss * mask_cut
            if self.use_score_norm:
                mean = per_position_loss.mean(dim=1, keepdim=True)
                std = per_position_loss.std(dim=1, keepdim=True) + 1e-8
                normalized_loss = (per_position_loss - mean) / std
            else:
                normalized_loss = per_position_loss
            if self.use_topk_agg:
                k = max(1, int(topk_ratio * (seq_len - 1)))
                topk_values, _ = torch.topk(normalized_loss, k=k, dim=1)
                scores = topk_values.mean(dim=1)
            else:
                if mask is not None:
                    scores = (normalized_loss * mask_cut).sum(dim=1) / (mask_cut.sum(dim=1) + 1e-8)
                else:
                    scores = normalized_loss.mean(dim=1)
            if return_token_level:
                return scores.cpu().numpy(), normalized_loss.cpu().numpy()
            return scores.cpu().numpy()

print('TransformerAnomalyDetectorV2 ready.')

TransformerAnomalyDetectorV2 ready.


In [4]:
# 3. Dataset / DataLoader（与 SeqRecon-AD 一致）

class PrescriptionDataset(Dataset):
    def __init__(self, dataframe, max_length=517, pad_idx=0):
        self.data = dataframe.reset_index(drop=True)
        self.max_length = max_length
        self.pad_idx = pad_idx

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sequence = self.data.iloc[idx]['明细项目ID']
        if len(sequence) > self.max_length:
            sequence = sequence[:self.max_length]
            original_len = self.max_length
        else:
            original_len = len(sequence)
            sequence = sequence + [self.pad_idx] * (self.max_length - len(sequence))
        mask = [1] * original_len + [0] * (self.max_length - original_len)
        label = self.data.iloc[idx]['label']
        return {
            'input_seq': torch.tensor(sequence, dtype=torch.long),
            'target_seq': torch.tensor(sequence, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.float),
            'label': torch.tensor(label, dtype=torch.float),
        }


def custom_collate_fn(batch):
    return {
        'input_seq': torch.stack([b['input_seq'] for b in batch]),
        'target_seq': torch.stack([b['target_seq'] for b in batch]),
        'mask': torch.stack([b['mask'] for b in batch]),
        'label': torch.stack([b['label'] for b in batch]),
    }

train_dataset = PrescriptionDataset(train_df)
val_dataset   = PrescriptionDataset(val_df)
test_dataset  = PrescriptionDataset(test_df)
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,  collate_fn=custom_collate_fn)
val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

In [5]:
# 4. 评估函数：Recall@10 / NDCG@10 & 异常检测（AUC, PR-AUC, F1）

def evaluate_retrieval(model, data_loader, device, k=10):
    model.eval()
    total_recall, total_ndcg, total_cnt = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in data_loader:
            input_seq = batch['input_seq'].to(device)
            target_seq = batch['target_seq'].to(device)
            mask = batch['mask'].to(device)
            B, L = input_seq.size()
            causal_mask = model.generate_mask(L, device)
            logits = model(input_seq, src_mask=causal_mask)
            last_logits = logits[:, -1, :].clone()
            last_logits[:, 0] = -float('inf')
            _, topk_indices = torch.topk(last_logits, k=k, dim=-1)
            lengths = mask.sum(dim=1).long()
            next_indices = (lengths - 1).clamp(min=0)
            next_item = target_seq.gather(1, next_indices.view(-1, 1)).squeeze(1)
            gt = next_item.cpu().numpy()
            pred = topk_indices.cpu().numpy()
            for g, p in zip(gt, pred):
                if np.any(p == g):
                    total_recall += 1.0
                    rank = np.where(p == g)[0][0] + 1
                    total_ndcg += 1.0 / math.log2(rank + 1)
                total_cnt += 1
    return total_recall / total_cnt, total_ndcg / total_cnt


def evaluate_model_top(model, loader, device, top_percent=0.2):
    """按固定比例 top_percent 打标异常，返回 AUC/PR-AUC/F1 等"""
    all_scores, all_labels = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc='Eval'):
            scores = model.compute_anomaly_score(batch['input_seq'].to(device), batch['mask'].to(device), topk_ratio=0.2)
            scores = np.nan_to_num(scores, nan=0.0)
            all_scores.extend(scores)
            all_labels.extend(batch['label'].numpy())
    all_scores = np.asarray(all_scores)
    all_labels = np.asarray(all_labels)
    cutoff = np.percentile(all_scores, 100 * (1 - top_percent))
    pred_labels = (all_scores >= cutoff).astype(int)
    prec, rec, _ = precision_recall_curve(all_labels, all_scores)
    return {
        'scores': all_scores,
        'labels': all_labels,
        'auc': roc_auc_score(all_labels, all_scores),
        'pr_auc': auc(rec, prec),
        'cutoff': cutoff,
        'top_percent': top_percent,
        'f1': f1_score(all_labels, pred_labels, zero_division=0),
        'precision': precision_score(all_labels, pred_labels, zero_division=0),
        'recall': recall_score(all_labels, pred_labels, zero_division=0),
    }

In [6]:
# 5. 训练循环：无 self-clean，仅 next-item 损失 + 按 Val Recall@k 早停

def train_no_cleaning(model, train_loader, val_loader, device, max_epochs=150, patience=5, batch_size=128, k=10,
                      eval_fn=evaluate_retrieval, save_path='seqrecon_v2_best.pt'):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.98), eps=1e-9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
    best_recall, best_ndcg, best_result, epochs_no_improve = 0.0, 0.0, None, 0
    for epoch in range(max_epochs):
        model.train()
        total_loss, total_batches = 0.0, 0
        for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{max_epochs}'):
            input_seq = batch['input_seq'].to(device)
            target_seq = batch['target_seq'].to(device)
            mask = batch['mask'].to(device)
            optimizer.zero_grad()
            loss = model.compute_loss(input_seq, target_seq, mask)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
            total_batches += 1
        scheduler.step()
        avg_loss = total_loss / total_batches
        model.eval()
        recall_k, ndcg_k = eval_fn(model, val_loader, device, k=k)
        print(f'Epoch {epoch+1} Loss: {avg_loss:.4f} Val R@{k}: {recall_k:.4f} NDCG@{k}: {ndcg_k:.4f}')
        if recall_k > best_recall:
            best_recall, best_ndcg = recall_k, ndcg_k
            best_result = {'epoch': epoch+1, 'avg_loss': avg_loss, 'recall': recall_k, 'ndcg': ndcg_k}
            torch.save(model.state_dict(), save_path)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f'Early stop at epoch {epoch+1}.')
                break
    model.best_result = best_result
    return model

In [7]:
# 6. 训练入口（finetune embedding + 无 self-clean）

embedding_dim = 4096
pretrained_emb = nn.Embedding(num_items, embedding_dim)
pretrained_emb.load_state_dict(torch.load('item_embedding.pt'))
with torch.no_grad():
    embedding_matrix = pretrained_emb.weight.clone().detach()

model_v2 = TransformerAnomalyDetectorV2(
    embedding_matrix=embedding_matrix,
    d_model=512,
    nhead=2,
    num_layers=8,
    dropout=0.1,
    pad_idx=0,
    freeze_embedding=False,
).to(device)

trained_model = train_no_cleaning(
    model_v2,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    max_epochs=150,
    patience=5,
    batch_size=batch_size,
    k=10,
    eval_fn=evaluate_retrieval,
    save_path='seqrecon_v2_best.pt',
)
trained_model

  pretrained_emb.load_state_dict(torch.load('item_embedding.pt'))
Epoch 1/150: 100%|██████████| 56/56 [00:43<00:00,  1.29it/s]


Epoch 1 Loss: 6.7990 Val R@10: 0.1312 NDCG@10: 0.0666


Epoch 2/150: 100%|██████████| 56/56 [00:42<00:00,  1.32it/s]


Epoch 2 Loss: 5.8525 Val R@10: 0.1513 NDCG@10: 0.0881


Epoch 3/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 3 Loss: 5.6360 Val R@10: 0.1648 NDCG@10: 0.1042


Epoch 4/150: 100%|██████████| 56/56 [00:43<00:00,  1.30it/s]


Epoch 4 Loss: 5.5407 Val R@10: 0.1827 NDCG@10: 0.1152


Epoch 5/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 5 Loss: 5.4609 Val R@10: 0.1973 NDCG@10: 0.1238


Epoch 6/150: 100%|██████████| 56/56 [00:43<00:00,  1.30it/s]


Epoch 6 Loss: 5.3920 Val R@10: 0.2063 NDCG@10: 0.1243


Epoch 7/150: 100%|██████████| 56/56 [00:43<00:00,  1.29it/s]


Epoch 7 Loss: 5.3256 Val R@10: 0.2209 NDCG@10: 0.1328


Epoch 8/150: 100%|██████████| 56/56 [00:42<00:00,  1.30it/s]


Epoch 8 Loss: 5.2601 Val R@10: 0.2287 NDCG@10: 0.1433


Epoch 9/150: 100%|██████████| 56/56 [00:42<00:00,  1.32it/s]


Epoch 9 Loss: 5.2051 Val R@10: 0.2410 NDCG@10: 0.1462


Epoch 10/150: 100%|██████████| 56/56 [00:42<00:00,  1.30it/s]


Epoch 10 Loss: 5.1513 Val R@10: 0.2601 NDCG@10: 0.1537


Epoch 11/150: 100%|██████████| 56/56 [00:43<00:00,  1.29it/s]


Epoch 11 Loss: 5.1078 Val R@10: 0.2623 NDCG@10: 0.1541


Epoch 12/150: 100%|██████████| 56/56 [00:41<00:00,  1.36it/s]


Epoch 12 Loss: 5.0622 Val R@10: 0.2803 NDCG@10: 0.1682


Epoch 13/150: 100%|██████████| 56/56 [00:40<00:00,  1.37it/s]


Epoch 13 Loss: 5.0208 Val R@10: 0.2825 NDCG@10: 0.1708


Epoch 14/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 14 Loss: 4.9829 Val R@10: 0.2803 NDCG@10: 0.1704


Epoch 15/150: 100%|██████████| 56/56 [00:42<00:00,  1.33it/s]


Epoch 15 Loss: 4.9473 Val R@10: 0.2870 NDCG@10: 0.1760


Epoch 16/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 16 Loss: 4.9133 Val R@10: 0.2836 NDCG@10: 0.1760


Epoch 17/150: 100%|██████████| 56/56 [00:43<00:00,  1.30it/s]


Epoch 17 Loss: 4.8812 Val R@10: 0.2870 NDCG@10: 0.1746


Epoch 18/150: 100%|██████████| 56/56 [00:42<00:00,  1.32it/s]


Epoch 18 Loss: 4.8534 Val R@10: 0.2881 NDCG@10: 0.1776


Epoch 19/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 19 Loss: 4.8253 Val R@10: 0.3016 NDCG@10: 0.1831


Epoch 20/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 20 Loss: 4.7986 Val R@10: 0.3004 NDCG@10: 0.1816


Epoch 21/150: 100%|██████████| 56/56 [00:41<00:00,  1.35it/s]


Epoch 21 Loss: 4.7723 Val R@10: 0.2982 NDCG@10: 0.1808


Epoch 22/150: 100%|██████████| 56/56 [00:40<00:00,  1.38it/s]


Epoch 22 Loss: 4.7449 Val R@10: 0.3038 NDCG@10: 0.1836


Epoch 23/150: 100%|██████████| 56/56 [00:40<00:00,  1.37it/s]


Epoch 23 Loss: 4.7263 Val R@10: 0.2971 NDCG@10: 0.1815


Epoch 24/150: 100%|██████████| 56/56 [00:42<00:00,  1.31it/s]


Epoch 24 Loss: 4.6992 Val R@10: 0.3128 NDCG@10: 0.1832


Epoch 25/150: 100%|██████████| 56/56 [00:41<00:00,  1.34it/s]


Epoch 25 Loss: 4.6770 Val R@10: 0.3173 NDCG@10: 0.1876


Epoch 26/150: 100%|██████████| 56/56 [00:42<00:00,  1.32it/s]


Epoch 26 Loss: 4.6576 Val R@10: 0.3094 NDCG@10: 0.1850


Epoch 27/150: 100%|██████████| 56/56 [00:42<00:00,  1.33it/s]


Epoch 27 Loss: 4.6359 Val R@10: 0.3128 NDCG@10: 0.1894


Epoch 28/150: 100%|██████████| 56/56 [00:41<00:00,  1.35it/s]


Epoch 28 Loss: 4.6149 Val R@10: 0.3105 NDCG@10: 0.1869


Epoch 29/150: 100%|██████████| 56/56 [00:41<00:00,  1.34it/s]


Epoch 29 Loss: 4.6019 Val R@10: 0.3117 NDCG@10: 0.1842


Epoch 30/150: 100%|██████████| 56/56 [00:40<00:00,  1.37it/s]


Epoch 30 Loss: 4.5801 Val R@10: 0.3139 NDCG@10: 0.1870
Early stop at epoch 30.


TransformerAnomalyDetectorV2(
  (embedding): Embedding(4119, 4096)
  (embed_proj): Linear(in_features=4096, out_features=512, bias=True)
  (pos_encoder): RelativePositionalEncoding()
  (layers): ModuleList(
    (0-7): 8 x CustomTransformerEncoderLayer(
      (self_attn): RelativeMultiheadAttention(
        (q_proj): Linear(in_features=512, out_features=512, bias=True)
        (k_proj): Linear(in_features=512, out_features=512, bias=True)
        (v_proj): Linear(in_features=512, out_features=512, bias=True)
        (out_proj): Linear(in_features=512, out_features=512, bias=True)
      )
      (linear1): Linear(in_features=512, out_features=2048, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
      (linear2): Linear(in_features=2048, out_features=512, bias=True)
      (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (dropout1): Dropout(p=0.1, inplace=False)
      (dropout2): Dropout(p=0.1

In [8]:
# 7. 测试集评测（Avg_Loss, Recall@10, NDCG@10, 异常检测指标）与全量推理导出

best_model = TransformerAnomalyDetectorV2(
    embedding_matrix=embedding_matrix,
    d_model=512,
    nhead=2,
    num_layers=8,
    dropout=0.1,
    pad_idx=0,
    freeze_embedding=False,
).to(device)
best_model.load_state_dict(torch.load('seqrecon_v2_best.pt'))
best_model.eval()

test_loss_sum, test_n = 0.0, 0
with torch.no_grad():
    for batch in tqdm(test_loader, desc='Test Avg_Loss'):
        input_seq = batch['input_seq'].to(device)
        target_seq = batch['target_seq'].to(device)
        mask = batch['mask'].to(device)
        loss = best_model.compute_loss(input_seq, target_seq, mask)
        test_loss_sum += loss.item() * input_seq.size(0)
        test_n += input_seq.size(0)
avg_loss_test = test_loss_sum / max(test_n, 1)
recall10_test, ndcg10_test = evaluate_retrieval(best_model, test_loader, device, k=10)
ad_test = evaluate_model_top(best_model, test_loader, device, top_percent=0.2)


print('=== 测试集评测（SeqRecon-AD-v2：finetune emb + 无 self-clean）===')
print(f'Avg_Loss:    {avg_loss_test:.4f}')
print(f'Recall@10:   {recall10_test:.4f}')
print(f'NDCG@10:     {ndcg10_test:.4f}')
print(f'AUC:         {ad_test["auc"]:.4f}')
print(f'PR-AUC:      {ad_test["pr_auc"]:.4f}')
print(f'Precision:   {ad_test["precision"]:.4f}')
print(f'Recall:      {ad_test["recall"]:.4f}')
print(f'F1:          {ad_test["f1"]:.4f}')

all_dataset = PrescriptionDataset(dataset)
all_loader = DataLoader(all_dataset, batch_size=128, shuffle=False, collate_fn=custom_collate_fn)
results = evaluate_model_top(best_model, all_loader, device, top_percent=0.2)
cutoff = results['cutoff']
all_scores = results['scores']
pred_labels = (all_scores >= cutoff).astype(int)

base_df = all_dataset.data.reset_index(drop=True)
result_df = pd.concat([
    base_df[['card_id','name','身份证号','age']].reset_index(drop=True),
    pd.Series(all_scores, name='anomaly_score'),
    pd.Series(pred_labels, name='pred_label'),
], axis=1)
print(result_df.head())
result_df.to_csv('SeqRecon_AD_v2_scores.csv', index=False, encoding='utf-8-sig')

  best_model.load_state_dict(torch.load('seqrecon_v2_best.pt'))
Test Avg_Loss: 100%|██████████| 7/7 [00:02<00:00,  3.48it/s]
Eval: 100%|██████████| 7/7 [00:02<00:00,  3.48it/s]


=== 测试集评测（SeqRecon-AD-v2：finetune emb + 无 self-clean）===
Avg_Loss:    5.1027
Recall@10:   0.3475
NDCG@10:     0.2007
AUC:         0.8122
PR-AUC:      0.5349
Precision:   0.4749
Recall:      0.4570
F1:          0.4658


Eval: 100%|██████████| 70/70 [00:20<00:00,  3.48it/s]


                                card_id name                身份证号  age  \
0  00022092-02fc-45e0-83f2-c51a0d02f2d0  袁**  3101051949********   74   
1  000e9b7e-6a96-4eda-947b-425e964e1212  贺**  3101081952********   71   
2  000f8286-aa23-42d7-8510-2fab100bcc7b  蒋**  3101081932********   91   
3  00117f6c-e739-4913-b453-85a118a47123  刘**  3101021940********   83   
4  001c5c03-1db7-4303-934e-21decf219ab1   陈*  3101061948********   75   

   anomaly_score  pred_label  
0       7.025209           1  
1       6.901599           1  
2       7.523474           1  
3       5.294207           0  
4       5.425012           0  
