In [1]:
import pandas as pd
import torch

for data_name in ['Dai']:
    mu_embeddings = []
    sigma_embeddings = []
    for month in range(4,8):
        file_path = f'../outputs/new_time_embedding/task2/{data_name}/train/{month}/epoch_500_k_1_lr_0.0001_initalembed_no_seed_10/rglossfn_tweedie_activate_softplus_rgweight_1.0_lpweight_0.0_rankweight_0.0_conweight_0.0_diffweight_1.0_gaussian_yes_crossattn_yes_bias_yes_node.pt'
        x = torch.load(file_path)
        data = pd.read_csv(f'../data/{data_name}/task2/{month}/triplet_percentage.tsv', header=None, sep='\t')
        entities = pd.read_csv(f'../data/{data_name}/task1/entities.dict', sep='\t', header=None)
        user_num = sum(entities[1] < 30000)
        item_num = sum(entities[1] >= 30000)
        mu_embedding = torch.randn(len(entities), 768).to(x[0].device)
        mu_embedding[torch.cat((torch.from_numpy(data[0].unique()) - 20000, torch.from_numpy(data[2].unique()) - 30000 + user_num))] = x[0]
        sigma_embedding = torch.randn(len(entities), 768).to(x[0].device)
        sigma_embedding[torch.cat((torch.from_numpy(data[0].unique()) - 20000, torch.from_numpy(data[2].unique()) - 30000 + user_num))] = x[1]
        mu_embeddings.append(mu_embedding.unsqueeze(0))
        sigma_embeddings.append(sigma_embedding.unsqueeze(0))
    mu_embeddings = torch.cat(mu_embeddings, dim=0)
    sigma_embeddings = torch.cat(sigma_embeddings, dim=0)

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim

class TimeSeriesAutoEncoder(nn.Module):
    def __init__(self, dim):
        super(TimeSeriesAutoEncoder, self).__init__()
        # 稳定项MLP
        self.stable_mlp = nn.Sequential(
            nn.Linear(dim, dim * 2),
            nn.ReLU(),
            nn.Linear(dim * 2, dim)
        )
        # 趋势项MLP
        self.trend_mlp = nn.Sequential(
            nn.Linear(dim, dim * 2),
            nn.ReLU(),
            nn.Linear(dim * 2, dim)
        )
        # 预测下一时间片的MLP
        self.next_step_mlp = nn.Sequential(
            nn.Linear(dim, dim * 2),
            nn.ReLU(),
            nn.Linear(dim * 2, dim)
        )

    def forward(self, x):
        stable = self.stable_mlp(x)
        trend = self.trend_mlp(x)
        reconstructed = stable + trend  # 重构原始embedding
        next_step = self.next_step_mlp(trend) + stable  # 预测下一个时间片
        return reconstructed, next_step, stable, trend

# 超参数
dim = 768  # 示例embedding维度
learning_rate = 0.001
epochs = 100  # 示例训练轮数

model = TimeSeriesAutoEncoder(dim).to('cuda')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

train_data = mu_embeddings
temperature = 10

import torch
import torch.nn.functional as F

def compute_time_slice_similarity(embeddings):
    T, num, dim = embeddings.shape
    embeddings_flat = embeddings.reshape(T, -1)  # 将每个时间片的embeddings展平
    sim = F.cosine_similarity(embeddings_flat[:, None, :], embeddings_flat[None, :, :], dim=2)
    return sim

def attraction_loss(embeddings, margin=2.0, temperature=2):
    sim = compute_time_slice_similarity(embeddings) / temperature
    mask = ~torch.eye(sim.size(0), dtype=torch.bool)  # 排除自相似
    return (margin - sim[mask]).mean()

def repulsion_loss(embeddings, margin=2.0, temperature=2):
    sim = compute_time_slice_similarity(embeddings) / temperature
    mask = ~torch.eye(sim.size(0), dtype=torch.bool)  # 排除自距离
    return (sim[mask] + margin).mean()

# 训练过程
from tqdm import tqdm
with tqdm(range(epochs)) as bar:
    for epoch in range(epochs):
        reconstructed, predicted_next_step, stable, trend  = model(train_data)
        loss_reconstruction = criterion(reconstructed, train_data)
        loss_prediction = criterion(predicted_next_step[:-1], train_data[1:])
        loss_attract = attraction_loss(stable, margin=1/temperature, temperature=temperature)
        loss_repel = repulsion_loss(trend, margin=1/temperature, temperature=temperature)
        loss = loss_reconstruction + loss_prediction + loss_attract + loss_repel
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        bar.set_description(f"Epoch [{epoch+1}/{epochs}], loss_reconstruction: {loss_reconstruction.item()}, loss_prediction: {loss_prediction.item()}, loss_attract: {loss_attract.item()}, loss_repel: {loss_repel.item()}")

Epoch [100/100], loss_reconstruction: 0.005229885224252939, loss_prediction: 0.049811359494924545, loss_attract: 0.005293694790452719, loss_repel: 0.07053935527801514:   0%|          | 0/100 [00:03<?, ?it/s]


In [31]:
_, test_predicted_next_step, _, _  = model(train_data)
test_predicted_next_step[-1]

tensor([[-0.2001, -0.1920, -0.2176,  ..., -0.0677,  0.0109,  0.1722],
        [-0.2562, -0.1960, -0.3001,  ..., -0.1479,  0.0238,  0.2247],
        [-0.2086, -0.1850, -0.1926,  ..., -0.0231,  0.0203,  0.1444],
        ...,
        [-0.3193,  0.0074, -0.2818,  ..., -0.1074,  0.0467,  0.0702],
        [-0.2678, -0.0127, -0.2537,  ..., -0.1075,  0.0036,  0.0861],
        [-0.2355, -0.0152, -0.2440,  ..., -0.0941,  0.0083,  0.0781]],
       device='cuda:0', grad_fn=<SelectBackward0>)