In [1]:
from src.data import data_split, WrapBatch
data_type = "movielens"
user_num, item_num, Seq_train, Seq_val, Seq_test = data_split(data_type)

sampler = WrapBatch(
    Seq_train,
    user_num,
    item_num,
    batch_size = 16,
    max_len = 200,
    n_workers = 2
)

u, seq, pos, neg = sampler.next_batch()

In [None]:
# 1) Item Embedding (batch_size, max_len, K)
# 2) Position Embedding (0 ~ 199) if max_len == 200 (batchsize, max_len, K)
# 3) Zero-padding 

In [4]:
import numpy as np

import torch
import torch.nn as nn


In [None]:
class PointWiseFeedForward(nn.Module):
    def __init__(self, 
                hidden_units,
                dropout_rate):
        super(PointWiseFeedForward, self).__init__()

        self.conv1 = nn.Conv1d(hidden_units, hidden_units, kernel_size = 1) # 입력(A, B, C) -> 출력(A, B, C)
        self.dropout1 = nn.Dropout(p = dropout_rate)
        self.relu = nn.relu()
        self.conv2 = nn.Conv2d(hidden_units, hidden_units, kernel_size = 1)
        self.dropout2 = nn.Dropout(p = dropout_rate)

    def forward(self, inputs): # inputs: (batch_size, max_len, K)
        inputs = inputs.transpose(-1, -2) # 마지막 차원 K와 마지막에서 두 번째 차원 max_len 변경 -> (B, K, max_len)
        inputs = self.conv1(inputs)
        inputs = self.dropout1(inputs)
        inputs = self.relu(inputs)
        inputs = self.conv2(inputs)
        inputs = self.dropout2(inputs)
        outputs = inputs.transpose(-1, -2) # 복원 (batch_size, max_len, K)
        return outputs

In [None]:
class SASRec(nn.module):
    def __init__(self,
                user_num, 
                item_num, 
                K, 
                max_len, 
                dropout_rate,
                num_blocks, 
                num_heads,
                hidden_units,
                device):

        assert K % num_heads != 0

        self.user_num = user_num
        self.item_num = item_num
        self.K = K
        self.max_len = max_len
        self.dropout_rate = dropout_rate
        self.num_blocks = num_blocks # MHA 개수
        self.num_heads = num_heads # Head width
        self.hidden_units = hidden_units
        self.device = device

        self.item_emb = nn.Embedding(self.item_num + 1, self.K, padding_idx = 0)
        self.pos_emb = nn.Embedding(self.max_len + 1, self.K, padding_idx = 0)
        self.emb_dropout = nn.Dropout(p = self.dropout_rate)

        self.attention_layernorms = nn.ModuleList()
        self.attention_layers = nn.ModuleList()
        self.forward_layernorms = nn.ModuleList()
        self.forward_layers = nn.ModuleList()

        for _ in range(self.num_blocks):
            new_attn_laternorm = nn.LayerNorm(self.hidden_units, eps = 1e-8)
            self.attention_layernorms.append(new_attn_laternorm)

            new_attn_layer = nn.MultiheadAttention(
                self.hidden_units,
                self.num_heads,
                self.dropout_rate
            )
            self.attention_layers.append(new_attn_layer)

            new_fwd_layer_norm = nn.LayerNorm(self.hidden_units, eps = 1e-8)
            self.forward_layernorms.append(new_fwd_layer_norm)

            new_fwd_layer = PointWiseFeedForward(self.hidden_units, self.dropout_rate)
            self.forward_layers.append(new_fwd_layer)

    def log2feats(self, logs):
        seqs = self.item_emb(torch.LongTensor(logs).to(self.device))
        seqs *= self.item_emb.embedding_dim ** 0.5 # normalization
        
        poss = np.tile(np.arange(1, logs.shape[1] + 1, [logs.shpe[0], 1]))
        poss *= (logs != 0) # 로그 기록에서 padding 반영

        seqs += self.pos_emb(torch.LongTensor(poss).to(self.device)) # 로그 임베딩 + 포지션 임베딩(learnable paramter)
        seqs = self.emb_dropout(seqs)

        seq_len = seqs.shape[1]

        

            

In [22]:
max_len = 200

item_emb = nn.Embedding(item_num + 1, 8, padding_idx = 0)
pos_emb = nn.Embedding(max_len + 1, 8, padding_idx=0)

In [35]:
seq = np.array(seq)

In [37]:
seq.shape[0]

16

In [30]:
torch.LongTensor(seq)

tensor([[     0,      0,      0,  ...,    798,     66,    889],
        [     0,      0,      0,  ...,    342,      7,    552],
        [     0,      0,      0,  ...,   1136,  98809, 170939],
        ...,
        [     0,      0,      0,  ...,   3994,   5989,  54004],
        [     0,      0,      0,  ...,      2,    150,    333],
        [ 94777,  95307,  97921,  ...,  65130,   1721,   4022]])

In [39]:
position = np.tile(np.arange(1, seq.shape[1] + 1), [seq.shape[0], 1]) # 동일한 차원의 포지션 인코딩

In [40]:
position *= (seq != 0)

In [41]:
position

array([[  0,   0,   0, ..., 198, 199, 200],
       [  0,   0,   0, ..., 198, 199, 200],
       [  0,   0,   0, ..., 198, 199, 200],
       ...,
       [  0,   0,   0, ..., 198, 199, 200],
       [  0,   0,   0, ..., 198, 199, 200],
       [  1,   2,   3, ..., 198, 199, 200]], shape=(16, 200))

In [24]:
item_emb(torch.LongTensor(seq)).shape

torch.Size([16, 200, 8])

In [26]:
pos_emb(torch.LongTensor(pos)).shape

IndexError: index out of range in self

In [5]:
a = np.array([5, 6])

In [6]:
a

array([5, 6])

In [7]:
np.tile(a, 3)

array([5, 6, 5, 6, 5, 6])