In [17]:
import copy
import math
import pickle
import time
from itertools import repeat

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm.notebook import tqdm

In [18]:
import sys
sys.path.append('./SR-GNN-master/pytorch_code')

In [19]:
from model import *
from utils import *

In [20]:
class Opt:
    """Plain attribute bag holding the run configuration for this notebook."""

    def __init__(self):
        # Settings read by the visible notebook cells.
        self.dataset = 'yoochoose1_64'   # sub-directory of ./SR-GNN-master/datasets/
        self.batchSize = 100             # batch size used by test()
        self.validation = False          # True -> carve a validation split off train
        self.valid_portion = 0.001       # fraction used for the validation split

        # Settings not referenced by the visible cells
        # (presumably kept for parity with the SR-GNN scripts -- TODO confirm).
        self.hiddenSize = 100
        self.epoch = 30
        self.lr = 0.001
        self.lr_dc = 0.1
        self.lr_dc_step = 3
        self.l2 = 1e-5
        self.step = 1
        self.patience = 10
        self.nonhybrid = False


opt = Opt()

In [21]:
# Load the pickled train / test splits for the configured dataset.
# NOTE(security): pickle can execute arbitrary code while loading -- only use
# dataset files from a trusted source.
dataset_dir = './SR-GNN-master/datasets/' + opt.dataset

# `with` closes the file handle as soon as the data has been read.
with open(dataset_dir + '/train.txt', 'rb') as train_file:
    train_data = pickle.load(train_file)

if opt.validation:
    # Evaluate on a slice of the training data instead of the test file.
    train_data, valid_data = split_validation(train_data, opt.valid_portion)
    test_data = valid_data
else:
    with open(dataset_dir + '/test.txt', 'rb') as test_file:
        test_data = pickle.load(test_file)

# Per-dataset node count; any unlisted dataset falls back to 310.
if opt.dataset == 'diginetica':
    n_node = 43098
elif opt.dataset in ('yoochoose1_64', 'yoochoose1_4'):
    n_node = 37484
else:
    n_node = 310

In [22]:
# Load the pickled list of all training sequences; `with` closes the handle.
# NOTE(security): pickle can execute arbitrary code -- trusted files only.
with open('./SR-GNN-master/datasets/' + opt.dataset + '/all_train_seq.txt', 'rb') as seq_file:
    train_seqs = pickle.load(seq_file)

In [25]:
# Vocabulary: every raw item id that occurs in the training split, either
# inside an input sequence (train_data[0]) or as a target (train_data[1]).
set_ = set()
for session_items in train_data[0]:
    set_.update(session_items)
set_.update(train_data[1])
item_num = len(set_)

# Dense index 0..item_num-1 for each raw id, in set iteration order.
item_mapping = {raw_id: dense_id for dense_id, raw_id in enumerate(set_)}

# Turn (sequences, targets) into a list of (sequence, target) pairs.
# NOTE(review): this rebinds `train_data`, so the cell cannot be re-run
# without reloading the data first.
train_data = list(zip(*train_data))

# NOTE(review): the split point is derived from len(train_seqs), not from
# len(train_data) -- confirm this is intended; if train_seqs is longer than
# train_data the validation dictionary ends up empty.
n = int((1-opt.valid_portion) * len(train_seqs))


def _encode_sessions(samples):
    """Map each (items, last_item) pair to one list of dense ids, target last.

    Keys are the positions of the samples inside ``samples``.
    """
    encoded = {}
    for k, (items, last_item) in enumerate(samples):
        encoded[k] = [item_mapping[item] for item in (items + [last_item])]
    return encoded


d_basket_train = _encode_sessions(train_data[:n])
d_basket_val = _encode_sessions(train_data[n:])

In [26]:
class DataLoaderStoch():
    '''
    Mini-batch generator for next-item prediction.

    Every sample is a basket (list of item indices). The last item of the
    basket is the prediction target; the items before it are the model input.
    '''
    def __init__(self, trans,
                 basket,
                 item_num,
                 batchsize=128,
                 max_basket_size=23,
                 shuffle=True):
        '''
        Args:
            trans: array of sample ids (keys of ``basket``). It is indexed
                with an integer array, so array-like input is converted with
                ``np.asarray``.
            basket: dict mapping sample id -> list of item indices, with the
                target item in the last position.
            item_num: number of real items; the same value is used as the
                padding index inside a batch.
            batchsize: maximum number of samples per batch.
            max_basket_size: stored on the instance but not used for padding;
                each batch is padded to the longest basket it contains.
            shuffle: if True, samples are visited in a new random order on
                every pass (drawn from the global NumPy RNG).

        Iterating yields ``[inputs, targets]`` where ``inputs`` is a
        LongTensor of shape (batch, longest_basket - 1) padded with
        ``item_num`` and ``targets`` is a LongTensor of shape (batch,).
        '''
        self.trans = trans
        self.basket = basket
        self.max_basket_size = max_basket_size
        self.batchsize = batchsize
        self.shuffle = shuffle
        self.prod_num = item_num

    def __iter__(self):
        '''
        Start a new pass over the data (called by ``for`` / ``iter``).

        Every call returns an independent generator, so several iterators
        over the same loader do not interfere with each other.
        '''
        return self._contaner_()

    def __len__(self):
        '''
        Number of batches in one pass (the last batch may be smaller).
        '''
        n_samples = len(self.trans)
        return (n_samples + self.batchsize - 1) // self.batchsize

    def _contaner_(self):
        '''
        Yield the batches of one pass.

        The visiting order is fixed once per pass (a random permutation when
        shuffling, the order of ``trans`` otherwise) and then consumed in
        ``batchsize`` slices, so one pass costs O(n) instead of rebuilding
        the list of remaining ids for every batch.
        '''
        trans = np.asarray(self.trans)
        n_samples = len(trans)
        if self.shuffle:
            order = np.random.permutation(n_samples)
        else:
            order = np.arange(n_samples)
        for start in range(0, n_samples, self.batchsize):
            idx_curr = order[start:start + self.batchsize]
            yield self._make_sample_(trans[idx_curr])

    def foo(self, trans_products, max_basket_size):
        '''
        Split one basket into (padded input, target). See _make_sample_.

        The input is the basket without its last item, right-padded with the
        padding index so that it has ``max_basket_size - 1`` entries.
        '''
        without_target = trans_products[:-1]
        target = trans_products[-1]
        padding = [self.prod_num]*(max_basket_size - len(trans_products))
        return (without_target+padding), target

    def _make_sample_(self, X):
        '''
        Build the tensors for the sample ids in ``X``.

        Raises:
            KeyError: if an id in ``X`` has no entry in ``basket``.
        '''
        # Kept for backward compatibility: the ids of the last built batch.
        self.X = X
        baskets = [self.basket[key] for key in X]
        max_basket_size = max(map(len, baskets))
        temp = [self.foo(products, max_basket_size) for products in baskets]

        return [torch.LongTensor(np.array([row[0] for row in temp])),
                torch.LongTensor(np.array([row[1] for row in temp]))]

def generate_square_subsequent_mask(sz: int) -> torch.Tensor:
    """Return an (sz, sz) float mask: -inf strictly above the diagonal, 0 elsewhere."""
    blocked = torch.full((sz, sz), float('-inf'))
    return blocked.triu(diagonal=1)

In [27]:
# Ranking evaluation: hit rate and MRR over the top-ranked predictions
def test(model, test_data, k=20):
    """Compute hit rate and MRR (both in percent) of ``model`` on ``test_data``.

    Args:
        model: object exposing ``eval()``, ``predict(baskets)`` (returns a
            score tensor with one row per basket) and an ``item_mapping``
            dict from raw item id to score column.
        test_data: pair ``(sequences, targets)`` of equal length.
        k: number of top-scored items considered per sample (default 20).

    Returns:
        ``(hit, mrr)``: the percentage of samples whose target is among the
        top ``k`` items, and the mean reciprocal rank of the target times
        100 (a miss contributes 0).

    Raises:
        ValueError: if ``test_data`` contains no samples.
    """
    model.eval()
    hit, mrr = [], []
    batch_size = min(opt.batchSize, len(test_data[0]))
    if batch_size == 0:
        raise ValueError('test_data is empty')
    # Inference only: no autograd graph is needed for the scores.
    with torch.no_grad():
        for i in tqdm(range(0, len(test_data[0]), batch_size)):
            scores = model.predict(test_data[0][i:i+batch_size])
            # Never ask for more candidates than there are score columns.
            top_k = min(k, scores.shape[-1])
            sub_scores = scores.topk(top_k)[1].cpu().numpy()
            targets = test_data[1][i:i+batch_size]
            for score, target in zip(sub_scores, targets):
                target = model.item_mapping.get(target)
                if target is None:
                    # Target item is not in the mapping: count it as a miss.
                    hit.append(False)
                    mrr.append(0)
                    continue
                # Positions (0-based ranks) at which the target appears.
                ranks = np.flatnonzero(score == target)
                hit.append(ranks.size > 0)
                mrr.append(1 / (ranks[0] + 1) if ranks.size else 0)
    hit = np.mean(hit) * 100
    mrr = np.mean(mrr) * 100
    return hit, mrr

# Validation loss: average criterion value over a data loader
def evaluate_net(net, testloader, use_cuda=True):
    """Return the mean loss of ``net`` over all batches of ``testloader``.

    The loss is computed with the notebook-level ``criterion``.

    Args:
        net: model called as ``net(prods)``; switched to eval mode.
        testloader: sized iterable yielding ``(prods, labels)`` tensor pairs.
        use_cuda: accepted for call compatibility; not used. Batches are
            moved to the device of the model's parameters.

    Returns:
        The average batch loss as a float, or ``nan`` when ``testloader``
        has no batches (instead of raising ZeroDivisionError).
    """
    net = net.eval()

    n_batches = len(testloader)
    if n_batches == 0:
        return float('nan')

    # Send the batches wherever the weights are; fall back to the global
    # `device` for a model without parameters.
    first_param = next(net.parameters(), None)
    batch_device = first_param.device if first_param is not None else device

    running_loss = 0.0
    # Evaluation only: skip autograd bookkeeping.
    with torch.no_grad():
        for data in tqdm(testloader):
            # Current batch and its labels.
            prods = data[0].to(batch_device)
            labels = data[1].to(batch_device)

            # Forward pass and loss for this batch.
            outputs = net(prods)
            loss = criterion(outputs, labels)

            # Accumulate the loss over the loader.
            running_loss += float(loss)

    return running_loss/n_batches

# Main training function for the network
def train_net(n_epochs, 
              net, 
              optimizer, 
              scheduler,
              criterion, 
              trainloader,
              testloader,
              test_data,
              prod_num,
              use_cuda=False,
             ):
    '''
    Train ``net`` and report metrics after every epoch.

    After each epoch the function computes the validation loss with
    ``evaluate_net`` (which uses the notebook-level ``criterion``), the
    ranking metrics with ``test``, saves ``net.state_dict()`` to
    'net.model' (overwriting the previous file), prints the metrics and
    steps the scheduler.

    Args:
        n_epochs: number of epochs.
        net: network to train.
        optimizer: optimizer updating the parameters of ``net``.
        scheduler: learning-rate scheduler, stepped once per epoch.
        criterion: training loss, called as ``criterion(outputs, labels)``.
        trainloader: sized iterable of ``(prods, labels)`` training batches.
        testloader: sized iterable of validation batches for ``evaluate_net``.
        test_data: ``(sequences, targets)`` pair passed to ``test``.
        prod_num: number of items; accepted but not used here.
        use_cuda: if True, move ``net`` to the GPU before training.

    Returns:
        The trained network.
    '''

    if use_cuda:
        net = net.cuda()

    # Batches must be on the same device as the weights. Taking the device
    # from the model keeps use_cuda=True consistent even when the global
    # `device` says otherwise; the global is only a fallback.
    first_param = next(net.parameters(), None)
    batch_device = first_param.device if first_param is not None else device

    # Main loop over epochs.
    for epoch in range(n_epochs):
        net = net.train()

        running_loss = 0.0
        # Loop over the batches of this epoch.
        for i, data in enumerate(tqdm(trainloader)):
            # Current batch and its labels.
            prods = data[0].to(batch_device)
            labels = data[1].to(batch_device)

            # Clear gradients so they do not accumulate across steps.
            optimizer.zero_grad()

            # Forward pass.
            outputs = net(prods)

            # Loss for this batch.
            loss = criterion(outputs, labels)

            # Gradients of the loss w.r.t. the network parameters.
            loss.backward()

            # Gradient step: update the weights.
            optimizer.step()

            # Accumulate the training loss.
            running_loss += float(loss)

        # Validation.
        test_loss = evaluate_net(net, testloader, use_cuda=use_cuda)
        hit, mrr = test(net, test_data)
        torch.save(net.state_dict(), 'net.model')
        # Log after every epoch (max(..., 1) guards an empty train loader).
        print('Epoch {}. \nTrain_loss: {:.6f}' .format(epoch + 1, running_loss / max(len(trainloader), 1)))
        print(f'Test_loss: {test_loss}')
        print(f'Test eval: hit - {hit}, mrr - {mrr}')
        print('------------------------------')
        scheduler.step()

    print('Finished Training')
    return net

In [36]:
import torch
import torch.nn as nn
import numpy as np
import copy
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils.extmath import randomized_svd

class Embedding_prod(nn.Module):
    '''
    Item-embedding lookup followed by positional encoding.
    '''
    def __init__(self, prod_num, d, dropout=0.1):
        super().__init__()
        # prod_num + 1 rows: the extra row at index prod_num is the value the
        # rest of the notebook uses as the padding index.
        self.prod_embedd = nn.Embedding(prod_num+1, d)
        self.pos_encoder = PositionalEncoding(d, dropout)

    def forward(self, prods):
        # Look up the item vectors, then add the position information.
        return self.pos_encoder(self.prod_embedd(prods))

class PositionalEncoding(nn.Module):
    """Add sinusoidal position encodings to a batch-first input, then apply dropout."""

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        """
        Args:
            d_model: embedding dimension (even or odd).
            dropout: dropout probability applied after adding the encoding.
            max_len: longest sequence length that can be encoded.
        """
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For an odd d_model there is one odd-indexed column fewer than
        # even-indexed ones, so the cosine block is trimmed to fit.
        pe[:, 0, 1::2] = torch.cos(angles[:, :d_model // 2])
        # Buffer of shape (max_len, 1, d_model): follows .to()/.cuda() and is
        # stored in the state dict, but is not trained.
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]

        Returns:
            Tensor of the same shape as ``x``.

        Raises:
            ValueError: if ``seq_len`` exceeds the ``max_len`` given at
                construction.
        """
        if x.size(1) > self.pe.size(0):
            raise ValueError(
                'sequence length {} exceeds max_len {}'.format(x.size(1), self.pe.size(0)))
        # The table is stored sequence-first, so switch to
        # [seq_len, batch, dim] for the addition and switch back afterwards.
        x = x.permute(1, 0, 2)
        x = x + self.pe[:x.size(0)]
        return self.dropout(x).permute(1, 0, 2)

class Embedding_transformer(nn.Module):
    '''
    Sequence encoder that turns a batch of embedded, padded sequences into
    one vector per sequence.

    The embedded sequence is first passed through the configured encoder
    (self-attention layers, a TransformerEncoder or a GRU). The result is
    then pooled: the hidden state at the last non-padded position is
    combined with a weighted sum over all non-padded positions.
    '''
    def __init__(self, d, layer_num=2, layer_type='transformer', dim_feedforward=2048, dropout=0.1, max_basket_size=146):
        '''
        Args:
            d: embedding / hidden size.
            layer_num: number of encoder layers.
            layer_type: 'attention', 'transformer' or 'gru'. Any other value
                creates no encoder; forward then pools the input directly.
            dim_feedforward: feed-forward width ('transformer' only).
            dropout: dropout used by the 'transformer' and 'gru' encoders.
            max_basket_size: accepted but not used.
        '''
        super(Embedding_transformer, self).__init__()
        self.layer_type = layer_type
        if layer_type == 'attention':
            self.transformer = nn.ModuleList([nn.MultiheadAttention(d, 
                                                                    num_heads=1, 
                                                                    batch_first=True)
                                          for _ in range(layer_num)])
        elif layer_type == 'transformer':
            encoder_layer = nn.TransformerEncoderLayer(d_model=d,
                                                       nhead=1,
                                                       dropout=dropout,
                                                       dim_feedforward=dim_feedforward,
                                                       batch_first=True
                                                      )
            self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=layer_num)
        elif layer_type == 'gru':
            # nn.GRU only applies dropout between stacked layers; passing a
            # non-zero value with a single layer has no effect and only
            # triggers a warning, so it is zeroed in that case.
            self.transformer = nn.GRU(input_size=d,
                                      hidden_size=d,
                                      num_layers=layer_num,
                                      batch_first=True,
                                      dropout=dropout if layer_num > 1 else 0.0,
                                     )
        # `gru` and `linear` are not used in forward; they are kept so that
        # the parameter set (and saved state dicts) stay unchanged.
        self.gru = nn.GRU(input_size=d,
                          hidden_size=d,
                          num_layers=1,
                          batch_first=True,
                         )
        self.linear = nn.Linear(d, d)
        self.linear_one = nn.Linear(d, d)
        self.linear_two = nn.Linear(d, d)
        self.linear_three = nn.Linear(d, 1, bias=False)
        self.linear_transform = nn.Linear(d * 2, d, bias=True)

    @staticmethod
    def _causal_mask(size, like):
        '''(size, size) float mask with -inf above the diagonal, on the device/dtype of ``like``.'''
        blocked = torch.full((size, size), float('-inf'),
                             dtype=like.dtype, device=like.device)
        return torch.triu(blocked, diagonal=1)

    def forward(self, batch, padding_mask):
        '''
        Args:
            batch: float tensor [batch_size, seq_len, d] of embedded items.
            padding_mask: bool tensor [batch_size, seq_len], True at padded
                positions.

        Returns:
            Tensor [batch_size, d], one vector per sequence.
        '''
        hidden = batch
        if self.layer_type == 'attention':
            # The causal mask is only built for the encoders that use it, and
            # directly on the input's device.
            causal = self._causal_mask(padding_mask.shape[1], batch)
            for layer in self.transformer:
                attended, _ = layer(hidden, hidden, hidden,
                                    attn_mask=causal, key_padding_mask=padding_mask)
                # Residual connection around each attention layer.
                hidden = hidden + attended
        elif self.layer_type == 'transformer':
            causal = self._causal_mask(padding_mask.shape[1], batch)
            hidden = self.transformer(hidden, mask=causal, src_key_padding_mask=padding_mask)
        elif self.layer_type == 'gru':
            hidden = self.transformer(hidden)[0]

        valid = ~padding_mask
        rows = torch.arange(valid.shape[0], device=hidden.device)
        # Index of the last non-padded position of every sequence. A fully
        # padded row yields -1, i.e. the last position of the sequence.
        last_idx = torch.sum(valid, 1) - 1
        ht = hidden[rows, last_idx]                      # batch_size x d
        q1 = self.linear_one(ht).unsqueeze(1)            # batch_size x 1 x d
        q2 = self.linear_two(hidden)                     # batch_size x seq_len x d
        alpha = self.linear_three(torch.sigmoid(q1 + q2))
        # Weighted sum over the non-padded positions only.
        a = torch.sum(alpha * hidden * valid.unsqueeze(-1).float(), 1)
        return self.linear_transform(torch.cat([a, ht], 1))

class Net(nn.Module):
    '''
    Next-item scorer: embeds a padded item sequence, encodes it into one
    vector and scores that vector against the embedding of every item.
    '''
    def __init__(self, products_embedd, transformer,
                 d, prod_num, item_mapping=None):
        '''
        Args:
            products_embedd: embedding module mapping item indices to
                vectors (e.g. ``nn.Embedding`` or ``Embedding_prod``).
            transformer: sequence encoder called as
                ``transformer(embedded, padding_mask)``.
            d: embedding size; sets the range of the uniform initialisation.
            prod_num: number of real items; also the padding index.
            item_mapping: dict from raw item id to item index. When omitted,
                the notebook-level ``item_mapping`` is looked up at
                construction time.

        Raises:
            ValueError: if no item mapping is passed and none is defined at
                notebook level.
        '''
        super(Net, self).__init__()
        if item_mapping is None:
            # Resolved here, not at class-definition time, so the class can
            # be defined before the mapping cell has run.
            item_mapping = globals().get('item_mapping')
            if item_mapping is None:
                raise ValueError(
                    'item_mapping must be passed or defined at notebook level')
        self.d = d
        self.embedding = products_embedd
        self.transformer = transformer
        self.prod_num = prod_num
        self.item_mapping = item_mapping
        # Registered as a buffer so it follows the model across devices;
        # non-persistent so existing state dicts stay loadable.
        self.register_buffer('all_items', torch.arange(prod_num + 1),
                             persistent=False)
        self._reset_parameters()

    def _reset_parameters(self):
        '''Re-initialise every parameter uniformly in [-1/sqrt(d), 1/sqrt(d)].'''
        stdv = 1.0 / math.sqrt(self.d)
        with torch.no_grad():
            for weight in self.parameters():
                weight.uniform_(-stdv, stdv)

    def predict(self, baskets):
        '''
        Score every item for each raw-id basket.

        Items missing from ``self.item_mapping`` are dropped; the remaining
        items are right-padded to the length of the longest input basket.

        Args:
            baskets: non-empty list of lists of raw item ids.

        Returns:
            Tensor [len(baskets), prod_num + 1] of item scores.
        '''
        max_basket_size = max(map(len, baskets))
        rows, last_positions = [], []
        for basket in baskets:
            known = [self.item_mapping[item] for item in basket
                     if item in self.item_mapping]
            rows.append(known + [self.prod_num] * (max_basket_size - len(known)))
            last_positions.append(len(known) - 1)
        prods = torch.tensor(rows, dtype=torch.long, device=self.all_items.device)
        scores = self.forward(prods)
        if scores.dim() == 3:
            # An encoder that returns one vector per position yields
            # [batch, seq_len, items]; keep the last real position of each row.
            batch_idx = torch.arange(len(rows), device=scores.device)
            pos_idx = torch.tensor(last_positions, device=scores.device)
            scores = scores[batch_idx, pos_idx]
        return scores

    def forward(self, prods):
        '''
        Args:
            prods: LongTensor [batch_size, seq_len] of item indices, padded
                with ``prod_num``.

        Returns:
            Scores against all ``prod_num + 1`` embedding rows.
        '''
        # Item embeddings: [batch_size, seq_len, d].
        embedd = self.embedding(prods)
        # Encode the sequence, ignoring the padded positions.
        padding_mask = (prods == self.prod_num)
        embedd = self.transformer(embedd, padding_mask)
        all_embedd = self.embedding(self.all_items)
        return torch.matmul(embedd, all_embedd.transpose(1, 0))

In [37]:
# Device the model is moved to below and that the training / evaluation
# loops send every batch to.
device = 'cpu'

In [38]:
# Build the network.
# Statement order matters here: every module draws its initial weights from
# the global torch RNG as it is constructed.
embedd_dim = 100 # 512
# Plain embedding table with one extra row for the padding index item_num
# (the Embedding_prod variant with positional encoding is disabled).
prod_embedd = nn.Embedding(item_num+1, embedd_dim)#Embedding_prod(item_num, embedd_dim)
# Single-layer GRU encoder; dim_feedforward is only read by the
# 'transformer' layer type.
transformer = Embedding_transformer(embedd_dim,
                                    layer_num=1,
                                    layer_type='gru',
                                    dim_feedforward=512,
                                    dropout=0.1,)
net_model = Net(prod_embedd, transformer, embedd_dim, item_num)
# Last expression of the cell: moves the model and displays its structure.
net_model.to(device)

Net(
  (embedding): Embedding(17377, 100)
  (transformer): Embedding_transformer(
    (transformer): GRU(100, 100, num_layers=2, batch_first=True, dropout=0.1)
    (gru): GRU(100, 100, batch_first=True)
    (linear): Linear(in_features=100, out_features=100, bias=True)
    (linear_one): Linear(in_features=100, out_features=100, bias=True)
    (linear_two): Linear(in_features=100, out_features=100, bias=True)
    (linear_three): Linear(in_features=100, out_features=1, bias=False)
    (linear_transform): Linear(in_features=200, out_features=100, bias=True)
  )
)

In [39]:
# net_model.load_state_dict(torch.load('net.model'))

In [40]:
# Adam with L2 weight decay; the trailing comments keep previously tried values.
learning_rate= 1e-3 # 1e-4
optimizer = optim.Adam(net_model.parameters(), lr=learning_rate, weight_decay=1e-5)
# gamma=1 keeps the learning rate constant from epoch to epoch.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1, verbose=True) # 0.8

# Optimisation criterion: earlier variant that zeroes the class weight of the
# padding index (kept commented out).
#weight = torch.ones(item_num+1)
#weight[-1] = 0
#criterion = nn.CrossEntropyLoss(weight=weight)

Adjusting learning rate of group 0 to 1.0000e-03.


In [41]:
# Cross-entropy over the item scores; targets equal to the padding index
# item_num are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=item_num)

In [42]:
# Number of threads torch uses for intra-op parallelism on the CPU.
torch.set_num_threads(32)

In [44]:
# Data loader for training (reshuffled on every pass).
dataloader_train = DataLoaderStoch(np.array(list(d_basket_train.keys())),
                      d_basket_train,
                      item_num=item_num,
                      batchsize=100,
                      shuffle=True)

# Data loader for validation (fixed order).
dataloader_val = DataLoaderStoch(np.array(list(d_basket_val.keys())),
                      d_basket_val,
                      item_num=item_num,
                      batchsize=64,
                      shuffle=False)

# Train the network for 50 epochs; test_data is used for the ranking metrics.
net_model = train_net(50,
                   net_model,
                   optimizer,
                   scheduler,
                   criterion,
                   dataloader_train,
                   dataloader_val,
                   test_data,
                   item_num,
                   use_cuda=False,
                  )

  0%|          | 0/1161 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Take the first validation batch: gen[0] holds the inputs, gen[1] the targets.
gen = next(iter(dataloader_val))

In [33]:
# Arg-max of the model output along dim 1, for the first sample of the batch.
# NOTE(review): the saved output below has one value per position, which
# does not match the current forward (one score row per sample) -- it looks
# stale; re-run to refresh.
net_model(gen[0]).argmax(1)[0]

tensor([4127, 4215, 1392, 4352, 4312, 4312, 4312, 1392, 4312, 4352, 1392, 4312,
        4352, 4352, 4352, 4352, 4312, 4352, 1392, 4312, 1392, 4350, 4352, 4352,
        4352, 4352, 4352, 4127, 4350, 4312, 4352, 4352, 4352, 4312, 4350, 4350,
        4352, 4352, 4352, 4312, 1392, 4350, 1392, 4352, 4312, 1392, 4312, 1392,
        1392, 1392, 1392, 4352, 4350, 4352, 1392, 3088, 4350, 1392, 4312, 4352,
        4352, 4352, 4352, 1862, 4350, 1392, 1392, 1392, 4352, 4312, 4352, 4350,
        4352, 1392, 4312, 4350, 4350, 1392, 4352, 4352, 4350, 4352, 4352, 4312,
        4352, 1392, 4312, 4312, 1392, 4312, 4352, 4350, 4352, 4312, 4312, 4352,
        4352, 4312, 1392, 1392, 4312, 4352, 1392, 4352, 1392, 4352, 4352, 1392,
        4352, 4312, 1392, 4312, 4352, 1392, 4312, 4352, 4312, 1392, 1392, 4352,
        4312, 4312, 1392, 4312, 4127, 4352, 4352, 4350, 1392, 4352, 4352, 4312,
        4312, 4352, 4352, 4352, 4352, 2682, 1392, 4350, 4352, 1392, 4352, 4350,
        4352, 4312])

In [34]:
# First validation basket as dense item indices (the target is the last entry).
d_basket_val[0]

[4312, 4312, 1392, 1392, 4150, 4150]

In [35]:
# Target of the first sample in the batch.
# NOTE(review): the saved output shows a padded sequence, whereas the current
# loader returns one target per sample -- presumably from an earlier version.
gen[1][0]

tensor([ 4312,  1392,  1392,  4150,  4150, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376,
        17376, 17376, 17376, 17376, 17376, 17376, 17376, 17376, 