<a href="https://colab.research.google.com/github/claude9493/DSAA5002/blob/main/notebook/BetaRec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/claude9493/dsaa5002.git
!mv ./dsaa5002/dataset/ ./
!mv ./dsaa5002/utils/ ./

Cloning into 'dsaa5002'...
remote: Enumerating objects: 60, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (55/55), done.[K
remote: Total 60 (delta 16), reused 4 (delta 1), pack-reused 0[K
Unpacking objects: 100% (60/60), done.


In [2]:
!pip install geomloss

Collecting geomloss
  Downloading geomloss-0.2.4-py3-none-any.whl (17 kB)
Installing collected packages: geomloss
Successfully installed geomloss-0.2.4


In [3]:
%%time

from utils.utils import create_dataset, Trainer
# from layer.layer import Embedding, FeaturesEmbedding, EmbeddingsInteraction, MultiLayerPerceptron

import math
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader

from geomloss import SamplesLoss

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Training on [{}].'.format(device))

Training on [cuda:0].
CPU times: user 1.04 s, sys: 772 ms, total: 1.81 s
Wall time: 6.35 s


In [4]:
%%time
SAMPLE_NUM = 10000
task = 'regression'  # 'classification'
dataset = create_dataset('movielens', sample_num=SAMPLE_NUM, task=task, device=device)
field_dims, (train_X, train_y), (valid_X, valid_y), (test_X, test_y) = dataset.train_valid_test_split()

CPU times: user 1.75 s, sys: 1.03 s, total: 2.78 s
Wall time: 10 s


In [4]:
# train_iterator = SingledirectionalOneShotIterator(DataLoader(
#     TensorDataset(train_X, train_y),
#     # TrainDataset(train_path_queries, nentity, nrelation, args.negative_sample_size, train_answers),
#     batch_size=BATCH_SIZE,
#     shuffle=True
#     # num_workers=args.cpu_num
#     # collate_fn=TrainDataset.collate_fn
# ))

In [34]:
sl = SamplesLoss(loss="sinkhorn", blur=0.001)
params = [(.5, .5), (5, 1), (1,3), (2,2), (2,5)]

def w_dist(p1, p2):
  x = torch.arange(0.01, 1.01, 0.05).view(-1,1)
  d1 = torch.distributions.beta.Beta(*p1)
  d2 = torch.distributions.beta.Beta(*p2)
  u_ref = torch.exp(d1.log_prob(x))
  m_ref = torch.exp(d2.log_prob(x))
  return sl.forward(u_ref, m_ref)

import itertools
for params in itertools.combinations(params, 2):
  print(f"{params}: {w_dist(params[0], params[1]):.4}")

((0.5, 0.5), (5, 1)): 0.2511
((0.5, 0.5), (1, 3)): 0.1057
((0.5, 0.5), (2, 2)): 0.06067
((0.5, 0.5), (2, 5)): 0.1014
((5, 1), (1, 3)): 0.0807
((5, 1), (2, 2)): 0.3799
((5, 1), (2, 5)): 0.08919
((1, 3), (2, 2)): 0.1298
((1, 3), (2, 5)): 0.004072
((2, 2), (2, 5)): 0.1171


In [35]:
class SingledirectionalOneShotIterator(object):
    def __init__(self, dataloader):
        self.iterator = self.one_shot_iterator(dataloader)
        self.step = 0
        
    def __next__(self):
        self.step += 1
        data = next(self.iterator)
        return data
    
    @staticmethod
    def one_shot_iterator(dataloader):
        while True:
            for data in dataloader:
                yield data

class Beta_Loss:
    def __init__(self, model, reg_biase=0.005, reg_lambda=0.005):
        self.reg_biase = reg_biase
        self.reg_lambda = reg_lambda
        self.model = model

    def __call__(self, pred, target):
        loss = (1-self.reg_biase) * nn.MSELoss()(pred.view(-1,1), target)
        loss += self.reg_biase * self.model.loss[0]
        # loss += self.reg_lambda * self.model.loss[1]
        # loss = nn.MSELoss(reduction='sum')(pred.view(-1,1), target)
        loss = torch.nan_to_num(loss)
        return loss

class Regularizer():
    def __init__(self, base_add, min_val, max_val):
        self.base_add = base_add
        self.min_val = min_val
        self.max_val = max_val

    def __call__(self, entity_embedding):
        return torch.clamp(entity_embedding + self.base_add, self.min_val, self.max_val)

In [40]:
class BetaRecommendation(nn.Module):

    def __init__(self, field_dims, embed_dim=4, **kwargs):
        super(BetaRecommendation, self).__init__()
        n_users, n_movies = field_dims[0], field_dims[1]

        self.gamma = nn.Parameter(
            torch.Tensor([kwargs.get('gamma', 12)]), 
            requires_grad=False)
        self.lb = kwargs.get('lb', 0.05)
        self.ub = kwargs.get('ub', 1e9)

        self.Bu = nn.Parameter(torch.randn(n_users), requires_grad=True)
        self.Bm = nn.Parameter(torch.randn(n_movies), requires_grad=True)

        self.u = nn.Embedding(n_users, embed_dim * 2)
        # self.u = nn.Parameter(torch.zeros(n_users, embed_dim * 2))
        self.m = nn.Embedding(n_movies, embed_dim * 2)
        # self.m = nn.Parameter(torch.zeros(n_movies, embed_dim * 2))
        
        self.u.weight.data.uniform_(self.lb, self.ub)
        self.m.weight.data.uniform_(self.lb, self.ub)
        
        self.regularizer = Regularizer(1, self.lb, self.ub)
        self.loss = [0,0]

        self.sample_loss = SamplesLoss(loss="sinkhorn", blur=0.001)

        
    def forward(self, x, global_mean=0):
      # Predict rating
        users, movies = x[:,0], x[:,1]
        u, m = self.u(users), self.m(movies)
        Bu, Bm = self.Bu[users], self.Bm[movies]
        u[torch.isnan(u)] = 0.05
        m[torch.isnan(m)] = 0.05

        alpha_u, beta_u = torch.chunk(self.regularizer(u), 2, dim=-1)
        alpha_m, beta_m = torch.chunk(self.regularizer(m), 2, dim=-1)

        u_dist = torch.distributions.beta.Beta(alpha_u, beta_u)
        m_dist = torch.distributions.beta.Beta(alpha_m, beta_m)

        # distance = self.distance(u_dist, m_dist)
        distance = self.Wasserstein_distance(u_dist, m_dist)
    
        output = Bu + Bm - distance
        
        self.loss[0] = torch.norm(Bu) + torch.norm(Bm)
        return output
      
    def KL_Distance(self, u_dist, m_dist):
      # return torch.norm(torch.distributions.kl.kl_divergence(u_dist, m_dist), p=1, dim=-1)
      
      # print([u_dist, m_dist, 
            #  torch.norm(torch.nan_to_num(torch.log(torch.distributions.kl.kl_divergence(u_dist, m_dist)), 
                                        #  nan=1.0, posinf=1.0), p=1, dim=-1)])

      return torch.nan_to_num(self.gamma - torch.norm(torch.nan_to_num(torch.log(torch.distributions.kl.kl_divergence(u_dist, m_dist)), 
                                                                       nan=1, posinf=1), p=1, dim=-1))
      # return torch.norm(torch.pi/2.0 * torch.atan(torch.distributions.kl.kl_divergence(u_dist, m_dist)), p=1, dim=-1)

    def Wasserstein_distance(self, u_dist, m_dist):
      # Generate reference points
      x = torch.arange(0.01, 1.01, 0.05).view(-1,1)
      u_ref = torch.exp(u_dist.log_prob(x))
      m_ref = torch.exp(m_dist.log_prob(x))
      return torch.nan_to_num(torch.norm(self.sample_loss.forward(u_ref, m_ref), p=1, dim=-1))

    #staticmethod
    # def train_step(model, optimizer, train_iterator, args, step):
    #   model.train()
    #   optimizer.zero_grad()
    #   x, y = next(train_iterator)
    #   users, movies = x[:,0], x[:,1]

    #   batch_queries_dict = collections.defaultdict(list)
    #   batch_idxs_dict = collections.defaultdict(list)


In [41]:
%%time

EMBEDDING_DIM = 16
LEARNING_RATE = 1e-4
REGULARIZATION = 1e-6
BATCH_SIZE = 1024
EPOCH = 500
TRIAL = 100

br = BetaRecommendation(field_dims, EMBEDDING_DIM).to(device)

optimizer = optim.Adam(br.parameters(), lr=LEARNING_RATE, weight_decay=REGULARIZATION)
# criterion = nn.BCELoss()
# criterion = nn.CrossEntropyLoss()
criterion = Beta_Loss(model=br)

trainer = Trainer(br, optimizer, criterion, BATCH_SIZE, task=task)
trainer.train(train_X, train_y, epoch=EPOCH, trials=TRIAL, valid_X=valid_X, valid_y=valid_y)
test_loss, test_metric = trainer.test(test_X, test_y)
print('test_loss:  {:.5f} | test_metric:  {:.5f}'.format(test_loss, test_metric))

  0%|          | 0/500 [00:00<?, ?it/s]


ValueError: ignored