In [1]:
import torch
import tqdm
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader

from torchfm.dataset.avazu import AvazuDataset
from torchfm.dataset.criteo import CriteoDataset
from torchfm.dataset.movielens import MovieLens1MDataset, MovieLens20MDataset
from torchfm.layer import FeaturesEmbedding

In [2]:
from torchfm.model.mhafm import MultiheadAttentionalFactorizationMachineModel

In [3]:
def get_dataset(name, path):
    """Build the dataset object that corresponds to ``name``.

    Args:
        name: One of 'movielens1M', 'movielens20M', 'criteo' or 'avazu'.
        path: Forwarded unchanged as the only argument to the dataset
            constructor.

    Returns:
        The constructed dataset instance.

    Raises:
        ValueError: If ``name`` is not one of the supported identifiers.
    """
    # Guard clauses: each supported name returns immediately, so falling
    # through to the end means the name was not recognised.
    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'criteo':
        return CriteoDataset(path)
    if name == 'avazu':
        return AvazuDataset(path)
    raise ValueError('unknown dataset name: ' + name)

In [5]:
# Which dataset to load and the file it is read from.
dataset_name = 'avazu'
dataset_path = './data/avazu/train.csv'
dataset = get_dataset(dataset_name, dataset_path)

In [6]:
batch_size = 4096
split_seed = 42  # fixed so the train/valid/test partition is identical on every run

# 80% / 10% of the samples go to train / valid; test takes the remainder so the
# three lengths always sum to len(dataset) despite the int() truncation.
train_length = int(len(dataset) * 0.8)
valid_length = int(len(dataset) * 0.1)
test_length = len(dataset) - train_length - valid_length

# A dedicated generator makes the split reproducible without touching the
# global RNG state.
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    dataset, (train_length, valid_length, test_length),
    generator=torch.Generator().manual_seed(split_seed))

train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=8)
valid_data_loader = DataLoader(valid_dataset, batch_size=batch_size, num_workers=8)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=8)

In [6]:
# Pull a single batch to probe shapes in the cells below. next() raises
# StopIteration right here if the loader yields nothing, instead of leaving
# `fields` / `target` undefined for later cells.
fields, target = next(iter(train_data_loader))

In [7]:
# `field_dims` is read straight from the dataset and later handed to the model
# and the embedding layer; presumably one entry per input field — confirm
# against the dataset class.
field_dims = dataset.field_dims
# Number of entries in `field_dims`; used below to enumerate field pairs.
num_fields = len(field_dims)

In [11]:
# Shape of the sample batch.
# NOTE(review): the recorded output here shows 39 columns, while the pair
# tensor printed at the end of the notebook has 231 = 22 * 21 / 2 pairs, i.e.
# 22 fields — the saved outputs look like they come from different runs.
# Re-run from a fresh kernel to confirm.
print(fields.shape)

torch.Size([4096, 39])


In [8]:
# Instantiate the model on this dataset's field dimensions with the
# hyperparameters listed below.
model = MultiheadAttentionalFactorizationMachineModel(
    field_dims,
    embed_dim=16,
    num_heads=4,
    ffn_embed_dim=64,
    num_layers=3,
    mlp_dims=(16, 16),
    dropout=0.2,
)

In [9]:
# Run the model once on the sample batch as a smoke test of the forward pass.
# NOTE(review): the model was built with dropout=0.2 and is never switched to
# eval mode in this notebook, so this output is presumably stochastic — confirm.
y = model(fields)

In [10]:
# Shape of the model output for the sample batch.
# NOTE(review): the recorded output shows 2048 entries although batch_size is
# 4096 above — likely a stale output from an earlier run; re-run to confirm.
print(y.shape)

torch.Size([2048])


In [22]:
# 6x6 additive mask: -inf on and above the diagonal, 0.0 strictly below it.
# NOTE(review): the first row is entirely -inf; if this is fed to a softmax as
# an attention mask that row would presumably come out as NaN — confirm intent.
mask = torch.zeros(6, 6).masked_fill(torch.triu(torch.ones(6, 6)) == 1, float('-inf'))

In [23]:
# Display the mask to check which positions are blocked (-inf) and which are open (0).
print(mask)

tensor([[-inf, -inf, -inf, -inf, -inf, -inf],
        [0., -inf, -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf, -inf],
        [0., 0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., 0., -inf]])


In [11]:
# Embed the sample batch with a standalone embedding layer (separate from the
# model's own weights) so the pairwise field interactions can be inspected
# below. 16 matches the embed_dim passed to the model above.
embedding = FeaturesEmbedding(field_dims, 16)
x = embedding(fields)

In [17]:
# Index lists for every field pair (i, j) with i < j, ordered by i and then j:
# `row` holds the first member of each pair, `col` the second.
row = [i for i in range(num_fields - 1) for _ in range(i + 1, num_fields)]
col = [j for i in range(num_fields - 1) for j in range(i + 1, num_fields)]
# Gather both members of each pair along dimension 1 of the embedded batch.
p = x[:, row]
q = x[:, col]

In [19]:
# Shape of the gathered first-member embeddings; dimension 1 is the number of pairs.
print(p.shape)

torch.Size([2048, 231, 16])
