In [1]:
import os
os.chdir('/home/jupyter/github/recommender-lab/FM')

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

from torch.nn.functional import sigmoid
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

import numpy as np
import d2l
from mxnet import gluon

In [2]:
class FM(nn.Module):
    def __init__(self, num_embeddings:int, embedding_dim:int):
        """
        num_embeddings: number of categories, which is the dimension of one-hot encoding vector
        embedding_dim: dimension of each embedding vector
        """
        super().__init__()
        num_embeddings = int(sum(num_embeddings))
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        self.fc = nn.Embedding(num_embeddings, 1) # 輸出維度只有 1 維
        self.linear_layer = nn.Linear(1, 1)
        
        # Initialize weights using Xavier Uniform initialization
        init.xavier_uniform_(self.embedding.weight)
        init.xavier_uniform_(self.fc.weight)
        init.xavier_uniform_(self.linear_layer.weight)

    def forward(self, x):
        square_of_sum = torch.sum(self.embedding(x), axis=1) ** 2 #np.sum(self.embedding(x), axis=1) ** 2
        sum_of_square = torch.sum(self.embedding(x) ** 2, axis=1)
        x = self.linear_layer(self.fc(x).sum(1)) + 0.5 * (square_of_sum - sum_of_square).sum(1, keepdims=True)
        x = sigmoid(x)
        return x

In [3]:
import os

class CTRDataset(Dataset):
    def __init__(self, data_path='../data/ctr/train.csv', feat_mapper=None, defaults=None,
                 min_threshold=4, num_feat=34):
        self.NUM_FEATS, self.count, self.data = num_feat, 0, {}
        feat_cnts = defaultdict(lambda: defaultdict(int))
        self.feat_mapper, self.defaults = feat_mapper, defaults
        self.field_dims = np.zeros(self.NUM_FEATS, dtype=np.int64)
        with open(data_path) as f:
            for line in f:
                instance = {}
                values = line.rstrip('\n').split('\t')
                if len(values) != self.NUM_FEATS + 1:
                    continue
                label = np.float32([0, 0])
                label[int(values[0])] = 1
                instance['y'] = [np.float32(values[0])]
                for i in range(1, self.NUM_FEATS + 1):
                    feat_cnts[i][values[i]] += 1
                    instance.setdefault('x', []).append(values[i])
                self.data[self.count] = instance
                self.count = self.count + 1


        if self.feat_mapper is None and self.defaults is None:
            feat_mapper = {i: {feat for feat, c in cnt.items() if c >=
                               min_threshold} for i, cnt in feat_cnts.items()}
            self.feat_mapper = {i: {feat_v: idx for idx, feat_v in enumerate(feat_values)}
                                for i, feat_values in feat_mapper.items()}
            self.defaults = {i: len(feat_values) for i, feat_values in feat_mapper.items()}

        for i, fm in self.feat_mapper.items():
            self.field_dims[i - 1] = len(fm) + 1
        #self.offsets = np.array((0, *np.cumsum(self.field_dims).asnumpy()[:-1]))
        self.offsets = np.array((0, *np.cumsum(self.field_dims)[:-1]))

    def __len__(self):
        return self.count

    def __getitem__(self, idx):
        feat = np.array([self.feat_mapper[i + 1].get(v, self.defaults[i + 1])
                         for i, v in enumerate(self.data[idx]['x'])])
        return feat + self.offsets, self.data[idx]['y']

In [4]:
batch_size = 2048
train_data = CTRDataset('../data/ctr/train.csv')
test_data = CTRDataset('../data/ctr/test.csv',
                       feat_mapper=train_data.feat_mapper,
                       defaults=train_data.defaults)

train_iter = DataLoader(train_data, shuffle=True, batch_size=batch_size, num_workers=4)
test_iter = DataLoader(test_data, shuffle=True, batch_size=batch_size, num_workers=4)

In [5]:
devices = torch.device("cuda:0")
net = FM(train_data.field_dims, embedding_dim=20)
lr, num_epochs = 0.02, 30

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)
def accuracy(pred, target):
    result = pred.round() == target
    return result.sum(), result.shape[0]

# trainer = gluon.Trainer(net.parameters(), optimizer,
#                         {'learning_rate': lr})
# loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
# d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

In [6]:
# animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
#                         legend=['train loss', 'train acc', 'test acc'])


for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    acc, n = 0.0, 0
    for i, (inputs, labels) in enumerate(train_iter):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, torch.unsqueeze(labels[0], 1))
        matchs, ns = accuracy(outputs, torch.unsqueeze(labels[0], 1))
        acc += matchs
        n += ns

    
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()

    running_vloss = 0.0
    acc2, n2 = 0.0, 0
    for i2, (inputs2, labels2) in enumerate(test_iter):
        outputs2 = net(inputs2)
        vloss = criterion(outputs2, torch.unsqueeze(labels2[0], 1))
        matchs2, ns2 = accuracy(outputs2, torch.unsqueeze(labels2[0], 1))
        running_vloss += vloss.item()

        acc2 += matchs2
        n2 += ns2
        

    # print every 2000 mini-batches
    print(f'[{epoch+1}] training loss: {running_loss/i:.3f}, training acc:{acc/n}, validation loss:{running_vloss/i2:.3f}, validation acc:{acc2/n2}')


[1] training loss: 0.839, training acc:0.5712666511535645, validation loss:1.347, validation acc:0.7329999804496765
[2] training loss: 0.695, training acc:0.8594666719436646, validation loss:1.133, validation acc:0.9103333353996277
[3] training loss: 0.617, training acc:0.9380000233650208, validation loss:1.082, validation acc:0.9303333163261414
[4] training loss: 0.601, training acc:0.9574666619300842, validation loss:1.070, validation acc:0.9319999814033508
[5] training loss: 0.593, training acc:0.9681333303451538, validation loss:1.067, validation acc:0.9306666851043701
[6] training loss: 0.589, training acc:0.977066695690155, validation loss:1.072, validation acc:0.9306666851043701
[7] training loss: 0.587, training acc:0.9803333282470703, validation loss:1.075, validation acc:0.9296666383743286
[8] training loss: 0.586, training acc:0.982866644859314, validation loss:1.070, validation acc:0.9300000071525574
[9] training loss: 0.583, training acc:0.9873999953269958, validation loss