In [1]:
import os
import json
import collections
from sklearn.metrics import roc_auc_score
from time import time
import numpy as np

from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.autograd import Variable
import torch.backends.cudnn

In [2]:
import sys
sys.path.append('../utils')
import data_preprocess

result_dict = data_preprocess.read_criteo_data('../data/tiny_train_input.csv', '../data/category_emb.csv')
test_dict = data_preprocess.read_criteo_data('../data/tiny_test_input.csv', '../data/category_emb.csv')

for key, value in result_dict.items():
    result_dict[key] = np.array(value)

In [10]:
class NFM(torch.nn.Module):
    def __init__(self, field_size, feature_sizes, max_num_hidden_layers, qtd_neuron_per_hidden_layer,
                 dropout_shallow=[0.5], embedding_size=4, n_classes=2, batch_size=1,
                 verbose=False, interaction_type=True, eval_metric=roc_auc_score,
                 b=0.99, n=0.01, s=0.2, use_cuda = True, greater_is_better = True):
        super(NFM, self).__init__()

        # Check CUDA
        if torch.cuda.is_available() and use_cuda:
            print("Using CUDA")

        self.device = torch.device("cuda:0" if torch.cuda.is_available() and use_cuda else "cpu")

        self.field_size = field_size
        self.feature_sizes = feature_sizes
        self.max_num_hidden_layers = max_num_hidden_layers
        self.qtd_neuron_per_hidden_layer = qtd_neuron_per_hidden_layer
        self.dropout_shallow = dropout_shallow
        self.embedding_size = embedding_size
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.verbose = verbose
        self.interaction_type = interaction_type
        self.eval_metric = eval_metric
        self.use_cuda = use_cuda
        self.greater_is_better = greater_is_better

        self.b = Parameter(torch.tensor(b), requires_grad=False).to(self.device)
        self.n = Parameter(torch.tensor(n), requires_grad=False).to(self.device)
        self.s = Parameter(torch.tensor(s), requires_grad=False).to(self.device)

        # FM
        self.first_order_embeddings = nn.ModuleList([nn.Embedding(feature_size,1)
                                                     for feature_size in self.feature_sizes])
        
        if self.dropout_shallow:
            self.first_order_dropout = nn.Dropout(self.dropout_shallow[0])
        self.second_order_embeddings = nn.ModuleList([nn.Embedding(feature_size, self.embedding_size)
                                                      for feature_size in self.feature_sizes])
        self.bias = torch.nn.Parameter(torch.randn(1))

        # Neural Networks
        self.hidden_layers = []
        self.output_layers = []

        if self.interaction_type:
            self.hidden_layers.append(nn.Linear(embedding_size, qtd_neuron_per_hidden_layer))
        else:
            self.hidden_layers.append(nn.Linear(self.field_size * (self.field_size-1) / 2, qtd_neuron_per_hidden_layer))

        for i in range(max_num_hidden_layers - 1):
            self.hidden_layers.append(nn.Linear(qtd_neuron_per_hidden_layer, qtd_neuron_per_hidden_layer))

        for i in range(max_num_hidden_layers):
            self.output_layers.append(nn.Linear(qtd_neuron_per_hidden_layer, n_classes))

        self.hidden_layers = nn.ModuleList(self.hidden_layers).to(self.device)
        self.output_layers = nn.ModuleList(self.output_layers).to(self.device)

        self.alpha = Parameter(torch.Tensor(self.max_num_hidden_layers).fill_(1 / (self.max_num_hidden_layers + 1)),
                               requires_grad=False).to(self.device)

        self.loss_array = []

        print("Initializing Neural Networks Done")
        
    def zero_grad(self):
        for i in range(self.max_num_hidden_layers):
            self.output_layers[i].weight.grad.data.fill_(0)
            self.output_layers[i].bias.grad.data.fill_(0)
            self.hidden_layers[i].weight.grad.data.fill_(0)
            self.hidden_layers[i].bias.grad.data.fill_(0)

In [6]:
    def forward(self, Xi, Xv):
        # FM
        Xi = torch.LongTensor(Xi).to(self.device).reshape((self.field_size, 1))
        Xv = torch.FloatTensor(Xv).to(self.device)

        first_order_emb_arr = [(torch.sum(emb(Xi[i]), 1) * Xv[i]).t()
                               for i, emb in enumerate(self.first_order_embeddings)]
        first_order = torch.cat(first_order_emb_arr, 0)

        if self.dropout_shallow:
            first_order = self.first_order_dropout(first_order)

        if self.interaction_type:
            # Use 2xixj = (xi+xj)^2 - xi^2 - yj^2 to reduce calculation
            second_order_emb_arr = [(torch.sum(emb(Xi[i]), 1).t() * Xv[i]).t()
                                    for i, emb in enumerate(self.second_order_embeddings)]
            print(second_order_emb_arr)
            sum_second_order_emb = sum(second_order_emb_arr)
            # (xi+xj)^2
            sum_second_order_emb_square = sum_second_order_emb * sum_second_order_emb
            # xi^2+xj^2
            second_order_emb_square = [item * item for item in second_order_emb_arr]
            second_order_emb_square_sum = sum(second_order_emb_square)
            second_order = (sum_second_order_emb_square - second_order_emb_square_sum) * 0.5
        else:
            second_order_emb_arr = [(torch.sum(emb(Xi), 1).t() * Xv).t()
                                    for i, emb in enumerate(self.second_order_embeddings)]
            weights_fm = []
            for i in range(self.field_size):
                for j in range(i + 1, self.field_size):
                    weights_fm.append(second_order_emb_arr[i] * second_order_emb_arr[j])

        # Neural Networks
        if self.interaction_type:
            x = second_order
        else:
            x = torch.cat([torch.sum(weight_fm, 1).view([-1, 1])
                            for weight_fm in weights_fm], 1)
        hidden_connections = []
        activation = F.relu

        x = activation(self.hidden_layers[0](x))
        hidden_connections.append(x)

        for i in range(1, self.max_num_hidden_layers):
            hidden_connections.append(
                F.relu(self.hidden_layers[i](hidden_connections[i - 1])))

        output_class = []

        for i in range(self.max_num_hidden_layers):
            output_class.append(self.output_layers[i](hidden_connections[i]))

        pred_per_layer = torch.stack(output_class)

        return first_order, pred_per_layer


In [None]:
    def update_weights(self, Xi, Xv, Y, show_loss):
        Y = torch.LongTensor(Y).to(self.device)
        first_order, predictions_per_layer = self.forward(Xi, Xv)

        losses_per_layer = []

        for out in predictions_per_layer:
            criterion = nn.CrossEntropyLoss().to(self.device)
            loss = criterion(out.view(self.batch_size, self.n_classes),
                             Y.view(self.batch_size).long())
            losses_per_layer.append(loss)

        w = []
        b = []

        for i in range(len(losses_per_layer)):
            losses_per_layer[i].backward(retain_graph=True)
            self.output_layers[i].weight.data -= self.n * \
                self.alpha[i] * self.output_layers[i].weight.grad.data
            self.output_layers[i].bias.data -= self.n * \
                self.alpha[i] * self.output_layers[i].bias.grad.data
            w.append(self.alpha[i] * self.hidden_layers[i].weight.grad.data)
            b.append(self.alpha[i] * self.hidden_layers[i].bias.grad.data)
            self.zero_grad()

        for i in range(1, len(losses_per_layer)):
            self.hidden_layers[i].weight.data -= self.n * torch.sum(torch.cat(w[i:]))
            self.hidden_layers[i].bias.data -= self.n * torch.sum(torch.cat(b[i:]))

        for i in range(len(losses_per_layer)):
            self.alpha[i] *= torch.pow(self.b, losses_per_layer[i])
            self.alpha[i] = torch.max(self.alpha[i], self.s / self.max_num_hidden_layers)

        z_t = torch.sum(self.alpha)

        self.alpha = Parameter(self.alpha / z_t, requires_grad=False).to(self.device)

        if show_loss:
            real_output = torch.sum(torch.mul(
                self.alpha.view(self.max_num_hidden_layers, 1).repeat(1, self.batch_size).view(
                    self.max_num_hidden_layers, self.batch_size, 1), predictions_per_layer), 0)
            criterion = nn.CrossEntropyLoss().to(self.device)
            loss = criterion(real_output.view(self.batch_size, self.n_classes), Y.view(self.batch_size).long())
            self.loss_array.append(loss)
            if (len(self.loss_array) % 1000) == 0:
                print("WARNING: Set 'show_loss' to 'False' when not debugging. "
                      "It will deteriorate the fitting performance.")
                loss = torch.Tensor(self.loss_array).mean().cpu().numpy()
                print("Alpha:" + str(self.alpha.data.cpu().numpy()))
                print("Training Loss: " + str(loss))
                self.loss_array.clear()

In [None]:

    def partial_fit_(self, Xi_data, Xv_data, Y_data, show_loss=True):
        self.update_weights(Xi_data, Xv_data, Y_data, show_loss)

    def partial_fit(self, Xi_data, Xv_data, Y_data, show_loss=True):
        self.partial_fit_(Xi_data, Xv_data, Y_data, show_loss)

    def predict_(self, Xi_data, Xv_data):
        return torch.argmax(torch.sum(torch.mul(
            self.alpha.view(self.max_num_hidden_layers, 1).repeat(1, 1).view(
                self.max_num_hidden_layers, 1, 1), self.forward(Xi_data, Xv_data)[1]), 0), dim=1).cpu().numpy()

    def predict(self, Xi_data, Xv_data):
        pred = self.predict_(Xi_data, Xv_data)
        return pred

    def plot_accuracy(self, Xi_data, Xv_data, Y_data):
        right_pred = 0
        result = []

        for i in range(len(Y_data)):
            ## prediction part
            pred = self.predict(Xi_data[i], Xv_data[i])
            
            if (Y_data[i] == pred[i]):
                right_pred += 1
            result.append(right_pred / (i + 1) * 100)

            ## Online NN part update
            self.update_weights(Xi_data[i], Xv_data[i], Y_data, show_loss=False)


        plt.plot([i for i in range(len(result))], result)
        plt.ylim(-4, 104)
        plt.xlabel('Number of Data')
        plt.ylabel('Right Predictions / Whole Predictions * 100 (%)')

        plt.grid()
        plt.title('Accuracy')

        plt.savefig(f'Accuracy{time()}.png')
        plt.show()

        print(result[-1])

In [8]:
with torch.cuda.device(0):
    nfm = NFM(39, result_dict['feature_sizes'], max_num_hidden_layers=5,
              qtd_neuron_per_hidden_layer=10, verbose=True, use_cuda=True, interaction_type=True).cuda()

Using CUDA
Initializing Neural Networks Done


In [9]:
    nfm.plot_accuracy(result_dict['index'], result_dict['value'], result_dict['label'])

RuntimeError: size mismatch, m1: [1 x 1], m2: [4 x 10] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:268