In [1]:
import pickle
import numpy as np
import paddle
import pandas as pd
import paddle.nn as nn

# Load the pickled interaction table. The context manager guarantees the file
# handle is closed even if unpickling fails (the original never closed it).
# NOTE(security): pickle.load can execute arbitrary code; only load files
# from a trusted source.
with open('data/list.pkl', 'rb') as file:
    data = pickle.load(file)
# Shuffle rows in place. No seed is set, so the order differs between runs.
np.random.shuffle(data)
users_df = pd.read_csv('data/users.csv')
items_df = pd.read_csv('data/items.csv')

In [2]:
# Embedding width used by both towers, and the mini-batch size for the loader.
emb_scale, batch_size = 1024, 64
# Row counts of the user/item tables; used as the embedding vocabulary sizes.
len_users, len_items = users_df.shape[0], items_df.shape[0]

In [3]:
class Dataset(paddle.io.Dataset):
    """Map-style dataset over a 2-D table of (input pair, label) rows.

    `data` must support tuple slicing (e.g. a NumPy array). The first two
    columns are kept as the model input and the third column as the label.
    """

    def __init__(self, data):
        # Columns 0-1: model inputs. Presumably (user id, item id) -- confirm
        # against the pickle's producer.
        self.feature = data[:, :2]
        # Column 2: training target.
        self.label = data[:, 2]

    def __getitem__(self, idx):
        """Return the `(feature_row, label)` pair stored at position `idx`."""
        sample = self.feature[idx]
        target = self.label[idx]
        return sample, target

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.feature)

# Despite its name, `dataset` is the batching DataLoader wrapped around the
# Dataset built from `data`; it reshuffles the samples on every pass.
dataset = paddle.io.DataLoader(
    Dataset(data),
    shuffle=True,
    batch_size=batch_size,
)

In [4]:
class DSSM(nn.Layer):
    """Two-tower (DSSM-style) model that scores user/item pairs.

    Each tower embeds an id and passes it through four linear layers
    (emb_scale -> 512 -> 300 -> 300 -> 128) with ReLU between layers.
    The score is the sigmoid of the cosine similarity between the two
    128-dimensional tower outputs.

    Reads the module-level `len_users`, `len_items` and `emb_scale` when
    constructed.
    """

    def __init__(self):
        super(DSSM, self).__init__()
        # Embedding tables sized by the number of rows in the user/item tables.
        # NOTE(review): assumes ids lie in [0, len_users) / [0, len_items) -- confirm.
        self.users_emb = nn.Embedding(len_users, emb_scale)
        self.items_emb = nn.Embedding(len_items, emb_scale)
        # User tower.
        self.users_fc1 = nn.Linear(emb_scale, 512)
        self.users_fc2 = nn.Linear(512, 300)
        self.users_fc3 = nn.Linear(300, 300)
        self.users_fc4 = nn.Linear(300, 128)
        # Item tower (same shape as the user tower, separate weights).
        self.items_fc1 = nn.Linear(emb_scale, 512)
        self.items_fc2 = nn.Linear(512, 300)
        self.items_fc3 = nn.Linear(300, 300)
        self.items_fc4 = nn.Linear(300, 128)

    @staticmethod
    def _run_tower(x, layers):
        """Apply `layers` in order, with ReLU after every layer except the last."""
        last = len(layers) - 1
        for pos, layer in enumerate(layers):
            x = layer(x)
            if pos < last:
                x = nn.functional.relu(x)
        return x

    def forward(self, input):
        """Score a batch of (user id, item id) pairs.

        Args:
            input: 2-D integer tensor; column 0 holds user ids and column 1
                holds item ids.

        Returns:
            1-D tensor with one sigmoid(cosine similarity) score per row.
        """
        # user tower
        user = self._run_tower(
            self.users_emb(input[:, 0]),
            (self.users_fc1, self.users_fc2, self.users_fc3, self.users_fc4),
        )
        # item tower
        item = self._run_tower(
            self.items_emb(input[:, 1]),
            (self.items_fc1, self.items_fc2, self.items_fc3, self.items_fc4),
        )
        # similarity: reduce over the feature axis (axis=1) so that each sample
        # gets its own score. The previous axis=0 reduced over the batch and
        # returned 128 values unrelated to individual samples.
        x = nn.functional.cosine_similarity(user, item, axis=1)
        # NOTE(review): cosine similarity lies in [-1, 1], so this sigmoid is
        # confined to roughly (0.27, 0.73); kept as in the original design.
        x = nn.functional.sigmoid(x)
        return x