In [29]:
import pickle
import numpy as np
import paddle
import pandas as pd
import paddle.nn as nn

file = open('data/list.pkl', 'rb')
data = pickle.load(file)
np.random.shuffle(data)
users_df = pd.read_csv('data/users.csv')
items_df = pd.read_csv('data/items.csv')

In [2]:
emb_scale = 1024
batch_size = 64
len_users = len(users_df)
len_items = len(items_df)

In [21]:
class Dataset(paddle.io.Dataset):
    def __init__(self, data):
        self.feature = data[:,0:2]
        self.label = data[:,2]
        pass
    def __getitem__(self, idx):
        return self.feature[idx], self.label[idx]
    def __len__(self):
        return len(self.feature)
    pass

dataset = paddle.io.DataLoader(Dataset(data),
                               batch_size=batch_size,
                               shuffle=True)

In [13]:
class DSSM(nn.Layer):
    def __init__(self):
        super(DSSM,self).__init__()
        self.users_emb = nn.Embedding(len_users, emb_scale)
        self.items_emb = nn.Embedding(len_items, emb_scale)
        self.users_fc1 = nn.Linear(emb_scale, 512)
        self.users_fc2 = nn.Linear(512, 300)
        self.users_fc3 = nn.Linear(300, 300)
        self.users_fc4 = nn.Linear(300, 128)
        self.items_fc1 = nn.Linear(emb_scale, 512)
        self.items_fc2 = nn.Linear(512, 300)
        self.items_fc3 = nn.Linear(300, 300)
        self.items_fc4 = nn.Linear(300, 128)
        pass
    def forward(self, input):
        # user tower
        user = self.users_emb(input[:,0])
        user = self.users_fc1(user)
        user = nn.functional.relu(user)
        user = self.users_fc2(user)
        user = nn.functional.relu(user)
        user = self.users_fc3(user)
        user = nn.functional.relu(user)
        user = self.users_fc4(user)
        # item tower
        item = self.items_emb(input[:,1])
        item = self.items_fc1(item)
        item = nn.functional.relu(item)
        item = self.items_fc2(item)
        item = nn.functional.relu(item)
        item = self.items_fc3(item)
        item = nn.functional.relu(item)
        item = self.items_fc4(item)
        # similarity
        x = nn.functional.cosine_similarity(user, item, axis=0)
        x = nn.functional.sigmoid(x)
        return x
    pass

In [5]:
x = paddle.to_tensor(np.random.randint(0,len_users,size=(64,2)))

In [14]:
DSSM()(x)


Tensor(shape=[128], dtype=float32, place=Place(cpu), stop_gradient=False,
       [0.49535686, 0.31723797, 0.57621199, 0.67880642, 0.67578387, 0.54226148,
        0.50485319, 0.42153674, 0.61306316, 0.50633538, 0.35136047, 0.53561687,
        0.41069722, 0.66122580, 0.51983237, 0.33422592, 0.60878909, 0.59796405,
        0.68280590, 0.59740305, 0.42344308, 0.49230734, 0.32854435, 0.35653985,
        0.54609293, 0.53759527, 0.48363721, 0.47112915, 0.42539600, 0.44058111,
        0.33394700, 0.53857565, 0.37423849, 0.64414495, 0.36155275, 0.56251663,
        0.30308816, 0.51769984, 0.43930537, 0.44129738, 0.50934547, 0.67138296,
        0.38797316, 0.41335759, 0.60153967, 0.51722938, 0.46695280, 0.36728516,
        0.59007019, 0.44402546, 0.53434145, 0.35307908, 0.48867029, 0.57768184,
        0.34501320, 0.50747496, 0.64528549, 0.52622914, 0.56067663, 0.52464890,
        0.46789047, 0.40805262, 0.59158677, 0.50790560, 0.66658956, 0.30530289,
        0.63995224, 0.53939569, 0.42851242, 0.

In [22]:
for x in dataset:
    x

In [23]:
x

[Tensor(shape=[59, 2], dtype=float64, place=Place(cpu), stop_gradient=True,
        [[1707., 5095.],
         [8810., 8437.],
         [6466., 1036.],
         [7654., 1037.],
         [4791., 6446.],
         [2071., 7208.],
         [1200., 3627.],
         [2640., 20126.],
         [2220., 6504.],
         [8630., 10433.],
         [8601., 4067.],
         [5923., 5993.],
         [4412., 3822.],
         [3814., 9271.],
         [8329., 9387.],
         [8871., 11194.],
         [2690., 9833.],
         [4448., 11434.],
         [2455., 8048.],
         [1311., 6677.],
         [4475., 11458.],
         [3406., 1244.],
         [10803., 16616.],
         [9322., 7753.],
         [666. , 8301.],
         [4875., 3722.],
         [3399., 4690.],
         [5765., 13797.],
         [6011., 6236.],
         [6762., 5584.],
         [1771., 9362.],
         [11451., 10817.],
         [3293., 5106.],
         [4043., 512. ],
         [6506., 21407.],
         [3574., 10420.],
         [21

In [18]:
DSSM()(paddle.cast(x[0],dtype='int32'))

Tensor(shape=[128], dtype=float32, place=Place(cpu), stop_gradient=False,
       [0.48207566, 0.57544214, 0.30892187, 0.53896534, 0.47055021, 0.36516881,
        0.47171926, 0.60106188, 0.50094616, 0.45000693, 0.64961791, 0.38005090,
        0.46481833, 0.32499811, 0.33320057, 0.47021577, 0.69257373, 0.59902048,
        0.41818479, 0.49337858, 0.55532718, 0.30980867, 0.33188701, 0.66010177,
        0.42743745, 0.31603885, 0.31787628, 0.40076011, 0.62327665, 0.55508786,
        0.37898621, 0.51321679, 0.61310780, 0.41133499, 0.59758359, 0.48653394,
        0.39046362, 0.53451794, 0.38562405, 0.65734702, 0.52013063, 0.52452213,
        0.40066433, 0.63580883, 0.36570600, 0.62570536, 0.58469880, 0.57495338,
        0.44313377, 0.38637263, 0.43041423, 0.64863467, 0.53700525, 0.68507481,
        0.40981266, 0.59386617, 0.68781483, 0.41485479, 0.53454995, 0.54171872,
        0.40578777, 0.54966635, 0.40798607, 0.51533461, 0.52586460, 0.54418558,
        0.54018360, 0.46874642, 0.58406365, 0.