In [1]:
import numpy as np 
import pandas as pd 
from math import ceil
import torch
from torch.utils.data import TensorDataset, DataLoader

In [2]:
def sliding_win(row, window_sz = 6, skip_num = 1):
    group = ceil((len(row) - window_sz + 1)/skip_num)
    j = 0
    sample = []
#     label = []
    for i in range(group):
        sample.append(row[j:j + window_sz - 1]) 
#         label.append(row[j + window_sz - 1])
        j += skip_num

    return np.array(sample)

def sliding_win_lab(row, window_sz = 6, skip_num = 1):
    group = ceil((len(row) - window_sz + 1)/skip_num)
    j = 0
#     sample = []
    label = []
    for i in range(group):
#         sample.append(row[j:j + window_sz - 1]) 
        label.append(row[j + window_sz - 1])
        j += skip_num

    return np.array(label)

In [3]:
trainset = pd.read_csv('./bigdata2021-rl-recsys/trainset.csv', sep = ' ')
trainset.head()

Unnamed: 0,user_id,user_click_history,user_protrait,exposed_items,labels,time
0,1,"30:1580603130,34:1581178937,15:1581178939,28:1...","64054,21804,80911,36504,8867,7615,54240,37294,...",120289986119213237164,111111111,1582992009
1,2,"20:1580644762,13:1580644765,1:1580644770,127:1...","64054,26565,93755,88510,6344,7615,54240,21927,...",142611286117191234172,111111110,1582992010
2,3,"39:1580772975,6:1580772981,1:1581178309,2:1581...","64054,64086,63021,88510,93500,7615,54240,21927...",224284810542193236159,111010000,1582992014
3,4,"6:1580657608,9:1580657612,15:1580657615,127:15...","64054,21531,6599,16721,37078,7615,54240,65505,...",516174133122235218172,111111110,1582992014
4,5,"37:1580743545,36:1580743554,25:1580743556,37:1...","64054,66036,6599,88510,76066,20543,83978,37294...",61168573112239172205,111111111,1582992017


In [4]:
trainset['history_num'] = trainset["user_click_history"].apply(lambda row: list(map(lambda t: int(t.split(":")[0]), row.split(","))))
trainset['state'] = trainset['history_num'].apply(lambda row: row[-10:] if len(row) >= 10 else [0]*(10 - len(row)) + row)

In [5]:
data_x = np.array(
    list(trainset['state'].apply(sliding_win).values),
    dtype = 'int64').reshape(-1, 1)
data_y = np.array(
    list(trainset['state'].apply(sliding_win_lab).values),
    ).reshape(-1)

In [7]:
data_y = torch.from_numpy(data_y)
data_x = torch.from_numpy(data_x)

In [7]:
from torch.nn import functional as F
one_hot_x = F.one_hot(data_x, 382).reshape(-1, 5, 382)

In [8]:
one_hot_x = torch.zeros(data_x.size()[0], 382).scatter_(1, data_x, 1).reshape(-1, 5, 382)

In [9]:
train_data = TensorDataset(one_hot_x, data_y)

In [10]:
train_loader = DataLoader(dataset=train_data, batch_size=512, shuffle=True)

In [11]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.distributions import Categorical
from torch.autograd import Variable

In [12]:
def try_gpu(): #single gpu
    i = 0
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

In [19]:
class CBOW(nn.Module):
    def __init__(self, item_size, hidden_size, hidden_size2):
        super().__init__()
        device = try_gpu()
        self.item_size = item_size
        self.hidden_size = hidden_size
        self.look_up_table = nn.Parameter(nn.init.xavier_uniform_(torch.normal(0, 1, size = (item_size, hidden_size), requires_grad = True, device = device)))
        self.W1 = nn.Parameter(
                        nn.init.xavier_uniform_(torch.rand(hidden_size, hidden_size2, device = device))
                        )
        self.b1 = nn.Parameter(
                        nn.init.xavier_uniform_(torch.rand(1, hidden_size2, device = device))
                        )
        
        self.W2 = nn.Parameter(
                        nn.init.xavier_uniform_(torch.rand(hidden_size2, item_size, device = device))
                        )
        self.b2 = nn.Parameter(
                        nn.init.xavier_uniform_(torch.rand(1, item_size, device = device))
                        )
        
        for param in self.parameters():
            param.to(device)
                                                    
    def forward(self, x):  #x.shape:(batch_sz, seq_size: 3, item_size: 382)
        batch_sz, seq_sz, _ = x.size()
#         H = torch.zeros((batch_sz, self.hidden_size), device = x.device)
#         for seq in range(seq_sz):
#             x_t = x[:, seq, :]
#             H += (x_t @ self.look_up_table )/seq_sz
        H = torch.sum(x @ self.look_up_table, dim = 1)/seq_sz
        
        H2 = torch.relu(H @ self.W1 + self.b1)
        output = F.softmax(H2 @ self.W2 + self.b2, dim = 1)
        
        return output 

In [20]:
#train CBOW
cbow = CBOW(382,30,32)
# cbow.load_state_dict(torch.load('embedding.pth'))

In [21]:
for parameters in cbow.parameters():
    print(parameters)

Parameter containing:
tensor([[-0.0923, -0.0886, -0.0897,  ...,  0.0115,  0.0408, -0.0709],
        [-0.1053, -0.1006,  0.0378,  ..., -0.0471, -0.0823, -0.1121],
        [ 0.0135, -0.0961,  0.1148,  ...,  0.0577,  0.0003, -0.0845],
        ...,
        [-0.0023,  0.1113, -0.0199,  ..., -0.0511, -0.0551,  0.0036],
        [ 0.1028, -0.1033, -0.0442,  ...,  0.1107,  0.0250, -0.1060],
        [-0.1106,  0.0555, -0.0872,  ...,  0.0559,  0.0230,  0.1132]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-0.0754,  0.1803,  0.2097,  0.0213,  0.0817, -0.1185, -0.2455,  0.2131,
          0.1742, -0.1824, -0.0188,  0.0499,  0.1093, -0.0781,  0.0436,  0.2918,
         -0.2615, -0.0875,  0.2230, -0.3035, -0.2608,  0.1237,  0.1943,  0.2498,
         -0.0352,  0.1471, -0.1665, -0.1710,  0.0489,  0.0626, -0.0431,  0.1596],
        [-0.1529, -0.1107,  0.0449, -0.2511,  0.1123, -0.2734,  0.1867,  0.1104,
          0.0390,  0.1723,  0.1477, -0.1666, -0.1214, -0.3086,  0.0959, 

In [22]:
loss_fn = nn.NLLLoss(reduction='mean')
# optimizer = torch.optim.Adam(cbow.parameters(), lr = 0.01)
optimizer = torch.optim.SGD(cbow.parameters(), lr = 0.01)
device = try_gpu()
for epoch in range(20):
    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)
        
        optimizer.zero_grad()
        y_hat = cbow(X)
# #         print(y_hat.device)
        loss = loss_fn(torch.log(y_hat), Y.long())
        loss.backward()
        optimizer.step()
    print(f'loss:{loss.data}')

loss:4.543539047241211
loss:4.201603412628174
loss:4.3856401443481445
loss:4.20010232925415
loss:4.246908664703369
loss:4.072366714477539
loss:4.19942045211792
loss:4.0240631103515625
loss:4.0638957023620605
loss:3.9501426219940186
loss:4.066921710968018
loss:3.83845591545105
loss:3.8177998065948486
loss:3.892683982849121
loss:3.8095293045043945
loss:3.8375251293182373
loss:3.9538707733154297
loss:3.9706623554229736
loss:3.7868030071258545
loss:3.918647050857544


In [44]:
device = try_gpu()
for X, Y in train_loader:
    X, Y = X.to(device), Y.to(device)
    y_hat = cbow(X)
    print(X[0,:,:])
    print(Y[0])
    break

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
tensor(240, device='cuda:0', dtype=torch.int32)


In [36]:
max(y_hat[0]), torch.argmax(y_hat[0])

(tensor(0.0706, device='cuda:0', grad_fn=<UnbindBackward>),
 tensor(51, device='cuda:0'))

In [56]:
# y_hat = cbow(X)
i = 23
print(X[i,:,:])
print(Y[i])
torch.max(y_hat[i]), torch.argmax(y_hat[i])

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')
tensor(196, device='cuda:0', dtype=torch.int32)


(tensor(0.0884, device='cuda:0', grad_fn=<MaxBackward1>),
 tensor(196, device='cuda:0'))

In [20]:
torch.save(cbow.state_dict(),'embedding.pth')

In [16]:
item_vec = cbow.look_up_table.data.to('cpu').numpy()

In [17]:
keys = [str(item) for item in range(382)]

In [18]:
embed_dict = dict(zip(keys, item_vec))

In [19]:
np.save('embed_dict.npy', embed_dict)

In [20]:
map_dict=np.load('embed_dict.npy',allow_pickle=True).item()