In [66]:
import numpy as np
import pandas as pd

In [67]:
np.set_printoptions(threshold=np.inf)

In [68]:
def load_cf(filename):
    user = []
    item = []
    user_dict = dict()

    lines = open(filename, 'r').readlines()
    for l in lines:
        tmp = l.strip()
        inter = [int(i) for i in tmp.split()]

        if len(inter) > 1:
            user_id, item_ids = inter[0], inter[1:]
            item_ids = list(set(item_ids))

            for item_id in item_ids:
                user.append(user_id)
                item.append(item_id)
            user_dict[user_id] = item_ids

    user = np.array(user, dtype=np.int32)
    item = np.array(item, dtype=np.int32)
    return (user, item), user_dict

In [69]:
cf_train_data, train_user_dict = load_cf("stage2/data/Douban/train.txt")
cf_test_data, test_user_dict = load_cf("stage2/data/Douban/test.txt")

In [70]:
cf_train_data[0].shape

(41830,)

In [71]:
cf_test_data[0].shape

(10840,)

In [72]:

n_users = max(max(cf_train_data[0]), max(cf_test_data[0])) + 1
n_items = max(max(cf_train_data[1]), max(cf_test_data[1])) + 1
n_cf_train = len(cf_train_data[0])
n_cf_test = len(cf_test_data[0])

In [73]:
from torch import nn

user_embed = nn.Embedding(n_users, 4)
item_embed = nn.Embedding(n_items, 4)

In [75]:
nn.init.xavier_uniform_(user_embed.weight)
nn.init.xavier_uniform_(item_embed.weight)

Parameter containing:
tensor([[-0.0627,  0.0620,  0.0109,  0.0126],
        [-0.0332, -0.0844, -0.0234,  0.0171],
        [ 0.0398, -0.0845, -0.0393,  0.0088],
        ...,
        [-0.0556, -0.0112, -0.0197, -0.0314],
        [ 0.0355, -0.0277,  0.0066,  0.0097],
        [-0.0639, -0.0773, -0.0302, -0.0873]], requires_grad=True)

In [80]:
user_embed.weight

Parameter containing:
tensor([[ 0.0560, -0.0751,  0.0737,  0.0429],
        [ 0.0546, -0.0680, -0.0129, -0.0414],
        [ 0.1108,  0.0930, -0.0094, -0.0956],
        ...,
        [ 0.0251,  0.0170, -0.0293,  0.0423],
        [-0.0503, -0.0619,  0.0430,  0.0938],
        [ 0.0858,  0.0089, -0.0449, -0.0977]], requires_grad=True)

In [81]:
import torch
a = [1,2,3]
a = torch.LongTensor(a)
b = user_embed(a)

In [87]:
b

tensor([[ 0.5605, -0.6981, -0.1326, -0.4253],
        [ 0.6382,  0.5355, -0.0544, -0.5504],
        [-0.5396,  0.4762,  0.5085,  0.4728]], grad_fn=<DivBackward0>)

In [88]:
import torch
import torch.nn as nn
import torch.nn.functional as F

b = F.normalize(b)
b

tensor([[ 0.5605, -0.6981, -0.1326, -0.4253],
        [ 0.6382,  0.5355, -0.0544, -0.5504],
        [-0.5396,  0.4762,  0.5085,  0.4728]], grad_fn=<DivBackward0>)

In [89]:
b = F.normalize(b,dim=1)
b

tensor([[ 0.5605, -0.6981, -0.1326, -0.4253],
        [ 0.6382,  0.5355, -0.0544, -0.5504],
        [-0.5396,  0.4762,  0.5085,  0.4728]], grad_fn=<DivBackward0>)

In [11]:
import torch

name = [1,2,3]
name = torch.LongTensor(name)
print(user_embed(name))

mul = [0,1,2]
mul = torch.LongTensor(mul)
print(user_embed(mul))

tensor([[ 0.1577, -0.2178, -1.2461,  1.1155],
        [-1.2773,  1.5063,  2.3168, -0.3868],
        [-1.0973, -0.2087, -0.2062,  0.2385]], grad_fn=<EmbeddingBackward>)
tensor([[ 2.3716, -0.5199, -0.0712,  0.9906],
        [ 0.1577, -0.2178, -1.2461,  1.1155],
        [-1.2773,  1.5063,  2.3168, -0.3868]], grad_fn=<EmbeddingBackward>)


In [12]:
pos_score = torch.sum(user_embed(name) * user_embed(mul), dim=0)
print(pos_score)

tensor([ 1.5742, -0.5292, -3.2760,  0.5813], grad_fn=<SumBackward1>)


In [13]:
print(name * mul)

tensor([0, 2, 6])


In [14]:
import pandas as pd
kg_data = pd.read_csv("stage2/data/Douban/kg_final.txt ", sep=' ', names=['h', 'r', 't'], engine='python')
kg_data = kg_data.drop_duplicates()

In [51]:
import pandas as pd
reverse_kg = kg_data[["t","r","h"]]
reverse_kg = reverse_kg.rename(columns = {"t":"h","h":"t"})

In [53]:
reverse_kg["r"] = reverse_kg["r"].apply(lambda x: x + 10)

In [54]:
reverse_kg

Unnamed: 0,h,r,t
0,606,10,899
1,1176,10,899
2,978,10,899
3,1440,10,899
4,597,10,899
...,...,...,...
42696,1512,74,448
42697,1101,28,448
42698,1178,21,448
42699,972,24,448


In [22]:
new = pd.concat([kg_data,reverse_kg],axis=0,ignore_index=True)

In [39]:
import collections
kg_dict = collections.defaultdict(list)
relation_dict = collections.defaultdict(list)

In [40]:
a = (1,2)
kg_dict["san"].append(a)

In [47]:
triples = kg_dict["san"]
triples[0]

(1, 2)

In [18]:
my_dict = dict()
my_dict["zhangsan"] = [1,2,3,4,5]
my_dict["lisi"] = [2,4,5,6]

In [23]:
my_dict.keys()

dict_keys(['zhangsan', 'lisi'])

In [38]:
np.random.randint(low=0, high=10, size=1)[0]

3

In [114]:
import torch
import torch.nn.functional as F

x = torch.tensor([[1, 2, 3, 4], [4, 3, 2, 1], [5, 6, 7, 8]])
# x = x.float()
x

tensor([[1, 2, 3, 4],
        [4, 3, 2, 1],
        [5, 6, 7, 8]])

In [115]:
x = x.squeeze(0)
x

tensor([[1, 2, 3, 4],
        [4, 3, 2, 1],
        [5, 6, 7, 8]])

In [117]:
data = pd.read_csv("stage1/second_selected_freebase_douban.csv")

In [119]:
data[0]

KeyError: 0